diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
82 files changed, 8890 insertions, 2956 deletions
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile deleted file mode 100644 index 7e73aa587967..000000000000 --- a/drivers/gpu/drm/i915/gt/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# For building individual subdir files on the command line -subdir-ccflags-y += -I$(srctree)/$(src)/.. - -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c new file mode 100644 index 000000000000..6a5e9ab20b94 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <drm/drm_print.h> + +#include "debugfs_engines.h" +#include "debugfs_gt.h" +#include "i915_drv.h" /* for_each_engine! */ +#include "intel_engine.h" + +static int engines_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct drm_printer p; + + p = drm_seq_file_printer(m); + for_each_engine(engine, gt, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(engines); + +void debugfs_engines_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "engines", &engines_fops }, + }; + + debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h new file mode 100644 index 000000000000..f69257eaa1cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_ENGINES_H +#define DEBUGFS_ENGINES_H + +struct intel_gt; +struct dentry; + +void debugfs_engines_register(struct intel_gt *gt, struct dentry *root); + +#endif /* DEBUGFS_ENGINES_H */ diff --git 
a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c new file mode 100644 index 000000000000..75255aaacaed --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "debugfs_engines.h" +#include "debugfs_gt.h" +#include "debugfs_gt_pm.h" +#include "i915_drv.h" + +void debugfs_gt_register(struct intel_gt *gt) +{ + struct dentry *root; + + if (!gt->i915->drm.primary->debugfs_root) + return; + + root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); + if (IS_ERR(root)) + return; + + debugfs_engines_register(gt, root); + debugfs_gt_pm_register(gt, root); +} + +void debugfs_gt_register_files(struct intel_gt *gt, + struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count) +{ + while (count--) { + if (!files->eval || files->eval(gt)) + debugfs_create_file(files->name, + 0444, root, gt, + files->fops); + + files++; + } +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h new file mode 100644 index 000000000000..4ea0f06cda8f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_GT_H +#define DEBUGFS_GT_H + +#include <linux/file.h> + +struct intel_gt; + +#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \ + static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +void debugfs_gt_register(struct intel_gt *gt); + +struct debugfs_gt_file { + const char *name; + const struct file_operations *fops; + bool (*eval)(const struct intel_gt *gt); +}; 
+ +void debugfs_gt_register_files(struct intel_gt *gt, + struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count); + +#endif /* DEBUGFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c new file mode 100644 index 000000000000..059c9e5c002e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/seq_file.h> + +#include "debugfs_gt.h" +#include "debugfs_gt_pm.h" +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_llc.h" +#include "intel_rc6.h" +#include "intel_rps.h" +#include "intel_runtime_pm.h" +#include "intel_sideband.h" +#include "intel_uncore.h" + +static int fw_domains_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + struct intel_uncore_forcewake_domain *fw_domain; + unsigned int tmp; + + seq_printf(m, "user.bypass_count = %u\n", + uncore->user_forcewake_count); + + for_each_fw_domain(fw_domain, uncore, tmp) + seq_printf(m, "%s.wake_count = %u\n", + intel_uncore_forcewake_domain_to_str(fw_domain->id), + READ_ONCE(fw_domain->wake_count)); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains); + +static void print_rc6_res(struct seq_file *m, + const char *title, + const i915_reg_t reg) +{ + struct intel_gt *gt = m->private; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(gt->uncore, reg), + intel_rc6_residency_us(&gt->rc6, reg)); +} + +static int vlv_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rcctl1, pw_status; + + pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS); + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | + 
GEN6_RC_CTL_EI_MODE(1)))); + seq_printf(m, "Render Power Well: %s\n", + (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); + + print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6); + print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + + return fw_domains_show(m, NULL); +} + +static int gen6_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; + + gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + if (INTEL_GEN(i915) >= 9) { + gen9_powergate_enable = + intel_uncore_read(uncore, GEN9_PG_ENABLE); + gen9_powergate_status = + intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS); + } + + if (INTEL_GEN(i915) <= 7) + sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + + seq_printf(m, "RC1e Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (INTEL_GEN(i915) >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } + seq_printf(m, "Deep RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); + seq_printf(m, "Deepest RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE)); + seq_puts(m, "Current RC state: "); + switch (gt_core_status & GEN6_RCn_MASK) { + case GEN6_RC0: + if (gt_core_status & GEN6_CORE_CPD_STATE_MASK) + seq_puts(m, "Core Power Down\n"); + else + seq_puts(m, "on\n"); + break; + case GEN6_RC3: + seq_puts(m, "RC3\n"); + break; + case 
GEN6_RC6: + seq_puts(m, "RC6\n"); + break; + case GEN6_RC7: + seq_puts(m, "RC7\n"); + break; + default: + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Core Power Down: %s\n", + yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (INTEL_GEN(i915) >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + } + + /* Not exactly sure what this is */ + print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", + GEN6_GT_GFX_RC6_LOCKED); + print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); + print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); + print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + + if (INTEL_GEN(i915) <= 7) { + seq_printf(m, "RC6 voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); + seq_printf(m, "RC6+ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); + seq_printf(m, "RC6++ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); + } + + return fw_domains_show(m, NULL); +} + +static int ilk_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rgvmodectl, rstdbyctl; + u16 crstandvid; + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL); + crstandvid = intel_uncore_read16(uncore, CRSTANDVID); + + seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); + seq_printf(m, "Boost freq: %d\n", + (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >> + MEMMODE_BOOST_FREQ_SHIFT); + seq_printf(m, "HW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_HWIDLE_EN)); + seq_printf(m, "SW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_SWMODE_EN)); + seq_printf(m, "Gated voltage change: %s\n", + yesno(rgvmodectl & MEMMODE_RCLK_GATE)); + seq_printf(m, 
"Starting frequency: P%d\n", + (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT); + seq_printf(m, "Max P-state: P%d\n", + (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT); + seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK)); + seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f)); + seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f)); + seq_printf(m, "Render standby enabled: %s\n", + yesno(!(rstdbyctl & RCX_SW_EXIT))); + seq_puts(m, "Current RS state: "); + switch (rstdbyctl & RSX_STATUS_MASK) { + case RSX_STATUS_ON: + seq_puts(m, "on\n"); + break; + case RSX_STATUS_RC1: + seq_puts(m, "RC1\n"); + break; + case RSX_STATUS_RC1E: + seq_puts(m, "RC1E\n"); + break; + case RSX_STATUS_RS1: + seq_puts(m, "RS1\n"); + break; + case RSX_STATUS_RS2: + seq_puts(m, "RS2 (RC6)\n"); + break; + case RSX_STATUS_RS3: + seq_puts(m, "RC3 (RC6+)\n"); + break; + default: + seq_puts(m, "unknown\n"); + break; + } + + return 0; +} + +static int drpc_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + intel_wakeref_t wakeref; + int err = -ENODEV; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_drpc(m); + else if (INTEL_GEN(i915) >= 6) + err = gen6_drpc(m); + else + err = ilk_drpc(m); + } + + return err; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc); + +static int frequency_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_rps *rps = &gt->rps; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(uncore->rpm); + + if (IS_GEN(i915, 5)) { + u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); + + seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); + seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); + seq_printf(m, 
"Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> + MEMSTAT_VID_SHIFT); + seq_printf(m, "Current P-state: %d\n", + (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + u32 rpmodectl, freq_sts; + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + vlv_punit_get(i915); + freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + + seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); + seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq); + + seq_printf(m, "actual GPU freq: %d MHz\n", + intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); + + seq_printf(m, "current GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + + seq_printf(m, "max GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "min GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + + seq_printf(m, "idle GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + + seq_printf(m, "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else if (INTEL_GEN(i915) >= 6) { + u32 rp_state_limits; + u32 gt_perf_status; + u32 rp_state_cap; + u32 rpmodectl, rpinclimit, rpdeclimit; + u32 rpstat, cagf, reqf; + u32 rpupei, rpcurup, rpprevup; + u32 rpdownei, rpcurdown, rpprevdown; + u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; + int max_freq; + + rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); + if (IS_GEN9_LP(i915)) { + rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); + } else { + rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + gt_perf_status = 
intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); + } + + /* RPSTAT1 is in the GT power well */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); + if (INTEL_GEN(i915) >= 9) { + reqf >>= 23; + } else { + reqf &= ~GEN6_TURBO_DISABLE; + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + reqf >>= 24; + else + reqf >>= 25; + } + reqf = intel_gpu_freq(rps, reqf); + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; + rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; + rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; + rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + cagf = intel_rps_read_actual_frequency(rps); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + if (INTEL_GEN(i915) >= 11) { + pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); + pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); + /* + * The equivalent to the PM ISR & IIR cannot be read + * without affecting the current state of the system + */ + pm_isr = 0; + pm_iir = 0; + } else if (INTEL_GEN(i915) >= 8) { + pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); + pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); + pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); + pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); + } else { + pm_ier = intel_uncore_read(uncore, GEN6_PMIER); + pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); + pm_isr = intel_uncore_read(uncore, GEN6_PMISR); + pm_iir = 
intel_uncore_read(uncore, GEN6_PMIIR); + } + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + pm_ier, pm_imr, pm_mask); + if (INTEL_GEN(i915) <= 10) + seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", + pm_isr, pm_iir); + seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + seq_printf(m, "Render p-state ratio: %d\n", + (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); + seq_printf(m, "Render p-state VID: %d\n", + gt_perf_status & 0xff); + seq_printf(m, "Render p-state limit: %d\n", + rp_state_limits & 0xff); + seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); + seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); + seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); + seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); + seq_printf(m, "CAGF: %dMHz\n", cagf); + seq_printf(m, "RP CUR UP EI: %d (%dus)\n", + rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei)); + seq_printf(m, "RP CUR UP: %d (%dus)\n", + rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup)); + seq_printf(m, "RP PREV UP: %d (%dus)\n", + rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup)); + seq_printf(m, "Up threshold: %d%%\n", + rps->power.up_threshold); + + seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", + rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei)); + seq_printf(m, "RP CUR DOWN: %d (%dus)\n", + rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown)); + seq_printf(m, "RP PREV DOWN: %d (%dus)\n", + rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown)); + seq_printf(m, "Down threshold: %d%%\n", + rps->power.down_threshold); + + max_freq = (IS_GEN9_LP(i915) ? 
rp_state_cap >> 0 : + rp_state_cap >> 16) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (rp_state_cap & 0xff00) >> 8; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : + rp_state_cap >> 0) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + seq_printf(m, "Max overclocked frequency: %dMHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "Current freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + seq_printf(m, "Actual freq: %d MHz\n", cagf); + seq_printf(m, "Idle freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + seq_printf(m, "Min freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(rps, rps->boost_freq)); + seq_printf(m, "Max freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else { + seq_puts(m, "no P-state info available\n"); + } + + seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); + seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); + seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); + + intel_runtime_pm_put(uncore->rpm, wakeref); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency); + +static int llc_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + const bool edram = INTEL_GEN(i915) > 8; + struct intel_rps *rps = &gt->rps; + unsigned int max_gpu_freq, min_gpu_freq; 
+ intel_wakeref_t wakeref; + int gpu_freq, ia_freq; + + seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915))); + seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC", + i915->edram_size_mb); + + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Convert GT frequency to 50 HZ units */ + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; + } + + seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { + ia_freq = gpu_freq; + sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &ia_freq, NULL); + seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", + intel_gpu_freq(rps, + (gpu_freq * + (IS_GEN9_BC(i915) || + INTEL_GEN(i915) >= 10 ? + GEN9_FREQ_SCALER : 1))), + ((ia_freq >> 0) & 0xff) * 100, + ((ia_freq >> 8) & 0xff) * 100); + } + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return 0; +} + +static bool llc_eval(const struct intel_gt *gt) +{ + return HAS_LLC(gt->i915); +} + +DEFINE_GT_DEBUGFS_ATTRIBUTE(llc); + +static const char *rps_power_to_str(unsigned int power) +{ + static const char * const strings[] = { + [LOW_POWER] = "low power", + [BETWEEN] = "mixed", + [HIGH_POWER] = "high power", + }; + + if (power >= ARRAY_SIZE(strings) || !strings[power]) + return "unknown"; + + return strings[power]; +} + +static int rps_boost_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_rps *rps = &gt->rps; + + seq_printf(m, "RPS enabled? %d\n", rps->enabled); + seq_printf(m, "GPU busy? %s\n", yesno(gt->awake)); + seq_printf(m, "Boosts outstanding? %d\n", + atomic_read(&rps->num_waiters)); + seq_printf(m, "Interactive? 
%d\n", READ_ONCE(rps->power.interactive)); + seq_printf(m, "Frequency requested %d, actual %d\n", + intel_gpu_freq(rps, rps->cur_freq), + intel_rps_read_actual_frequency(rps)); + seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->min_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", + intel_gpu_freq(rps, rps->idle_freq), + intel_gpu_freq(rps, rps->efficient_freq), + intel_gpu_freq(rps, rps->boost_freq)); + + seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); + + if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) { + struct intel_uncore *uncore = gt->uncore; + u32 rpup, rpupei; + u32 rpdown, rpdownei; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK; + rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK; + rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK; + rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK; + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", + rps_power_to_str(rps->power.mode)); + seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", + rpup && rpupei ? 100 * rpup / rpupei : 0, + rps->power.up_threshold); + seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", + rpdown && rpdownei ? 
100 * rpdown / rpdownei : 0, + rps->power.down_threshold); + } else { + seq_puts(m, "\nRPS Autotuning inactive\n"); + } + + return 0; +} + +static bool rps_eval(const struct intel_gt *gt) +{ + return HAS_RPS(gt->i915); +} + +DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost); + +void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "drpc", &drpc_fops, NULL }, + { "frequency", &frequency_fops, NULL }, + { "forcewake", &fw_domains_fops, NULL }, + { "llc", &llc_fops, llc_eval }, + { "rps_boost", &rps_boost_fops, rps_eval }, + }; + + debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h new file mode 100644 index 000000000000..4cf5f5c9da7d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef DEBUGFS_GT_PM_H +#define DEBUGFS_GT_PM_H + +struct intel_gt; +struct dentry; + +void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root); + +#endif /* DEBUGFS_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c new file mode 100644 index 000000000000..f4fec7eb4064 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/log2.h> + +#include "gen6_ppgtt.h" +#include "i915_scatterlist.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "intel_gt.h" + +/* Write pde (index) from the page directory @pd to the page table @pt */ +static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) +{ + /* Caller needs to make sure the write completes if necessary */ + iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); +} + +void 
gen7_ppgtt_enable(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 ecochk; + + intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B); + + ecochk = intel_uncore_read(uncore, GAM_ECOCHK); + if (IS_HASWELL(i915)) { + ecochk |= ECOCHK_PPGTT_WB_HSW; + } else { + ecochk |= ECOCHK_PPGTT_LLC_IVB; + ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; + } + intel_uncore_write(uncore, GAM_ECOCHK, ecochk); + + for_each_engine(engine, gt, id) { + /* GFX_MODE is per-ring on gen7+ */ + ENGINE_WRITE(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + } +} + +void gen6_ppgtt_enable(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + + intel_uncore_rmw(uncore, + GAC_ECO_BITS, + 0, + ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B); + + intel_uncore_rmw(uncore, + GAB_CTL, + 0, + GAB_CTL_CONT_AFTER_PAGEFAULT); + + intel_uncore_rmw(uncore, + GAM_ECOCHK, + 0, + ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); + + if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */ + intel_uncore_write(uncore, + GFX_MODE, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); +} + +/* PPGTT support for Sandybdrige/Gen6 and later */ +static void gen6_ppgtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + const gen6_pte_t scratch_pte = vm->scratch[0].encode; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + + while (num_entries) { + struct i915_page_table * const pt = + i915_pt_entry(ppgtt->base.pd, pde++); + const unsigned int count = min(num_entries, GEN6_PTES - pte); + gen6_pte_t *vaddr; + + GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); + + num_entries -= count; + + GEM_BUG_ON(count > 
atomic_read(&pt->used)); + if (!atomic_sub_return(count, &pt->used)) + ppgtt->scan_for_unused_pt = true; + + /* + * Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch. + */ + + vaddr = kmap_atomic_px(pt); + memset32(vaddr + pte, scratch_pte, count); + kunmap_atomic(vaddr); + + pte = 0; + } +} + +static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_page_directory * const pd = ppgtt->pd; + unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE; + unsigned int act_pt = first_entry / GEN6_PTES; + unsigned int act_pte = first_entry % GEN6_PTES; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter = sgt_dma(vma); + gen6_pte_t *vaddr; + + GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]); + + vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); + do { + GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE); + vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); + + iter.dma += I915_GTT_PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; + + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } + + if (++act_pte == GEN6_PTES) { + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); + act_pte = 0; + } + } while (1); + kunmap_atomic(vaddr); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; +} + +static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) +{ + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_table *pt; + unsigned int pde; + + start = round_down(start, SZ_64K); + end = round_up(end, SZ_64K) - start; + + mutex_lock(&ppgtt->flush); + + gen6_for_each_pde(pt, pd, start, end, pde) + 
gen6_write_pde(ppgtt, pde, pt); + + mb(); + ioread32(ppgtt->pd_addr + pde - 1); + gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt); + mb(); + + mutex_unlock(&ppgtt->flush); +} + +static int gen6_alloc_va_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_table *pt, *alloc = NULL; + intel_wakeref_t wakeref; + u64 from = start; + unsigned int pde; + int ret = 0; + + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + + spin_lock(&pd->lock); + gen6_for_each_pde(pt, pd, start, length, pde) { + const unsigned int count = gen6_pte_count(start, length); + + if (px_base(pt) == px_base(&vm->scratch[1])) { + spin_unlock(&pd->lock); + + pt = fetch_and_zero(&alloc); + if (!pt) + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto unwind_out; + } + + fill32_px(pt, vm->scratch[0].encode); + + spin_lock(&pd->lock); + if (pd->entry[pde] == &vm->scratch[1]) { + pd->entry[pde] = pt; + } else { + alloc = pt; + pt = pd->entry[pde]; + } + } + + atomic_add(count, &pt->used); + } + spin_unlock(&pd->lock); + + if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) + gen6_flush_pd(ppgtt, from, start); + + goto out; + +unwind_out: + gen6_ppgtt_clear_range(vm, from, start - from); +out: + if (alloc) + free_px(vm, alloc); + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + return ret; +} + +static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) +{ + struct i915_address_space * const vm = &ppgtt->base.vm; + struct i915_page_directory * const pd = ppgtt->base.pd; + int ret; + + ret = setup_scratch_page(vm, __GFP_HIGHMEM); + if (ret) + return ret; + + vm->scratch[0].encode = + vm->pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_NONE, PTE_READ_ONLY); + + if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) { + cleanup_scratch_page(vm); + return -ENOMEM; + } + + fill32_px(&vm->scratch[1], 
vm->scratch[0].encode); + memset_p(pd->entry, &vm->scratch[1], I915_PDES); + + return 0; +} + +static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) +{ + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); + struct i915_page_table *pt; + u32 pde; + + gen6_for_all_pdes(pt, pd, pde) + if (px_base(pt) != scratch) + free_px(&ppgtt->base.vm, pt); +} + +static void gen6_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + + __i915_vma_put(ppgtt->vma); + + gen6_ppgtt_free_pd(ppgtt); + free_scratch(vm); + + mutex_destroy(&ppgtt->flush); + mutex_destroy(&ppgtt->pin_mutex); + kfree(ppgtt->base.pd); +} + +static int pd_vma_set_pages(struct i915_vma *vma) +{ + vma->pages = ERR_PTR(-ENODEV); + return 0; +} + +static void pd_vma_clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + vma->pages = NULL; +} + +static int pd_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); + struct gen6_ppgtt *ppgtt = vma->private; + u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; + + px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; + + gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); + return 0; +} + +static void pd_vma_unbind(struct i915_vma *vma) +{ + struct gen6_ppgtt *ppgtt = vma->private; + struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); + struct i915_page_table *pt; + unsigned int pde; + + if (!ppgtt->scan_for_unused_pt) + return; + + /* Free all no longer used page tables */ + gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { + if (px_base(pt) == scratch || atomic_read(&pt->used)) + continue; + + free_px(&ppgtt->base.vm, pt); + pd->entry[pde] = scratch; + } + + 
ppgtt->scan_for_unused_pt = false; +} + +static const struct i915_vma_ops pd_vma_ops = { + .set_pages = pd_vma_set_pages, + .clear_pages = pd_vma_clear_pages, + .bind_vma = pd_vma_bind, + .unbind_vma = pd_vma_unbind, +}; + +static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) +{ + struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; + struct i915_vma *vma; + + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(size > ggtt->vm.total); + + vma = i915_vma_alloc(); + if (!vma) + return ERR_PTR(-ENOMEM); + + i915_active_init(&vma->active, NULL, NULL); + + kref_init(&vma->ref); + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(&ggtt->vm); + vma->ops = &pd_vma_ops; + vma->private = ppgtt; + + vma->size = size; + vma->fence_size = size; + atomic_set(&vma->flags, I915_VMA_GGTT); + vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ + + INIT_LIST_HEAD(&vma->obj_link); + INIT_LIST_HEAD(&vma->closed_link); + + return vma; +} + +int gen6_ppgtt_pin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + int err; + + GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); + + /* + * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt + * which will be pinned into every active context. + * (When vma->pin_count becomes atomic, I expect we will naturally + * need a larger, unpacked, type and kill this redundancy.) + */ + if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) + return 0; + + if (mutex_lock_interruptible(&ppgtt->pin_mutex)) + return -EINTR; + + /* + * PPGTT PDEs reside in the GGTT and consists of 512 entries. The + * allocator works in address space sizes, so it's multiplied by page + * size. We allocate at the top of the GTT to avoid fragmentation. 
+ */ + err = 0; + if (!atomic_read(&ppgtt->pin_count)) + err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); + if (!err) + atomic_inc(&ppgtt->pin_count); + mutex_unlock(&ppgtt->pin_mutex); + + return err; +} + +void gen6_ppgtt_unpin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); +} + +void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + if (!atomic_read(&ppgtt->pin_count)) + return; + + i915_vma_unpin(ppgtt->vma); + atomic_set(&ppgtt->pin_count, 0); +} + +struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ggtt * const ggtt = gt->ggtt; + struct gen6_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + mutex_init(&ppgtt->flush); + mutex_init(&ppgtt->pin_mutex); + + ppgtt_init(&ppgtt->base, gt); + ppgtt->base.vm.top = 1; + + ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; + ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; + ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; + + ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; + + ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd)); + if (!ppgtt->base.pd) { + err = -ENOMEM; + goto err_free; + } + + err = gen6_ppgtt_init_scratch(ppgtt); + if (err) + goto err_pd; + + ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); + if (IS_ERR(ppgtt->vma)) { + err = PTR_ERR(ppgtt->vma); + goto err_scratch; + } + + return &ppgtt->base; + +err_scratch: + free_scratch(&ppgtt->base.vm); +err_pd: + kfree(ppgtt->base.pd); +err_free: + mutex_destroy(&ppgtt->pin_mutex); + kfree(ppgtt); + return ERR_PTR(err); +} diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h new file mode 100644 
index 000000000000..72e481806c96 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN6_PPGTT_H__ +#define __GEN6_PPGTT_H__ + +#include "intel_gtt.h" + +struct gen6_ppgtt { + struct i915_ppgtt base; + + struct mutex flush; + struct i915_vma *vma; + gen6_pte_t __iomem *pd_addr; + + atomic_t pin_count; + struct mutex pin_mutex; + + bool scan_for_unused_pt; +}; + +static inline u32 gen6_pte_index(u32 addr) +{ + return i915_pte_index(addr, GEN6_PDE_SHIFT); +} + +static inline u32 gen6_pte_count(u32 addr, u32 length) +{ + return i915_pte_count(addr, length, GEN6_PDE_SHIFT); +} + +static inline u32 gen6_pde_index(u32 addr) +{ + return i915_pde_index(addr, GEN6_PDE_SHIFT); +} + +#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base) + +static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) +{ + BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base)); + return __to_gen6_ppgtt(base); +} + +/* + * gen6_for_each_pde() iterates over every pde from start until start+length. + * If start and start+length are not perfectly divisible, the macro will round + * down and up as needed. Start=0 and length=2G effectively iterates over + * every PDE in the system. The macro modifies ALL its parameters except 'pd', + * so each of the other parameters should preferably be a simple variable, or + * at most an lvalue with no side-effects! 
+ */ +#define gen6_for_each_pde(pt, pd, start, length, iter) \ + for (iter = gen6_pde_index(start); \ + length > 0 && iter < I915_PDES && \ + (pt = i915_pt_entry(pd, iter), true); \ + ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \ + temp = min(temp - start, length); \ + start += temp, length -= temp; }), ++iter) + +#define gen6_for_all_pdes(pt, pd, iter) \ + for (iter = 0; \ + iter < I915_PDES && \ + (pt = i915_pt_entry(pd, iter), true); \ + ++iter) + +int gen6_ppgtt_pin(struct i915_ppgtt *base); +void gen6_ppgtt_unpin(struct i915_ppgtt *base); +void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); +void gen6_ppgtt_enable(struct intel_gt *gt); +void gen7_ppgtt_enable(struct intel_gt *gt); +struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c new file mode 100644 index 000000000000..4d1de2d97d5c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -0,0 +1,724 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/log2.h> + +#include "gen8_ppgtt.h" +#include "i915_scatterlist.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "intel_gt.h" +#include "intel_gtt.h" + +static u64 gen8_pde_encode(const dma_addr_t addr, + const enum i915_cache_level level) +{ + u64 pde = addr | _PAGE_PRESENT | _PAGE_RW; + + if (level != I915_CACHE_NONE) + pde |= PPAT_CACHED_PDE; + else + pde |= PPAT_UNCACHED; + + return pde; +} + +static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) +{ + struct drm_i915_private *i915 = ppgtt->vm.i915; + struct intel_uncore *uncore = ppgtt->vm.gt->uncore; + enum vgt_g2v_type msg; + int i; + + if (create) + atomic_inc(px_used(ppgtt->pd)); /* never remove */ + else + atomic_dec(px_used(ppgtt->pd)); + + mutex_lock(&i915->vgpu.lock); + + if (i915_vm_is_4lvl(&ppgtt->vm)) { + const u64 daddr = px_dma(ppgtt->pd); + + intel_uncore_write(uncore, + vgtif_reg(pdp[0].lo), 
lower_32_bits(daddr)); + intel_uncore_write(uncore, + vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); + + msg = create ? + VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY; + } else { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + + intel_uncore_write(uncore, + vgtif_reg(pdp[i].lo), + lower_32_bits(daddr)); + intel_uncore_write(uncore, + vgtif_reg(pdp[i].hi), + upper_32_bits(daddr)); + } + + msg = create ? + VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY; + } + + /* g2v_notify atomically (via hv trap) consumes the message packet. */ + intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg); + + mutex_unlock(&i915->vgpu.lock); +} + +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ +#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */ +#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE)) +#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64)) +#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES)) +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl)) +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl)) +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl)) + +#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) + +static inline unsigned int +gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) +{ + const int shift = gen8_pd_shift(lvl); + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(start >= end); + end += ~mask >> gen8_pd_shift(1); + + *idx = i915_pde_index(start, shift); + if ((start ^ end) & mask) + return GEN8_PDES - *idx; + else + return i915_pde_index(end, shift) - *idx; +} + +static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) +{ + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(start >= end); + return (start ^ end) & mask && (start & ~mask) == 0; +} + +static inline unsigned int 
gen8_pt_count(u64 start, u64 end) +{ + GEM_BUG_ON(start >= end); + if ((start ^ end) >> gen8_pd_shift(1)) + return GEN8_PDES - (start & (GEN8_PDES - 1)); + else + return end - start; +} + +static inline unsigned int +gen8_pd_top_count(const struct i915_address_space *vm) +{ + unsigned int shift = __gen8_pte_shift(vm->top); + return (vm->total + (1ull << shift) - 1) >> shift; +} + +static inline struct i915_page_directory * +gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) +{ + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); + + if (vm->top == 2) + return ppgtt->pd; + else + return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); +} + +static inline struct i915_page_directory * +gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) +{ + return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); +} + +static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, + struct i915_page_directory *pd, + int count, int lvl) +{ + if (lvl) { + void **pde = pd->entry; + + do { + if (!*pde) + continue; + + __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1); + } while (pde++, --count); + } + + free_px(vm, pd); +} + +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + + if (intel_vgpu_active(vm->i915)) + gen8_ppgtt_notify_vgt(ppgtt, false); + + __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top); + free_scratch(vm); +} + +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 start, const u64 end, int lvl) +{ + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + unsigned int idx, len; + + GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); + + len = gen8_pd_range(start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || 
len >= atomic_read(px_used(pd))); + + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && + gen8_pd_contains(start, end, lvl)) { + DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", + __func__, vm, lvl + 1, idx, start, end); + clear_pd_entry(pd, idx, scratch); + __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); + start += (u64)I915_PDES << gen8_pd_shift(lvl); + continue; + } + + if (lvl) { + start = __gen8_ppgtt_clear(vm, as_pd(pt), + start, end, lvl); + } else { + unsigned int count; + u64 *vaddr; + + count = gen8_pt_count(start, end); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", + __func__, vm, lvl, start, end, + gen8_pd_index(start, 0), count, + atomic_read(&pt->used)); + GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); + + vaddr = kmap_atomic_px(pt); + memset64(vaddr + gen8_pd_index(start, 0), + vm->scratch[0].encode, + count); + kunmap_atomic(vaddr); + + atomic_sub(count, &pt->used); + start += count; + } + + if (release_pd_entry(pd, idx, pt, scratch)) + free_px(vm, pt); + } while (idx++, --len); + + return start; +} + +static void gen8_ppgtt_clear(struct i915_address_space *vm, + u64 start, u64 length) +{ + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(range_overflows(start, length, vm->total)); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + start, start + length, vm->top); +} + +static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 * const start, const u64 end, int lvl) +{ + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + struct i915_page_table *alloc = NULL; + unsigned int idx, len; + int ret = 0; + + GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); + + len = 
gen8_pd_range(*start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, *start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); + + spin_lock(&pd->lock); + GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */ + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (!pt) { + spin_unlock(&pd->lock); + + DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", + __func__, vm, lvl + 1, idx); + + pt = fetch_and_zero(&alloc); + if (lvl) { + if (!pt) { + pt = &alloc_pd(vm)->pt; + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto out; + } + } + + fill_px(pt, vm->scratch[lvl].encode); + } else { + if (!pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto out; + } + } + + if (intel_vgpu_active(vm->i915) || + gen8_pt_count(*start, end) < I915_PDES) + fill_px(pt, vm->scratch[lvl].encode); + } + + spin_lock(&pd->lock); + if (likely(!pd->entry[idx])) + set_pd_entry(pd, idx, pt); + else + alloc = pt, pt = pd->entry[idx]; + } + + if (lvl) { + atomic_inc(&pt->used); + spin_unlock(&pd->lock); + + ret = __gen8_ppgtt_alloc(vm, as_pd(pt), + start, end, lvl); + if (unlikely(ret)) { + if (release_pd_entry(pd, idx, pt, scratch)) + free_px(vm, pt); + goto out; + } + + spin_lock(&pd->lock); + atomic_dec(&pt->used); + GEM_BUG_ON(!atomic_read(&pt->used)); + } else { + unsigned int count = gen8_pt_count(*start, end); + + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", + __func__, vm, lvl, *start, end, + gen8_pd_index(*start, 0), count, + atomic_read(&pt->used)); + + atomic_add(count, &pt->used); + /* All other pdes may be simultaneously removed */ + GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES); + *start += count; + } + } while (idx++, --len); + spin_unlock(&pd->lock); +out: + if (alloc) + free_px(vm, alloc); + return ret; +} + +static int gen8_ppgtt_alloc(struct i915_address_space *vm, + u64 
start, u64 length) +{ + u64 from; + int err; + + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(range_overflows(start, length, vm->total)); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + from = start; + + err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top); + if (unlikely(err && from != start)) + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + from, start, vm->top); + + return err; +} + +static __always_inline u64 +gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, + struct i915_page_directory *pdp, + struct sgt_dma *iter, + u64 idx, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + gen8_pte_t *vaddr; + + pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + do { + GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE); + vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; + + iter->dma += I915_GTT_PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + idx = 0; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; + } + + if (gen8_pd_index(++idx, 0) == 0) { + if (gen8_pd_index(idx, 1) == 0) { + /* Limited by sg length for 3lvl */ + if (gen8_pd_index(idx, 2) == 0) + break; + + pd = pdp->entry[gen8_pd_index(idx, 2)]; + } + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + } + } while (1); + kunmap_atomic(vaddr); + + return idx; +} + +static void gen8_ppgtt_insert_huge(struct i915_vma *vma, + struct sgt_dma *iter, + enum i915_cache_level cache_level, + u32 flags) +{ + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + u64 start = vma->node.start; + dma_addr_t rem = iter->sg->length; + + 
GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm)); + + do { + struct i915_page_directory * const pdp = + gen8_pdp_for_page_address(vma->vm, start); + struct i915_page_directory * const pd = + i915_pd_entry(pdp, __gen8_pte_index(start, 2)); + gen8_pte_t encode = pte_encode; + unsigned int maybe_64K = -1; + unsigned int page_size; + gen8_pte_t *vaddr; + u16 index; + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && + !__gen8_pte_index(start, 0)) { + index = __gen8_pte_index(start, 1); + encode |= GEN8_PDE_PS_2M; + page_size = I915_GTT_PAGE_SIZE_2M; + + vaddr = kmap_atomic_px(pd); + } else { + struct i915_page_table *pt = + i915_pt_entry(pd, __gen8_pte_index(start, 1)); + + index = __gen8_pte_index(start, 0); + page_size = I915_GTT_PAGE_SIZE; + + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) + maybe_64K = __gen8_pte_index(start, 1); + + vaddr = kmap_atomic_px(pt); + } + + do { + GEM_BUG_ON(iter->sg->length < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = iter->sg->length; + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (maybe_64K != -1 && index < I915_PDES && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) + maybe_64K = -1; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < I915_PDES); + + kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? 
-- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K != -1 && + (index == I915_PDES || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + page_size = I915_GTT_PAGE_SIZE_64K; + + /* + * We write all 4K page entries, even when using 64K + * pages. In order to verify that the HW isn't cheating + * by using the 4K PTE instead of the 64K PTE, we want + * to remove all the surplus entries. If the HW skipped + * the 64K PTE, it will read/write into the scratch page + * instead - which we detect as missing results during + * selftests. + */ + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + u16 i; + + encode = vma->vm->scratch[0].encode; + vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); + + for (i = 1; i < index; i += 16) + memset64(vaddr + i, encode, 15); + + kunmap_atomic(vaddr); + } + } + + vma->page_sizes.gtt |= page_size; + } while (iter->sg); +} + +static void gen8_ppgtt_insert(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = sgt_dma(vma); + + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags); + } else { + u64 idx = vma->node.start >> GEN8_PTE_SHIFT; + + do { + struct i915_page_directory * const pdp = + gen8_pdp_for_page_index(vm, idx); + + idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx, + cache_level, flags); + } while (idx); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + } +} + +static int gen8_init_scratch(struct i915_address_space *vm) +{ + int ret; + int i; + + /* + * If everybody agrees to not to write into the scratch page, + * we can reuse it for all vm, 
keeping contexts and processes separate. + */ + if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) { + struct i915_address_space *clone = vm->gt->vm; + + GEM_BUG_ON(!clone->has_read_only); + + vm->scratch_order = clone->scratch_order; + memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch)); + px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */ + return 0; + } + + ret = setup_scratch_page(vm, __GFP_HIGHMEM); + if (ret) + return ret; + + vm->scratch[0].encode = + gen8_pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_LLC, vm->has_read_only); + + for (i = 1; i <= vm->top; i++) { + if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i])))) + goto free_scratch; + + fill_px(&vm->scratch[i], vm->scratch[i - 1].encode); + vm->scratch[i].encode = + gen8_pde_encode(px_dma(&vm->scratch[i]), + I915_CACHE_LLC); + } + + return 0; + +free_scratch: + free_scratch(vm); + return -ENOMEM; +} + +static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) +{ + struct i915_address_space *vm = &ppgtt->vm; + struct i915_page_directory *pd = ppgtt->pd; + unsigned int idx; + + GEM_BUG_ON(vm->top != 2); + GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES); + + for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { + struct i915_page_directory *pde; + + pde = alloc_pd(vm); + if (IS_ERR(pde)) + return PTR_ERR(pde); + + fill_px(pde, vm->scratch[1].encode); + set_pd_entry(pd, idx, pde); + atomic_inc(px_used(pde)); /* keep pinned */ + } + wmb(); + + return 0; +} + +static struct i915_page_directory * +gen8_alloc_top_pd(struct i915_address_space *vm) +{ + const unsigned int count = gen8_pd_top_count(vm); + struct i915_page_directory *pd; + + GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); + + pd = __alloc_pd(offsetof(typeof(*pd), entry[count])); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, px_base(pd)))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); + 
atomic_inc(px_used(pd)); /* mark as pinned */ + return pd; +} + +/* + * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers + * with a net effect resembling a 2-level page table in normal x86 terms. Each + * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address + * space. + * + */ +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) +{ + struct i915_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + ppgtt_init(ppgtt, gt); + ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; + + /* + * From bdw, there is hw support for read-only pages in the PPGTT. + * + * Gen11 has HSDES#:1807136187 unresolved. Disable ro support + * for now. + * + * Gen12 has inherited the same read-only fault issue from gen11. + */ + ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12); + + /* + * There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. 
+ */ + if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915)) + ppgtt->vm.pt_kmap_wc = true; + + err = gen8_init_scratch(&ppgtt->vm); + if (err) + goto err_free; + + ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm); + if (IS_ERR(ppgtt->pd)) { + err = PTR_ERR(ppgtt->pd); + goto err_free_scratch; + } + + if (!i915_vm_is_4lvl(&ppgtt->vm)) { + err = gen8_preallocate_top_level_pdp(ppgtt); + if (err) + goto err_free_pd; + } + + ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; + ppgtt->vm.insert_entries = gen8_ppgtt_insert; + ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; + ppgtt->vm.clear_range = gen8_ppgtt_clear; + + if (intel_vgpu_active(gt->i915)) + gen8_ppgtt_notify_vgt(ppgtt, true); + + ppgtt->vm.cleanup = gen8_ppgtt_cleanup; + + return ppgtt; + +err_free_pd: + __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd, + gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top); +err_free_scratch: + free_scratch(&ppgtt->vm); +err_free: + kfree(ppgtt); + return ERR_PTR(err); +} diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h new file mode 100644 index 000000000000..76a08b9c1f5c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN8_PPGTT_H__ +#define __GEN8_PPGTT_H__ + +struct intel_gt; + +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 55317081d48b..0ba524a414c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -28,6 +28,8 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" static void irq_enable(struct intel_engine_cs *engine) { @@ -53,15 +55,17 @@ static void irq_disable(struct intel_engine_cs *engine) static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { + struct 
intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + lockdep_assert_held(&b->irq_lock); GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(container_of(b, - struct intel_engine_cs, - breadcrumbs)); + irq_disable(engine); b->irq_armed = false; + intel_gt_pm_put_async(engine->gt); } void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) @@ -127,16 +131,23 @@ __dma_fence_signal__notify(struct dma_fence *fence, } } -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) +static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + intel_engine_add_retire(engine, tl); +} + +static void signal_irq_work(struct irq_work *work) +{ + struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; - unsigned long flags; LIST_HEAD(signal); - spin_lock_irqsave(&b->irq_lock, flags); + spin_lock(&b->irq_lock); if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); @@ -177,44 +188,41 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) if (!list_is_first(pos, &ce->signals)) { /* Advance the list to the first incomplete request */ __list_del_many(&ce->signals, pos); - if (&ce->signals == pos) /* now empty */ + if (&ce->signals == pos) { /* now empty */ list_del_init(&ce->signal_link); + add_retire(b, ce->timeline); + } } } - spin_unlock_irqrestore(&b->irq_lock, flags); + spin_unlock(&b->irq_lock); list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); struct list_head cb_list; - spin_lock_irqsave(&rq->lock, flags); + spin_lock(&rq->lock); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); 
__dma_fence_signal__notify(&rq->fence, &cb_list); - spin_unlock_irqrestore(&rq->lock, flags); + spin_unlock(&rq->lock); i915_request_put(rq); } } -static void signal_irq_work(struct irq_work *work) -{ - struct intel_engine_cs *engine = - container_of(work, typeof(*engine), breadcrumbs.irq_work); - - intel_engine_breadcrumbs_irq(engine); -} - -static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return; + return true; + + if (!intel_gt_pm_get_if_awake(engine->gt)) + return false; /* * The breadcrumb irq will be disarmed on the interrupt after the @@ -234,6 +242,8 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) if (!b->irq_enabled++) irq_enable(engine); + + return true; } void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -271,19 +281,20 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; struct list_head *pos; spin_lock(&b->irq_lock); GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - __intel_breadcrumbs_arm_irq(b); + if (!__intel_breadcrumbs_arm_irq(b)) + goto unlock; /* * We keep the seqno in retirement order, so we can break - * inside intel_engine_breadcrumbs_irq as soon as we've passed - * the last completed request (or seen a request that hasn't - * event started). We could iterate the timeline->requests list, + * inside intel_engine_signal_breadcrumbs as soon as we've + * passed the last completed request (or seen a request that + * hasn't event started). 
We could walk the timeline->requests, * but keeping a separate signalers_list has the advantage of * hopefully being much smaller than the full list and so * provides faster iteration and detection when there are no @@ -306,6 +317,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) GEM_BUG_ON(!check_signal_order(ce, rq)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); +unlock: spin_unlock(&b->irq_lock); } @@ -326,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) */ spin_lock(&b->irq_lock); if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; list_del(&rq->signal_link); if (list_empty(&ce->signals)) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5b7ff3ccfa8e..23137b2a8689 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -31,8 +31,7 @@ void intel_context_free(struct intel_context *ce) } struct intel_context * -intel_context_create(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +intel_context_create(struct intel_engine_cs *engine) { struct intel_context *ce; @@ -40,39 +39,82 @@ intel_context_create(struct i915_gem_context *ctx, if (!ce) return ERR_PTR(-ENOMEM); - intel_context_init(ce, ctx, engine); + intel_context_init(ce, engine); return ce; } -int __intel_context_do_pin(struct intel_context *ce) +int intel_context_alloc_state(struct intel_context *ce) { - int err; + int err = 0; if (mutex_lock_interruptible(&ce->pin_mutex)) return -EINTR; - if (likely(!atomic_read(&ce->pin_count))) { - intel_wakeref_t wakeref; + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + err = ce->ops->alloc(ce); + if (unlikely(err)) + goto unlock; - if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { - err = ce->ops->alloc(ce); - if (unlikely(err)) - goto err; + set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + } + +unlock: + 
mutex_unlock(&ce->pin_mutex); + return err; +} + +static int intel_context_active_acquire(struct intel_context *ce) +{ + int err; - __set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + err = i915_active_acquire(&ce->active); + if (err) + return err; + + /* Preallocate tracking nodes */ + if (!intel_context_is_barrier(ce)) { + err = i915_active_acquire_preallocate_barrier(&ce->active, + ce->engine); + if (err) { + i915_active_release(&ce->active); + return err; } + } + + return 0; +} - err = 0; - with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref) - err = ce->ops->pin(ce); +static void intel_context_active_release(struct intel_context *ce) +{ + /* Nodes preallocated in intel_context_active() */ + i915_active_acquire_barrier(&ce->active); + i915_active_release(&ce->active); +} + +int __intel_context_do_pin(struct intel_context *ce) +{ + int err; + + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { + err = intel_context_alloc_state(ce); if (err) + return err; + } + + if (mutex_lock_interruptible(&ce->pin_mutex)) + return -EINTR; + + if (likely(!atomic_read(&ce->pin_count))) { + err = intel_context_active_acquire(ce); + if (unlikely(err)) goto err; - GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n", - ce->engine->name, ce->timeline->fence_context, - ce->ring->head, ce->ring->tail); + err = ce->ops->pin(ce); + if (unlikely(err)) + goto err_active; - i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ + CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", + ce->ring->head, ce->ring->tail); smp_mb__before_atomic(); /* flush pin before it is visible */ } @@ -83,6 +125,8 @@ int __intel_context_do_pin(struct intel_context *ce) mutex_unlock(&ce->pin_mutex); return 0; +err_active: + intel_context_active_release(ce); err: mutex_unlock(&ce->pin_mutex); return err; @@ -90,36 +134,29 @@ err: void intel_context_unpin(struct intel_context *ce) { - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + if (!atomic_dec_and_test(&ce->pin_count)) return; - /* We 
may be called from inside intel_context_pin() to evict another */ - intel_context_get(ce); - mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); - - if (likely(atomic_dec_and_test(&ce->pin_count))) { - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); - - ce->ops->unpin(ce); - - i915_gem_context_put(ce->gem_context); - intel_context_active_release(ce); - } + CE_TRACE(ce, "unpin\n"); + ce->ops->unpin(ce); - mutex_unlock(&ce->pin_mutex); + /* + * Once released, we may asynchronously drop the active reference. + * As that may be the only reference keeping the context alive, + * take an extra now so that it is not freed before we finish + * dereferencing it. + */ + intel_context_get(ce); + intel_context_active_release(ce); intel_context_put(ce); } static int __context_pin_state(struct i915_vma *vma) { - u64 flags; + unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; int err; - flags = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; - flags |= PIN_HIGH | PIN_GLOBAL; - - err = i915_vma_pin(vma, 0, 0, flags); + err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH); if (err) return err; @@ -178,9 +215,9 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); + set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) __context_unpin_state(ce->state); @@ -195,6 +232,8 @@ static int __intel_context_active(struct i915_active *active) struct intel_context *ce = container_of(active, typeof(*ce), active); int err; + CE_TRACE(ce, "active\n"); + intel_context_get(ce); err = __ring_active(ce->ring); @@ -223,60 +262,21 @@ err_put: return err; } -int intel_context_active_acquire(struct intel_context *ce) -{ - int err; - - err = i915_active_acquire(&ce->active); - if (err) - return err; - - /* Preallocate tracking nodes */ - if 
(!i915_gem_context_is_kernel(ce->gem_context)) { - err = i915_active_acquire_preallocate_barrier(&ce->active, - ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } - } - - return 0; -} - -void intel_context_active_release(struct intel_context *ce) -{ - /* Nodes preallocated in intel_context_active() */ - i915_active_acquire_barrier(&ce->active); - i915_active_release(&ce->active); -} - void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct i915_address_space *vm; - GEM_BUG_ON(!engine->cops); + GEM_BUG_ON(!engine->gt->vm); kref_init(&ce->ref); - ce->gem_context = ctx; - rcu_read_lock(); - vm = rcu_dereference(ctx->vm); - if (vm) - ce->vm = i915_vm_get(vm); - else - ce->vm = i915_vm_get(&engine->gt->ggtt->vm); - rcu_read_unlock(); - if (ctx->timeline) - ce->timeline = intel_timeline_get(ctx->timeline); - ce->engine = engine; ce->ops = engine->cops; ce->sseu = engine->sseu; - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring = __intel_context_ring_size(SZ_4K); + + ce->vm = i915_vm_get(engine->gt->vm); INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); @@ -341,30 +341,11 @@ int intel_context_prepare_remote_request(struct intel_context *ce, int err; /* Only suitable for use in remotely modifying this context */ - GEM_BUG_ON(rq->hw_context == ce); + GEM_BUG_ON(rq->context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - /* - * Ideally, we just want to insert our foreign fence as - * a barrier into the remove context, such that this operation - * occurs after all current operations in that context, and - * all future operations must occur after this. - * - * Currently, the timeline->last_request tracking is guarded - * by its mutex and so we must obtain that to atomically - * insert our barrier. 
However, since we already hold our - * timeline->mutex, we must be careful against potential - * inversion if we are the kernel_context as the remote context - * will itself poke at the kernel_context when it needs to - * unpin. Ergo, if already locked, we drop both locks and - * try again (through the magic of userspace repeating EAGAIN). - */ - if (!mutex_trylock(&tl->mutex)) - return -EAGAIN; - /* Queue this switch after current activity by this context. */ err = i915_active_fence_set(&tl->last_request, rq); - mutex_unlock(&tl->mutex); if (err) return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 68b3d317d959..30bd248827d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -7,7 +7,9 @@ #ifndef __INTEL_CONTEXT_H__ #define __INTEL_CONTEXT_H__ +#include <linux/bitops.h> #include <linux/lockdep.h> +#include <linux/types.h> #include "i915_active.h" #include "intel_context_types.h" @@ -15,14 +17,21 @@ #include "intel_ring_types.h" #include "intel_timeline_types.h" +#define CE_TRACE(ce, fmt, ...) 
do { \ + const struct intel_context *ce__ = (ce); \ + ENGINE_TRACE(ce__->engine, "context:%llx " fmt, \ + ce__->timeline->fence_context, \ + ##__VA_ARGS__); \ +} while (0) + void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, struct intel_engine_cs *engine); void intel_context_fini(struct intel_context *ce); struct intel_context * -intel_context_create(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); +intel_context_create(struct intel_engine_cs *engine); + +int intel_context_alloc_state(struct intel_context *ce); void intel_context_free(struct intel_context *ce); @@ -69,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce) int __intel_context_do_pin(struct intel_context *ce); +static inline bool intel_context_pin_if_active(struct intel_context *ce) +{ + return atomic_inc_not_zero(&ce->pin_count); +} + static inline int intel_context_pin(struct intel_context *ce) { - if (likely(atomic_inc_not_zero(&ce->pin_count))) + if (likely(intel_context_pin_if_active(ce))) return 0; return __intel_context_do_pin(ce); @@ -109,9 +123,6 @@ static inline void intel_context_exit(struct intel_context *ce) ce->ops->exit(ce); } -int intel_context_active_acquire(struct intel_context *ce); -void intel_context_active_release(struct intel_context *ce); - static inline struct intel_context *intel_context_get(struct intel_context *ce) { kref_get(&ce->ref); @@ -153,4 +164,64 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz) return u64_to_ptr(struct intel_ring, sz); } +static inline bool intel_context_is_barrier(const struct intel_context *ce) +{ + return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); +} + +static inline bool intel_context_use_semaphores(const struct intel_context *ce) +{ + return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline void intel_context_set_use_semaphores(struct intel_context *ce) +{ + set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline void 
intel_context_clear_use_semaphores(struct intel_context *ce) +{ + clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); +} + +static inline bool intel_context_is_banned(const struct intel_context *ce) +{ + return test_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool intel_context_set_banned(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool +intel_context_force_single_submission(const struct intel_context *ce) +{ + return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline void +intel_context_set_single_submission(struct intel_context *ce) +{ + __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline bool +intel_context_nopreempt(const struct intel_context *ce) +{ + return test_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_set_nopreempt(struct intel_context *ce) +{ + set_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_clear_nopreempt(struct intel_context *ce) +{ + clear_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 6959b05ae5f8..ca1420fb8b53 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -17,6 +17,8 @@ #include "intel_engine_types.h" #include "intel_sseu.h" +#define CONTEXT_REDZONE POISON_INUSE + struct i915_gem_context; struct i915_vma; struct intel_context; @@ -44,7 +46,7 @@ struct intel_context { #define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) struct i915_address_space *vm; - struct i915_gem_context *gem_context; + struct i915_gem_context __rcu *gem_context; struct list_head signal_link; struct list_head signals; @@ -54,7 +56,13 @@ struct intel_context { struct intel_timeline *timeline; unsigned long flags; -#define CONTEXT_ALLOC_BIT 0 +#define CONTEXT_BARRIER_BIT 0 
+#define CONTEXT_ALLOC_BIT 1 +#define CONTEXT_VALID_BIT 2 +#define CONTEXT_USE_SEMAPHORES 3 +#define CONTEXT_BANNED 4 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 5 +#define CONTEXT_NOPREEMPT 6 u32 *lrc_reg_state; u64 lrc_desc; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 01765a7ec18f..5df003061e44 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -29,6 +29,13 @@ struct intel_gt; #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) +#define ENGINE_TRACE(e, fmt, ...) do { \ + const struct intel_engine_cs *e__ __maybe_unused = (e); \ + GEM_TRACE("%s %s: " fmt, \ + dev_name(e__->i915->drm.dev), e__->name, \ + ##__VA_ARGS__); \ +} while (0) + /* * The register defines to be used with the following macros need to accept a * base param, e.g: @@ -177,15 +184,15 @@ void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); int intel_engines_init_mmio(struct intel_gt *gt); -int intel_engines_setup(struct intel_gt *gt); int intel_engines_init(struct intel_gt *gt); -void intel_engines_cleanup(struct intel_gt *gt); + +void intel_engines_release(struct intel_gt *gt); +void intel_engines_free(struct intel_gt *gt); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_ring_submission_setup(struct intel_engine_cs *engine); -int intel_ring_submission_init(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); @@ -195,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); -void intel_engine_get_instdone(struct intel_engine_cs *engine, 
+void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone); void intel_engine_init_execlists(struct intel_engine_cs *engine); @@ -206,13 +213,11 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); static inline void -intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) +intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) { irq_work_queue(&engine->breadcrumbs.irq_work); } -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); - void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); @@ -270,8 +275,8 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { - if (engine->reset.reset) - engine->reset.reset(engine, stalled); + if (engine->reset.rewind) + engine->reset.rewind(engine, stalled); engine->serial++; /* contexts lost */ } @@ -296,7 +301,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine); -u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class); +u32 intel_engine_context_size(struct intel_gt *gt, u8 class); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 813bd3a610d2..f451ef376548 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -141,7 +141,7 @@ static const struct engine_info intel_engines[] = { /** * intel_engine_context_size() - return the size of the context for an engine - * @dev_priv: i915 device private + * @gt: the gt * @class: engine class * * Each engine class may require a different amount of space for a context @@ -153,17 +153,18 
@@ static const struct engine_info intel_engines[] = { * in LRC mode, but does not include the "shared data page" used with * GuC submission. The caller should account for this if using the GuC. */ -u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +u32 intel_engine_context_size(struct intel_gt *gt, u8 class) { + struct intel_uncore *uncore = gt->uncore; u32 cxt_size; BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); switch (class) { case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { + switch (INTEL_GEN(gt->i915)) { default: - MISSING_CASE(INTEL_GEN(dev_priv)); + MISSING_CASE(INTEL_GEN(gt->i915)); return DEFAULT_LR_CONTEXT_RENDER_SIZE; case 12: case 11: @@ -175,14 +176,14 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) case 8: return GEN8_LR_CONTEXT_RENDER_SIZE; case 7: - if (IS_HASWELL(dev_priv)) + if (IS_HASWELL(gt->i915)) return HSW_CXT_TOTAL_SIZE; - cxt_size = I915_READ(GEN7_CXT_SIZE); + cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE); return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, PAGE_SIZE); case 6: - cxt_size = I915_READ(CXT_SIZE); + cxt_size = intel_uncore_read(uncore, CXT_SIZE); return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, PAGE_SIZE); case 5: @@ -197,9 +198,9 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) * minimum allocation anyway so it should all come * out in the wash. 
*/ - cxt_size = I915_READ(CXT_SIZE) + 1; + cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1; DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n", - INTEL_GEN(dev_priv), + INTEL_GEN(gt->i915), cxt_size * 64, cxt_size - 1); return round_up(cxt_size * 64, PAGE_SIZE); @@ -216,7 +217,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: - if (INTEL_GEN(dev_priv) < 8) + if (INTEL_GEN(gt->i915) < 8) return 0; return GEN8_LR_CONTEXT_OTHER_SIZE; } @@ -318,14 +319,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->props.timeslice_duration_ms = CONFIG_DRM_I915_TIMESLICE_DURATION; - /* - * To be overridden by the backend on setup. However to facilitate - * cleanup on error during setup, we always provide the destroy vfunc. - */ - engine->destroy = (typeof(engine->destroy))kfree; - - engine->context_size = intel_engine_context_size(gt->i915, - engine->class); + engine->context_size = intel_engine_context_size(gt, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; if (engine->context_size) @@ -334,6 +328,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + ewma__engine_latency_init(&engine->latency); seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -344,7 +339,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) gt->engine_class[info->class][info->instance] = engine; gt->engine[id] = engine; - intel_engine_add_user(engine); gt->i915->engine[id] = engine; return 0; @@ -390,21 +384,39 @@ static void intel_setup_engine_capabilities(struct intel_gt *gt) } /** - * intel_engines_cleanup() - free the resources allocated for Command Streamers + * intel_engines_release() - free the resources allocated for Command 
Streamers * @gt: pointer to struct intel_gt */ -void intel_engines_cleanup(struct intel_gt *gt) +void intel_engines_release(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + /* Decouple the backend; but keep the layout for late GPU resets */ for_each_engine(engine, gt, id) { - engine->destroy(engine); - gt->engine[id] = NULL; + if (!engine->release) + continue; + + engine->release(engine); + engine->release = NULL; + + memset(&engine->reset, 0, sizeof(engine->reset)); + gt->i915->engine[id] = NULL; } } +void intel_engines_free(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) { + kfree(engine); + gt->engine[id] = NULL; + } +} + /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * @gt: pointer to struct intel_gt @@ -455,38 +467,7 @@ int intel_engines_init_mmio(struct intel_gt *gt) return 0; cleanup: - intel_engines_cleanup(gt); - return err; -} - -/** - * intel_engines_init() - init the Engine Command Streamers - * @gt: pointer to struct intel_gt - * - * Return: non-zero if the initialization failed. 
- */ -int intel_engines_init(struct intel_gt *gt) -{ - int (*init)(struct intel_engine_cs *engine); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - if (HAS_EXECLISTS(gt->i915)) - init = intel_execlists_submission_init; - else - init = intel_ring_submission_init; - - for_each_engine(engine, gt, id) { - err = init(engine); - if (err) - goto cleanup; - } - - return 0; - -cleanup: - intel_engines_cleanup(gt); + intel_engines_free(gt); return err; } @@ -601,7 +582,7 @@ err: return ret; } -static int intel_engine_setup_common(struct intel_engine_cs *engine) +static int engine_setup_common(struct intel_engine_cs *engine) { int err; @@ -631,49 +612,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) return 0; } -/** - * intel_engines_setup- setup engine state not requiring hw access - * @gt: pointer to struct intel_gt - * - * Initializes engine structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. 
- */ -int intel_engines_setup(struct intel_gt *gt) -{ - int (*setup)(struct intel_engine_cs *engine); - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - if (HAS_EXECLISTS(gt->i915)) - setup = intel_execlists_submission_setup; - else - setup = intel_ring_submission_setup; - - for_each_engine(engine, gt, id) { - err = intel_engine_setup_common(engine); - if (err) - goto cleanup; - - err = setup(engine); - if (err) - goto cleanup; - - /* We expect the backend to take control over its state */ - GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree); - - GEM_BUG_ON(!engine->cops); - } - - return 0; - -cleanup: - intel_engines_cleanup(gt); - return err; -} - struct measure_breadcrumb { struct i915_request rq; struct intel_timeline timeline; @@ -757,13 +695,13 @@ create_kernel_context(struct intel_engine_cs *engine) struct intel_context *ce; int err; - ce = intel_context_create(engine->i915->kernel_context, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return ce; - ce->ring = __intel_context_ring_size(SZ_4K); + __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); - err = intel_context_pin(ce); + err = intel_context_pin(ce); /* perma-pin so it is always available */ if (err) { intel_context_put(ce); return ERR_PTR(err); @@ -791,13 +729,19 @@ create_kernel_context(struct intel_engine_cs *engine) * * Returns zero on success or an error code on failure. 
*/ -int intel_engine_init_common(struct intel_engine_cs *engine) +static int engine_init_common(struct intel_engine_cs *engine) { struct intel_context *ce; int ret; engine->set_default_submission(engine); + ret = measure_breadcrumb_dw(engine); + if (ret < 0) + return ret; + + engine->emit_fini_breadcrumb_dw = ret; + /* * We may need to do things with the shrinker which * require us to immediately switch back to the default @@ -812,18 +756,38 @@ int intel_engine_init_common(struct intel_engine_cs *engine) engine->kernel_context = ce; - ret = measure_breadcrumb_dw(engine); - if (ret < 0) - goto err_unpin; + return 0; +} - engine->emit_fini_breadcrumb_dw = ret; +int intel_engines_init(struct intel_gt *gt) +{ + int (*setup)(struct intel_engine_cs *engine); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - return 0; + if (HAS_EXECLISTS(gt->i915)) + setup = intel_execlists_submission_setup; + else + setup = intel_ring_submission_setup; -err_unpin: - intel_context_unpin(ce); - intel_context_put(ce); - return ret; + for_each_engine(engine, gt, id) { + err = engine_setup_common(engine); + if (err) + return err; + + err = setup(engine); + if (err) + return err; + + err = engine_init_common(engine); + if (err) + return err; + + intel_engine_add_user(engine); + } + + return 0; } /** @@ -836,6 +800,7 @@ err_unpin: void intel_engine_cleanup_common(struct intel_engine_cs *engine) { GEM_BUG_ON(!list_empty(&engine->active.requests)); + tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ cleanup_status_page(engine); @@ -911,7 +876,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) < 3) return -ENODEV; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); @@ -920,7 +885,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) mode, MODE_IDLE, MODE_IDLE, 1000, stop_timeout(engine), NULL)) { - GEM_TRACE("%s: timed out on 
STOP_RING -> IDLE\n", engine->name); + ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n"); err = -ETIMEDOUT; } @@ -932,7 +897,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) { - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } @@ -949,8 +914,8 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) } static u32 -read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, - i915_reg_t reg) +read_subslice_reg(const struct intel_engine_cs *engine, + int slice, int subslice, i915_reg_t reg) { struct drm_i915_private *i915 = engine->i915; struct intel_uncore *uncore = engine->uncore; @@ -994,7 +959,7 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice, } /* NB: please notice the memset */ -void intel_engine_get_instdone(struct intel_engine_cs *engine, +void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; @@ -1478,6 +1443,10 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? 
%d\n", atomic_read(&engine->wakeref.count)); + drm_printf(m, "\tBarriers?: %s\n", + yesno(!llist_empty(&engine->barrier_tasks))); + drm_printf(m, "\tLatency: %luus\n", + ewma__engine_latency_read(&engine->latency)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); @@ -1517,9 +1486,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); - if (rq->hw_context->lrc_reg_state) { + if (rq->context->lrc_reg_state) { drm_printf(m, "Logical Ring Context:\n"); - hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE); + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); } } spin_unlock_irqrestore(&engine->active.lock, flags); @@ -1580,7 +1549,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) for (port = execlists->pending; (rq = *port); port++) { /* Exclude any contexts already counted in active */ - if (!intel_context_inflight_count(rq->hw_context)) + if (!intel_context_inflight_count(rq->context)) engine->stats.active++; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 06aa14c7aa8c..6c6fd185457c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -63,15 +63,15 @@ static void heartbeat(struct work_struct *wrk) struct intel_context *ce = engine->kernel_context; struct i915_request *rq; - if (!intel_engine_pm_get_if_awake(engine)) - return; - rq = engine->heartbeat.systole; if (rq && i915_request_completed(rq)) { i915_request_put(rq); engine->heartbeat.systole = NULL; } + if (!intel_engine_pm_get_if_awake(engine)) + return; + if (intel_gt_is_wedged(engine->gt)) goto out; @@ -199,7 +199,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine) goto out_unlock; } - rq->flags |= I915_REQUEST_SENTINEL; + __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); idle_pulse(engine, rq); __i915_request_commit(rq); @@ -215,18 +215,26 @@ out_rpm: int intel_engine_flush_barriers(struct 
intel_engine_cs *engine) { struct i915_request *rq; + int err = 0; if (llist_empty(&engine->barrier_tasks)) return 0; + if (!intel_engine_pm_get_if_awake(engine)) + return 0; + rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_rpm; + } idle_pulse(engine, rq); i915_request_add(rq); - return 0; +out_rpm: + intel_engine_pm_put(engine); + return err; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index c1dd0cd3efc7..ea90ab3e396e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -6,6 +6,7 @@ #include "i915_drv.h" +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" @@ -19,9 +20,10 @@ static int __engine_unpark(struct intel_wakeref *wf) { struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); + struct intel_context *ce; void *map; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); intel_gt_pm_get(engine->gt); @@ -33,6 +35,27 @@ static int __engine_unpark(struct intel_wakeref *wf) if (!IS_ERR_OR_NULL(map)) engine->pinned_default_state = map; + /* Discard stale context state from across idling */ + ce = engine->kernel_context; + if (ce) { + GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags)); + + /* First poison the image to verify we never fully trust it */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) { + struct drm_i915_gem_object *obj = ce->state->obj; + int type = i915_coherent_map_type(engine->i915); + + map = i915_gem_object_pin_map(obj, type); + if (!IS_ERR(map)) { + memset(map, CONTEXT_REDZONE, obj->base.size); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + } + } + + ce->ops->reset(ce); + } + if (engine->unpark) engine->unpark(engine); @@ -73,6 +96,15 @@ static inline void 
__timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_request *rq = to_request(fence); + + ewma__engine_latency_add(&rq->engine->latency, + ktime_us_delta(rq->fence.timestamp, + rq->duration.emitted)); +} + static void __queue_and_release_pm(struct i915_request *rq, struct intel_timeline *tl, @@ -80,7 +112,7 @@ __queue_and_release_pm(struct i915_request *rq, { struct intel_gt_timelines *timelines = &engine->gt->timelines; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * We have to serialise all potential retirement paths with our @@ -113,14 +145,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) unsigned long flags; bool result = true; - /* Already inside the kernel context, safe to power down. */ - if (engine->wakeref_serial == engine->serial) - return true; - /* GPU is pointing to the void, as good as in the kernel context. */ if (intel_gt_is_wedged(engine->gt)) return true; + GEM_BUG_ON(!intel_context_is_barrier(ce)); + + /* Already inside the kernel context, safe to power down. */ + if (engine->wakeref_serial == engine->serial) + return true; + /* * Note, we do this without taking the timeline->mutex. We cannot * as we may be called while retiring the kernel context and so @@ -163,7 +197,18 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Install ourselves as a preemption barrier */ rq->sched.attr.priority = I915_PRIORITY_BARRIER; - __i915_request_commit(rq); + if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */ + /* + * Use an interrupt for precise measurement of duration, + * otherwise we rely on someone else retiring all the requests + * which may delay the signaling (i.e. we will likely wait + * until the background request retirement running every + * second or two). 
+ */ + BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq)); + dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration); + rq->duration.emitted = ktime_get(); + } /* Expose ourselves to the world */ __queue_and_release_pm(rq, ce->timeline, engine); @@ -183,7 +228,7 @@ static void call_idle_barriers(struct intel_engine_cs *engine) container_of((struct list_head *)node, typeof(*cb), node); - cb->func(NULL, cb); + cb->func(ERR_PTR(-EAGAIN), cb); } } @@ -204,7 +249,7 @@ static int __engine_park(struct intel_wakeref *wf) if (!switch_to_kernel_context(engine)) return -EBUSY; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); call_idle_barriers(engine); /* cleanup after wedging */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 24e20344dc22..e52c2b0cb245 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,6 +7,7 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "i915_request.h" #include "intel_engine_types.h" #include "intel_wakeref.h" @@ -41,6 +42,26 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) intel_wakeref_unlock_wait(&engine->wakeref); } +static inline struct i915_request * +intel_engine_create_kernel_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + /* + * The engine->kernel_context is special as it is used inside + * the engine-pm barrier (see __engine_park()), circumventing + * the usual mutexes and relying on the engine-pm barrier + * instead. So whenever we use the engine->kernel_context + * outside of the barrier, we must manually handle the + * engine wakeref to serialise with the use inside. 
+ */ + intel_engine_pm_get(engine); + rq = i915_request_create(engine->kernel_context); + intel_engine_pm_put(engine); + + return rq; +} + void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 2b446474e010..350da59e605b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -7,6 +7,7 @@ #ifndef __INTEL_ENGINE_TYPES__ #define __INTEL_ENGINE_TYPES__ +#include <linux/average.h> #include <linux/hashtable.h> #include <linux/irq_work.h> #include <linux/kref.h> @@ -119,6 +120,9 @@ enum intel_engine_id { #define INVALID_ENGINE ((enum intel_engine_id)-1) }; +/* A simple estimator for the round-trip latency of an engine */ +DECLARE_EWMA(_engine_latency, 6, 4) + struct st_preempt_hang { struct completion completion; unsigned int count; @@ -316,6 +320,13 @@ struct intel_engine_cs { struct intel_timeline *timeline; } legacy; + /* + * We track the average duration of the idle pulse on parking the + * engine to keep an estimate of the how the fast the engine is + * under ideal conditions. + */ + struct ewma__engine_latency latency; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -389,7 +400,10 @@ struct intel_engine_cs { struct { void (*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, bool stalled); + + void (*rewind)(struct intel_engine_cs *engine, bool stalled); + void (*cancel)(struct intel_engine_cs *engine); + void (*finish)(struct intel_engine_cs *engine); } reset; @@ -439,15 +453,7 @@ struct intel_engine_cs { void (*schedule)(struct i915_request *request, const struct i915_sched_attr *attr); - /* - * Cancel all requests on the hardware, or queued for execution. 
- * This should only cancel the ready requests that have been - * submitted to the engine (via the engine->submit_request callback). - * This is called when marking the device as wedged. - */ - void (*cancel_requests)(struct intel_engine_cs *engine); - - void (*destroy)(struct intel_engine_cs *engine); + void (*release)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 7f7150a733f4..9e7f12bef828 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "intel_engine.h" #include "intel_engine_user.h" +#include "intel_gt.h" struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) @@ -200,6 +201,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915) uabi_node); char old[sizeof(engine->name)]; + if (intel_gt_has_init_error(engine->gt)) + continue; /* ignore incomplete engines */ + GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c new file mode 100644 index 000000000000..531d501be01f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -0,0 +1,1486 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/stop_machine.h> + +#include <asm/set_memory.h> +#include <asm/smp.h> + +#include "intel_gt.h" +#include "i915_drv.h" +#include "i915_scatterlist.h" +#include "i915_vgpu.h" + +#include "intel_gtt.h" + +static int +i915_get_ggtt_vma_pages(struct i915_vma *vma); + +static void i915_ggtt_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) +{ + if (i915_node_color_differs(node, color)) + *start += I915_GTT_PAGE_SIZE; + + /* + * Also leave a space 
between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ + node = list_next_entry(node, node_list); + if (node->color != color) + *end -= I915_GTT_PAGE_SIZE; +} + +static int ggtt_init_hw(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + + i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); + + ggtt->vm.is_ggtt = true; + + /* Only VLV supports read-only GGTT mappings */ + ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); + + if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) + ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; + + if (ggtt->mappable_end) { + if (!io_mapping_init_wc(&ggtt->iomap, + ggtt->gmadr.start, + ggtt->mappable_end)) { + ggtt->vm.cleanup(&ggtt->vm); + return -EIO; + } + + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, + ggtt->mappable_end); + } + + i915_ggtt_init_fences(ggtt); + + return 0; +} + +/** + * i915_ggtt_init_hw - Initialize GGTT hardware + * @i915: i915 device + */ +int i915_ggtt_init_hw(struct drm_i915_private *i915) +{ + int ret; + + stash_init(&i915->mm.wc_stash); + + /* + * Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. + */ + ret = ggtt_init_hw(&i915->ggtt); + if (ret) + return ret; + + return 0; +} + +/* + * Certain Gen5 chipsets require require idling the GPU before + * unmapping anything from the GTT when VT-d is enabled. + */ +static bool needs_idle_maps(struct drm_i915_private *i915) +{ + /* + * Query intel_iommu to see if we need the workaround. Presumably that + * was loaded first. 
+ */ + return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active(); +} + +static void ggtt_suspend_mappings(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + + /* + * Don't bother messing with faults pre GEN6 as we have little + * documentation supporting that it's a good idea. + */ + if (INTEL_GEN(i915) < 6) + return; + + intel_gt_check_and_clear_faults(ggtt->vm.gt); + + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); + + ggtt->invalidate(ggtt); +} + +void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915) +{ + ggtt_suspend_mappings(&i915->ggtt); +} + +void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + + spin_lock_irq(&uncore->lock); + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6); + spin_unlock_irq(&uncore->lock); +} + +static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + + /* + * Note that as an uncached mmio write, this will flush the + * WCB of the writes into the GGTT before it triggers the invalidate. 
+ */ + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); +} + +static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; + + gen8_ggtt_invalidate(ggtt); + + if (INTEL_GEN(i915) >= 12) + intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + else + intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); +} + +static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + intel_gtt_chipset_flush(); +} + +static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) +{ + writeq(pte, addr); +} + +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); + + ggtt->invalidate(ggtt); +} + +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + struct sgt_iter sgt_iter; + gen8_pte_t __iomem *gtt_entries; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); + dma_addr_t addr; + + /* + * Note that we ignore PTE_READ_ONLY here. The caller must be careful + * not to allow the user to override access to a read only page. + */ + + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, sgt_iter, vma->pages) + gen8_set_pte(gtt_entries++, pte_encode | addr); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. 
+ */ + ggtt->invalidate(ggtt); +} + +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + iowrite32(vm->pte_encode(addr, level, flags), pte); + + ggtt->invalidate(ggtt); +} + +/* + * Binds an object into the global gtt with the specified cache level. + * The object will be accessible to the GPU via commands whose operands + * reference offsets within the global GTT as well as accessible by the GPU + * through the GMADR mapped BAR (i915->mm.gtt->gtt). + */ +static void gen6_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; + struct sgt_iter iter; + dma_addr_t addr; + + for_each_sgt_daddr(addr, iter, vma->pages) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. 
+ */ + ggtt->invalidate(ggtt); +} + +static void nop_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + const gen8_pte_t scratch_pte = vm->scratch[0].encode; + gen8_pte_t __iomem *gtt_base = + (gen8_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + for (i = 0; i < num_entries; i++) + gen8_set_pte(&gtt_base[i], scratch_pte); +} + +static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) +{ + /* + * Make sure the internal GAM fifo has been cleared of all GTT + * writes before exiting stop_machine(). This guarantees that + * any aperture accesses waiting to start in another process + * cannot back up behind the GTT writes causing a hang. + * The register can be any arbitrary GAM register.
+ */ + intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); +} + +struct insert_page { + struct i915_address_space *vm; + dma_addr_t addr; + u64 offset; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_page__cb(void *_arg) +{ + struct insert_page *arg = _arg; + + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct insert_page arg = { vm, addr, offset, level }; + + stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); +} + +struct insert_entries { + struct i915_address_space *vm; + struct i915_vma *vma; + enum i915_cache_level level; + u32 flags; +}; + +static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) +{ + struct insert_entries *arg = _arg; + + gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct insert_entries arg = { vm, vma, level, flags }; + + stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); +} + +struct clear_range { + struct i915_address_space *vm; + u64 start; + u64 length; +}; + +static int bxt_vtd_ggtt_clear_range__cb(void *_arg) +{ + struct clear_range *arg = _arg; + + gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, + u64 start, + u64 length) +{ + struct clear_range arg = { vm, start, length }; + + stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); +} + +static void gen6_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / 
I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + gen6_pte_t scratch_pte, __iomem *gtt_base = + (gen6_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = vm->scratch[0].encode; + for (i = 0; i < num_entries; i++) + iowrite32(scratch_pte, &gtt_base[i]); +} + +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); +} + +static void i915_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, + flags); +} + +static void i915_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); +} + +static int ggtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + u32 pte_flags; + + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ + pte_flags = 0; + if (i915_gem_object_is_readonly(obj)) + pte_flags |= PTE_READ_ONLY; + + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + + /* + * Without aliasing PPGTT there's no difference between + * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally + * upgrade to both bound if we bind either to avoid double-binding.
+ */ + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); + + return 0; +} + +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); +} + +static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) +{ + u64 size; + int ret; + + if (!USES_GUC(ggtt->vm.i915)) + return 0; + + GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); + size = ggtt->vm.total - GUC_GGTT_TOP; + + ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, + PIN_NOEVICT); + if (ret) + DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n"); + + return ret; +} + +static void ggtt_release_guc_top(struct i915_ggtt *ggtt) +{ + if (drm_mm_node_allocated(&ggtt->uc_fw)) + drm_mm_remove_node(&ggtt->uc_fw); +} + +static void cleanup_init_ggtt(struct i915_ggtt *ggtt) +{ + ggtt_release_guc_top(ggtt); + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); + mutex_destroy(&ggtt->error_mutex); +} + +static int init_ggtt(struct i915_ggtt *ggtt) +{ + /* + * Let GEM Manage all of the aperture. + * + * However, leave one page at the end still bound to the scratch page. + * There are a number of places where the hardware apparently prefetches + * past the end of the object, and we've seen multiple hangs with the + * GPU head pointer stuck in a batchbuffer bound at the last page of the + * aperture. One page should be enough to keep any prefetching inside + * of the aperture. + */ + unsigned long hole_start, hole_end; + struct drm_mm_node *entry; + int ret; + + /* + * GuC requires all resources that we're sharing with it to be placed in + * non-WOPCM memory. If GuC is not present or not in use we still need a + * small bias as ring wraparound at offset 0 sometimes hangs. No idea + * why. 
+ */ + ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, + intel_wopcm_guc_size(&ggtt->vm.i915->wopcm)); + + ret = intel_vgt_balloon(ggtt); + if (ret) + return ret; + + mutex_init(&ggtt->error_mutex); + if (ggtt->mappable_end) { + /* Reserve a mappable slot for our lockless error capture */ + ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, + &ggtt->error_capture, + PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (ret) + return ret; + } + + /* + * The upper portion of the GuC address space has a sizeable hole + * (several MB) that is inaccessible by GuC. Reserve this range within + * GGTT as it can comfortably hold GuC/HuC firmware images. + */ + ret = ggtt_reserve_guc_top(ggtt); + if (ret) + goto err; + + /* Clear any non-preallocated blocks */ + drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { + DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", + hole_start, hole_end); + ggtt->vm.clear_range(&ggtt->vm, hole_start, + hole_end - hole_start); + } + + /* And finally clear the reserved guard page */ + ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); + + return 0; + +err: + cleanup_init_ggtt(ggtt); + return ret; +} + +static int aliasing_gtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + u32 pte_flags; + int ret; + + /* Currently applicable only to VLV */ + pte_flags = 0; + if (i915_gem_object_is_readonly(vma->obj)) + pte_flags |= PTE_READ_ONLY; + + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; + + if (flags & I915_VMA_ALLOC) { + ret = alias->vm.allocate_va_range(&alias->vm, + vma->node.start, + vma->size); + if (ret) + return ret; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + } + + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, + __i915_vma_flags(vma))); + alias->vm.insert_entries(&alias->vm, vma, + cache_level, pte_flags); + } + + if (flags & I915_VMA_GLOBAL_BIND) + 
vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + + return 0; +} + +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) +{ + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + struct i915_address_space *vm = vma->vm; + + vm->clear_range(vm, vma->node.start, vma->size); + } + + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { + struct i915_address_space *vm = + &i915_vm_to_ggtt(vma->vm)->alias->vm; + + vm->clear_range(vm, vma->node.start, vma->size); + } +} + +static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) +{ + struct i915_ppgtt *ppgtt; + int err; + + ppgtt = i915_ppgtt_create(ggtt->vm.gt); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { + err = -ENODEV; + goto err_ppgtt; + } + + /* + * Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. 
+ */ + err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total); + if (err) + goto err_ppgtt; + + ggtt->alias = ppgtt; + ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; + + GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); + ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; + + GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); + ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; + + return 0; + +err_ppgtt: + i915_vm_put(&ppgtt->vm); + return err; +} + +static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) +{ + struct i915_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&ggtt->alias); + if (!ppgtt) + return; + + i915_vm_put(&ppgtt->vm); + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; +} + +int i915_init_ggtt(struct drm_i915_private *i915) +{ + int ret; + + ret = init_ggtt(&i915->ggtt); + if (ret) + return ret; + + if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { + ret = init_aliasing_ppgtt(&i915->ggtt); + if (ret) + cleanup_init_ggtt(&i915->ggtt); + } + + return 0; +} + +static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) +{ + struct i915_vma *vma, *vn; + + atomic_set(&ggtt->vm.open, 0); + + rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ + flush_workqueue(ggtt->vm.i915->wq); + + mutex_lock(&ggtt->vm.mutex); + + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) + WARN_ON(__i915_vma_unbind(vma)); + + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); + mutex_destroy(&ggtt->error_mutex); + + ggtt_release_guc_top(ggtt); + intel_vgt_deballoon(ggtt); + + ggtt->vm.cleanup(&ggtt->vm); + + mutex_unlock(&ggtt->vm.mutex); + i915_address_space_fini(&ggtt->vm); + + arch_phys_wc_del(ggtt->mtrr); + + if (ggtt->iomap.size) + io_mapping_fini(&ggtt->iomap); +} + +/** + * i915_ggtt_driver_release - Clean up GGTT hardware initialization + * @i915: i915 device + */ +void i915_ggtt_driver_release(struct drm_i915_private *i915) +{ + struct 
pagevec *pvec; + + fini_aliasing_ppgtt(&i915->ggtt); + + ggtt_cleanup_hw(&i915->ggtt); + + pvec = &i915->mm.wc_stash.pvec; + if (pvec->nr) { + set_pages_array_wb(pvec->pages, pvec->nr); + __pagevec_release(pvec); + } +} + +static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; + return snb_gmch_ctl << 20; +} + +static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) +{ + bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; + bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; + if (bdw_gmch_ctl) + bdw_gmch_ctl = 1 << bdw_gmch_ctl; + +#ifdef CONFIG_X86_32 + /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ + if (bdw_gmch_ctl > 4) + bdw_gmch_ctl = 4; +#endif + + return bdw_gmch_ctl << 20; +} + +static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) +{ + gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; + gmch_ctrl &= SNB_GMCH_GGMS_MASK; + + if (gmch_ctrl) + return 1 << (20 + gmch_ctrl); + + return 0; +} + +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + phys_addr_t phys_addr; + int ret; + + /* For Modern GENs the PTEs and register space are split in the BAR */ + phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; + + /* + * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. 
+ */ + if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) + ggtt->gsm = ioremap(phys_addr, size); + else + ggtt->gsm = ioremap_wc(phys_addr, size); + if (!ggtt->gsm) { + DRM_ERROR("Failed to map the ggtt page table\n"); + return -ENOMEM; + } + + ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); + if (ret) { + DRM_ERROR("Scratch setup failed\n"); + /* iounmap will also get called at remove, but meh */ + iounmap(ggtt->gsm); + return ret; + } + + ggtt->vm.scratch[0].encode = + ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]), + I915_CACHE_NONE, 0); + + return 0; +} + +int ggtt_set_pages(struct i915_vma *vma) +{ + int ret; + + GEM_BUG_ON(vma->pages); + + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + cleanup_scratch_page(vm); +} + +static struct resource pci_resource(struct pci_dev *pdev, int bar) +{ + return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar)); +} + +static int gen8_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + unsigned int size; + u16 snb_gmch_ctl; + int err; + + /* TODO: We're not aware of mappable constraints on gen8 yet */ + if (!IS_DGFX(i915)) { + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (IS_CHERRYVIEW(i915)) + size = chv_get_total_gtt_size(snb_gmch_ctl); + else + size = gen8_get_total_gtt_size(snb_gmch_ctl); + + ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; + ggtt->vm.cleanup = 
gen6_gmch_remove; + ggtt->vm.insert_page = gen8_ggtt_insert_page; + ggtt->vm.clear_range = nop_clear_range; + if (intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen8_ggtt_clear_range; + + ggtt->vm.insert_entries = gen8_ggtt_insert_entries; + + /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ + if (intel_ggtt_update_needs_vtd_wa(i915) || + IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { + ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; + if (ggtt->vm.clear_range != nop_clear_range) + ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; + } + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + ggtt->vm.pte_encode = gen8_pte_encode; + + setup_private_pat(ggtt->vm.gt->uncore); + + return ggtt_probe_common(ggtt, size); +} + +static u64 snb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 ivb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + pte |= GEN7_PTE_CACHE_L3_LLC; + break; + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 byt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if 
(!(flags & PTE_READ_ONLY)) + pte |= BYT_PTE_WRITEABLE; + + if (level != I915_CACHE_NONE) + pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; + + return pte; +} + +static u64 hsw_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (level != I915_CACHE_NONE) + pte |= HSW_WB_LLC_AGE3; + + return pte; +} + +static u64 iris_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE3; + break; + default: + pte |= HSW_WB_ELLC_LLC_AGE3; + break; + } + + return pte; +} + +static int gen6_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = i915->drm.pdev; + unsigned int size; + u16 snb_gmch_ctl; + int err; + + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + + /* + * 64/512MB is the current min/max we actually know of, but this is + * just a coarse sanity check. 
+ */ + if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { + DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); + return -ENXIO; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; + + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.insert_page = gen6_ggtt_insert_page; + ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.cleanup = gen6_gmch_remove; + + ggtt->invalidate = gen6_ggtt_invalidate; + + if (HAS_EDRAM(i915)) + ggtt->vm.pte_encode = iris_pte_encode; + else if (IS_HASWELL(i915)) + ggtt->vm.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(i915)) + ggtt->vm.pte_encode = byt_pte_encode; + else if (INTEL_GEN(i915) >= 7) + ggtt->vm.pte_encode = ivb_pte_encode; + else + ggtt->vm.pte_encode = snb_pte_encode; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + return ggtt_probe_common(ggtt, size); +} + +static void i915_gmch_remove(struct i915_address_space *vm) +{ + intel_gmch_remove(); +} + +static int i915_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + phys_addr_t gmadr_base; + int ret; + + ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL); + if (!ret) { + DRM_ERROR("failed to set up gmch\n"); + return -EIO; + } + + intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); + + ggtt->gmadr = + (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); + + ggtt->do_idle_maps 
= needs_idle_maps(i915); + ggtt->vm.insert_page = i915_ggtt_insert_page; + ggtt->vm.insert_entries = i915_ggtt_insert_entries; + ggtt->vm.clear_range = i915_ggtt_clear_range; + ggtt->vm.cleanup = i915_gmch_remove; + + ggtt->invalidate = gmch_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.set_pages = ggtt_set_pages; + ggtt->vm.vma_ops.clear_pages = clear_pages; + + if (unlikely(ggtt->do_idle_maps)) + dev_notice(i915->drm.dev, + "Applying Ironlake quirks for intel_iommu\n"); + + return 0; +} + +static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + int ret; + + ggtt->vm.gt = gt; + ggtt->vm.i915 = i915; + ggtt->vm.dma = &i915->drm.pdev->dev; + + if (INTEL_GEN(i915) <= 5) + ret = i915_gmch_probe(ggtt); + else if (INTEL_GEN(i915) < 8) + ret = gen6_gmch_probe(ggtt); + else + ret = gen8_gmch_probe(ggtt); + if (ret) + return ret; + + if ((ggtt->vm.total - 1) >> 32) { + DRM_ERROR("We never expected a Global GTT with more than 32bits" + " of address space! Found %lldM!\n", + ggtt->vm.total >> 20); + ggtt->vm.total = 1ULL << 32; + ggtt->mappable_end = + min_t(u64, ggtt->mappable_end, ggtt->vm.total); + } + + if (ggtt->mappable_end > ggtt->vm.total) { + DRM_ERROR("mappable aperture extends past end of GGTT," + " aperture=%pa, total=%llx\n", + &ggtt->mappable_end, ggtt->vm.total); + ggtt->mappable_end = ggtt->vm.total; + } + + /* GMADR is the PCI mmio aperture into the global GTT. 
*/ + DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20); + DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); + DRM_DEBUG_DRIVER("DSM size = %lluM\n", + (u64)resource_size(&intel_graphics_stolen_res) >> 20); + + return 0; +} + +/** + * i915_ggtt_probe_hw - Probe GGTT hardware location + * @i915: i915 device + */ +int i915_ggtt_probe_hw(struct drm_i915_private *i915) +{ + int ret; + + ret = ggtt_probe_hw(&i915->ggtt, &i915->gt); + if (ret) + return ret; + + if (intel_vtd_active()) + dev_info(i915->drm.dev, "VT-d active for gfx access\n"); + + return 0; +} + +int i915_ggtt_enable_hw(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) < 6 && !intel_enable_gtt()) + return -EIO; + + return 0; +} + +void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) +{ + GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate); + + ggtt->invalidate = guc_ggtt_invalidate; + + ggtt->invalidate(ggtt); +} + +void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) +{ + /* XXX Temporary pardon for error unload */ + if (ggtt->invalidate == gen8_ggtt_invalidate) + return; + + /* We should only be called after i915_ggtt_enable_guc() */ + GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate); + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->invalidate(ggtt); +} + +static void ggtt_restore_mappings(struct i915_ggtt *ggtt) +{ + struct i915_vma *vma; + bool flush = false; + int open; + + intel_gt_check_and_clear_faults(ggtt->vm.gt); + + mutex_lock(&ggtt->vm.mutex); + + /* First fill our portion of the GTT with scratch pages */ + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); + + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); + + /* clflush objects bound into the GGTT and rebind them. 
*/ + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + continue; + + clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); + WARN_ON(i915_vma_bind(vma, + obj ? obj->cache_level : 0, + PIN_GLOBAL, NULL)); + if (obj) { /* only used during resume => exclusive access */ + flush |= fetch_and_zero(&obj->write_domain); + obj->read_domains |= I915_GEM_DOMAIN_GTT; + } + } + + atomic_set(&ggtt->vm.open, open); + ggtt->invalidate(ggtt); + + mutex_unlock(&ggtt->vm.mutex); + + if (flush) + wbinvd_on_all_cpus(); +} + +void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + ggtt_restore_mappings(ggtt); + + if (INTEL_GEN(i915) >= 8) + setup_private_pat(ggtt->vm.gt->uncore); +} + +static struct scatterlist * +rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int column, row; + unsigned int src_idx; + + for (column = 0; column < width; column++) { + src_idx = stride * (height - 1) + column + offset; + for (row = 0; row < height; row++) { + st->nents++; + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); + sg_dma_address(sg) = + i915_gem_object_get_dma_address(obj, src_idx); + sg_dma_len(sg) = I915_GTT_PAGE_SIZE; + sg = sg_next(sg); + src_idx -= stride; + } + } + + return sg; +} + +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_rotation_info_size(rot_info); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. 
*/ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { + sg = rotate_pages(obj, rot_info->plane[i].offset, + rot_info->plane[i].width, rot_info->plane[i].height, + rot_info->plane[i].stride, st, sg); + } + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static struct scatterlist * +remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int row; + + for (row = 0; row < height; row++) { + unsigned int left = width * I915_GTT_PAGE_SIZE; + + while (left) { + dma_addr_t addr; + unsigned int length; + + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + + addr = i915_gem_object_get_dma_address_len(obj, offset, &length); + + length = min(left, length); + + st->nents++; + + sg_set_page(sg, NULL, length, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = length; + sg = sg_next(sg); + + offset += length / I915_GTT_PAGE_SIZE; + left -= length; + } + + offset += stride - width; + } + + return sg; +} + +static noinline struct sg_table * +intel_remap_pages(struct intel_remapped_info *rem_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_remapped_info_size(rem_info); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. 
*/ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { + sg = remap_pages(obj, rem_info->plane[i].offset, + rem_info->plane[i].width, rem_info->plane[i].height, + rem_info->plane[i].stride, st, sg); + } + + i915_sg_trim(st); + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static noinline struct sg_table * +intel_partial_pages(const struct i915_ggtt_view *view, + struct drm_i915_gem_object *obj) +{ + struct sg_table *st; + struct scatterlist *sg, *iter; + unsigned int count = view->partial.size; + unsigned int offset; + int ret = -ENOMEM; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, count, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); + GEM_BUG_ON(!iter); + + sg = st->sgl; + st->nents = 0; + do { + unsigned int len; + + len = min(iter->length - (offset << PAGE_SHIFT), + count << PAGE_SHIFT); + sg_set_page(sg, NULL, len, 0); + sg_dma_address(sg) = + sg_dma_address(iter) + (offset << PAGE_SHIFT); + sg_dma_len(sg) = len; + + st->nents++; + count -= len >> PAGE_SHIFT; + if (count == 0) { + sg_mark_end(sg); + i915_sg_trim(st); /* Drop any unused tail entries. */ + + return st; + } + + sg = __sg_next(sg); + iter = __sg_next(iter); + offset = 0; + } while (1); + +err_sg_alloc: + kfree(st); +err_st_alloc: + return ERR_PTR(ret); +} + +static int +i915_get_ggtt_vma_pages(struct i915_vma *vma) +{ + int ret; + + /* + * The vma->pages are only valid within the lifespan of the borrowed + * obj->mm.pages. 
When the obj->mm.pages sg_table is regenerated, so + * must be the vma->pages. A simple rule is that vma->pages must only + * be accessed when the obj->mm.pages are pinned. + */ + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); + + switch (vma->ggtt_view.type) { + default: + GEM_BUG_ON(vma->ggtt_view.type); + /* fall through */ + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; + return 0; + + case I915_GGTT_VIEW_ROTATED: + vma->pages = + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_REMAPPED: + vma->pages = + intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: + vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); + break; + } + + ret = 0; + if (IS_ERR(vma->pages)) { + ret = PTR_ERR(vma->pages); + vma->pages = NULL; + DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", + vma->ggtt_view.type, ret); + } + return ret; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 4294f146f13c..51b8718513bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -7,6 +7,8 @@ #ifndef _INTEL_GPU_COMMANDS_H_ #define _INTEL_GPU_COMMANDS_H_ +#include <linux/bitops.h> + /* * Target address alignments required for GPU access e.g. * MI_STORE_DWORD_IMM. @@ -319,4 +321,31 @@ #define COLOR_BLT ((0x2<<29)|(0x40<<22)) #define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) +/* + * Used to convert any address to canonical form. + * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, + * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the + * addresses to be in a canonical form: + * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct + * canonical form [63:48] == [47]." 
+ */ +#define GEN8_HIGH_ADDRESS_BIT 47 +static inline u64 gen8_canonical_addr(u64 address) +{ + return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); +} + +static inline u64 gen8_noncanonical_addr(u64 address) +{ + return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); +} + +static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags) +{ + *cs++ = MI_BATCH_BUFFER_START | flags; + *cs++ = addr; + + return cs; +} + #endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 4c26daf7ee46..da2b6e2ae692 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -3,12 +3,15 @@ * Copyright © 2019 Intel Corporation */ +#include "debugfs_gt.h" #include "i915_drv.h" +#include "intel_context.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_mocs.h" #include "intel_rc6.h" +#include "intel_renderstate.h" #include "intel_rps.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -25,6 +28,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_reset(gt); intel_gt_init_requests(gt); + intel_gt_init_timelines(gt); intel_gt_pm_init_early(gt); intel_rps_init_early(>->rps); @@ -34,8 +38,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) { gt->ggtt = ggtt; - - intel_gt_sanitize(gt, false); } static void init_unused_ring(struct intel_gt *gt, u32 base) @@ -73,11 +75,6 @@ int intel_gt_init_hw(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; int ret; - BUG_ON(!i915->kernel_context); - ret = intel_gt_terminally_wedged(gt); - if (ret) - return ret; - gt->last_init_time = ktime_get(); /* Double layer security blanket, see i915_gem_init() */ @@ -303,7 +300,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) intel_gt_chipset_flush(gt); - 
with_intel_runtime_pm(uncore->rpm, wakeref) { + with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) { unsigned long flags; spin_lock_irqsave(&uncore->lock, flags); @@ -323,6 +320,8 @@ void intel_gt_chipset_flush(struct intel_gt *gt) void intel_gt_driver_register(struct intel_gt *gt) { intel_rps_driver_register(>->rps); + + debugfs_gt_register(gt); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) @@ -364,22 +363,272 @@ static void intel_gt_fini_scratch(struct intel_gt *gt) i915_vma_unpin_and_release(>->scratch, 0); } +static struct i915_address_space *kernel_vm(struct intel_gt *gt) +{ + if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) + return &i915_ppgtt_create(gt)->vm; + else + return i915_vm_get(>->ggtt->vm); +} + +static int __intel_context_flush_retire(struct intel_context *ce) +{ + struct intel_timeline *tl; + + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + intel_context_timeline_unlock(tl); + return 0; +} + +static int __engines_record_defaults(struct intel_gt *gt) +{ + struct i915_request *requests[I915_NUM_ENGINES] = {}; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * As we reset the gpu during very early sanitisation, the current + * register state on the GPU should reflect its defaults values. + * We load a context onto the hw (with restore-inhibit), then switch + * over to a second context to save that default register state. We + * can then prime every new context with that state so they all start + * from the same default HW values. + */ + + for_each_engine(engine, gt, id) { + struct intel_renderstate so; + struct intel_context *ce; + struct i915_request *rq; + + /* We must be able to switch to something! 
*/ + GEM_BUG_ON(!engine->kernel_context); + + err = intel_renderstate_init(&so, engine); + if (err) + goto out; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + intel_context_put(ce); + goto out; + } + + err = intel_engine_emit_ctx_wa(rq); + if (err) + goto err_rq; + + err = intel_renderstate_emit(&so, rq); + if (err) + goto err_rq; + +err_rq: + requests[id] = i915_request_get(rq); + i915_request_add(rq); + intel_renderstate_fini(&so); + if (err) + goto out; + } + + /* Flush the default context image to memory, and enable powersaving. */ + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + err = -EIO; + goto out; + } + + for (id = 0; id < ARRAY_SIZE(requests); id++) { + struct i915_request *rq; + struct i915_vma *state; + void *vaddr; + + rq = requests[id]; + if (!rq) + continue; + + GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); + state = rq->context->state; + if (!state) + continue; + + /* Serialise with retirement on another CPU */ + GEM_BUG_ON(!i915_request_completed(rq)); + err = __intel_context_flush_retire(rq->context); + if (err) + goto out; + + /* We want to be able to unbind the state from the GGTT */ + GEM_BUG_ON(intel_context_is_pinned(rq->context)); + + /* + * As we will hold a reference to the logical state, it will + * not be torn down with the context, and importantly the + * object will hold onto its vma (making it possible for a + * stray GTT write to corrupt our defaults). Unmap the vma + * from the GTT to prevent such accidents and reclaim the + * space. 
+ */ + err = i915_vma_unbind(state); + if (err) + goto out; + + i915_gem_object_lock(state->obj); + err = i915_gem_object_set_to_cpu_domain(state->obj, false); + i915_gem_object_unlock(state->obj); + if (err) + goto out; + + i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); + + /* Check we can acquire the image of the context state */ + vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out; + } + + rq->engine->default_state = i915_gem_object_get(state->obj); + i915_gem_object_unpin_map(state->obj); + } + +out: + /* + * If we have to abandon now, we expect the engines to be idle + * and ready to be torn-down. The quickest way we can accomplish + * this is by declaring ourselves wedged. + */ + if (err) + intel_gt_set_wedged(gt); + + for (id = 0; id < ARRAY_SIZE(requests); id++) { + struct intel_context *ce; + struct i915_request *rq; + + rq = requests[id]; + if (!rq) + continue; + + ce = rq->context; + i915_request_put(rq); + intel_context_put(ce); + } + return err; +} + +static int __engines_verify_workarounds(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return 0; + + for_each_engine(engine, gt, id) { + if (intel_engine_verify_workarounds(engine, "load")) + err = -EIO; + } + + return err; +} + +static void __intel_gt_disable(struct intel_gt *gt) +{ + intel_gt_set_wedged_on_init(gt); + + intel_gt_suspend_prepare(gt); + intel_gt_suspend_late(gt); + + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); +} + int intel_gt_init(struct intel_gt *gt) { int err; - err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); + err = i915_inject_probe_error(gt->i915, -ENODEV); if (err) return err; + /* + * This is just a security blanket to placate dragons. + * On some systems, we very sporadically observe that the first TLBs + * used by the CS may be stale, despite us poking the TLB reset. 
If + * we hold the forcewake during initialisation these problems + * just magically go away. + */ + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); + if (err) + goto out_fw; + intel_gt_pm_init(gt); - return 0; + gt->vm = kernel_vm(gt); + if (!gt->vm) { + err = -ENOMEM; + goto err_pm; + } + + err = intel_engines_init(gt); + if (err) + goto err_engines; + + intel_uc_init(>->uc); + + err = intel_gt_resume(gt); + if (err) + goto err_uc_init; + + err = __engines_record_defaults(gt); + if (err) + goto err_gt; + + err = __engines_verify_workarounds(gt); + if (err) + goto err_gt; + + err = i915_inject_probe_error(gt->i915, -EIO); + if (err) + goto err_gt; + + goto out_fw; +err_gt: + __intel_gt_disable(gt); + intel_uc_fini_hw(>->uc); +err_uc_init: + intel_uc_fini(>->uc); +err_engines: + intel_engines_release(gt); + i915_vm_put(fetch_and_zero(>->vm)); +err_pm: + intel_gt_pm_fini(gt); + intel_gt_fini_scratch(gt); +out_fw: + if (err) + intel_gt_set_wedged_on_init(gt); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + return err; } void intel_gt_driver_remove(struct intel_gt *gt) { - GEM_BUG_ON(gt->awake); + __intel_gt_disable(gt); + + intel_uc_fini_hw(>->uc); + intel_uc_fini(>->uc); + + intel_engines_release(gt); } void intel_gt_driver_unregister(struct intel_gt *gt) @@ -389,6 +638,12 @@ void intel_gt_driver_unregister(struct intel_gt *gt) void intel_gt_driver_release(struct intel_gt *gt) { + struct i915_address_space *vm; + + vm = fetch_and_zero(>->vm); + if (vm) /* FIXME being called twice on error paths :( */ + i915_vm_put(vm); + intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); } @@ -396,5 +651,8 @@ void intel_gt_driver_release(struct intel_gt *gt) void intel_gt_driver_late_release(struct intel_gt *gt) { intel_uc_driver_late_release(>->uc); + intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); + intel_gt_fini_timelines(gt); + intel_engines_free(gt); } diff --git 
a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 5436f8c30708..1dac441cb8f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -12,6 +12,12 @@ struct drm_i915_private; +#define GT_TRACE(gt, fmt, ...) do { \ + const struct intel_gt *gt__ __maybe_unused = (gt); \ + GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ + ##__VA_ARGS__); \ +} while (0) + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); @@ -52,9 +58,14 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, return i915_ggtt_offset(gt->scratch) + field; } -static inline bool intel_gt_is_wedged(struct intel_gt *gt) +static inline bool intel_gt_is_wedged(const struct intel_gt *gt) { return __intel_reset_failed(>->reset); } +static inline bool intel_gt_has_init_error(const struct intel_gt *gt) +{ + return test_bit(I915_WEDGED_ON_INIT, >->reset.flags); +} + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 973ee7eded64..f796bdf1ed30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -28,7 +28,7 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { - intel_engine_breadcrumbs_irq(engine); + intel_engine_signal_breadcrumbs(engine); tasklet |= intel_engine_needs_breadcrumb_tasklet(engine); } @@ -245,9 +245,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); } static 
void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) @@ -271,11 +271,11 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); if (gt_iir & GT_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); if (gt_iir & GT_BLT_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine_class[COPY_ENGINE_CLASS][0]); + intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7e64b7d7d330..d1c2f034296a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -43,7 +43,7 @@ static int __gt_unpark(struct intel_wakeref *wf) struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); struct drm_i915_private *i915 = gt->i915; - GEM_TRACE("\n"); + GT_TRACE(gt, "\n"); i915_globals_unpark(); @@ -61,9 +61,7 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); - if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - + intel_rc6_unpark(>->rc6); intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); @@ -78,22 +76,18 @@ static int __gt_park(struct intel_wakeref *wf) intel_wakeref_t wakeref = fetch_and_zero(>->awake); struct drm_i915_private *i915 = gt->i915; - GEM_TRACE("\n"); + GT_TRACE(gt, "\n"); intel_gt_park_requests(gt); i915_vma_parked(gt); i915_pmu_gt_parked(i915); intel_rps_park(>->rps); + intel_rc6_park(>->rc6); /* Everything switched off, flush any 
residual interrupt just in case */ intel_synchronize_irq(i915); - if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { - intel_rc6_ctx_wa_check(&i915->gt.rc6); - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - } - /* Defer dropping the display power well for 100ms, it's slow! */ GEM_BUG_ON(!wakeref); intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); @@ -132,23 +126,13 @@ static bool reset_engines(struct intel_gt *gt) return __intel_gt_reset(gt, ALL_ENGINES) == 0; } -/** - * intel_gt_sanitize: called after the GPU has lost power - * @gt: the i915 GT container - * @force: ignore a failed reset and sanitize engine state anyway - * - * Anytime we reset the GPU, either with an explicit GPU reset or through a - * PCI power cycle, the GPU loses state and we must reset our state tracking - * to match. Note that calling intel_gt_sanitize() if the GPU has not - * been reset results in much confusion! - */ -void intel_gt_sanitize(struct intel_gt *gt, bool force) +static void gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; intel_wakeref_t wakeref; - GEM_TRACE("force:%s\n", yesno(force)); + GT_TRACE(gt, "force:%s", yesno(force)); /* Use a raw wakeref to avoid calling intel_display_power_get early */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); @@ -193,9 +177,13 @@ int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - int err = 0; + int err; - GEM_TRACE("\n"); + err = intel_gt_has_init_error(gt); + if (err) + return err; + + GT_TRACE(gt, "\n"); /* * After resume, we may need to poke into the pinned kernel @@ -207,21 +195,26 @@ int intel_gt_resume(struct intel_gt *gt) intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); + gt_sanitize(gt, true); + if (intel_gt_is_wedged(gt)) { + err = -EIO; + goto out_fw; + } + + /* Only when the HW is re-initialised, can we replay the requests */ + err = intel_gt_init_hw(gt); + if (err) { + 
dev_err(gt->i915->drm.dev, + "Failed to initialize GPU, declaring it wedged!\n"); + goto err_wedged; + } intel_rps_enable(>->rps); intel_llc_enable(>->llc); for_each_engine(engine, gt, id) { - struct intel_context *ce; - intel_engine_pm_get(engine); - ce = engine->kernel_context; - if (ce) { - GEM_BUG_ON(!intel_context_is_pinned(ce)); - ce->ops->reset(ce); - } - engine->serial++; /* kernel context lost */ err = engine->resume(engine); @@ -230,7 +223,7 @@ int intel_gt_resume(struct intel_gt *gt) dev_err(gt->i915->drm.dev, "Failed to restart %s (%d)\n", engine->name, err); - break; + goto err_wedged; } } @@ -240,10 +233,14 @@ int intel_gt_resume(struct intel_gt *gt) user_forcewake(gt, false); +out_fw: intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); - return err; + +err_wedged: + intel_gt_set_wedged(gt); + goto out_fw; } static void wait_for_suspend(struct intel_gt *gt) @@ -257,6 +254,7 @@ static void wait_for_suspend(struct intel_gt *gt) * the gpu quiet. */ intel_gt_set_wedged(gt); + intel_gt_retire_requests(gt); } intel_gt_pm_wait_for_idle(gt); @@ -286,6 +284,11 @@ void intel_gt_suspend_late(struct intel_gt *gt) /* We expect to be idle already; but also want to be independent */ wait_for_suspend(gt); + if (is_mock_gt(gt)) + return; + + GEM_BUG_ON(gt->awake); + /* * On disabling the device, we want to turn off HW access to memory * that we no longer own. 
@@ -305,22 +308,21 @@ void intel_gt_suspend_late(struct intel_gt *gt) intel_llc_disable(>->llc); } - intel_gt_sanitize(gt, false); + gt_sanitize(gt, false); - GEM_TRACE("\n"); + GT_TRACE(gt, "\n"); } void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(>->uc); - GEM_TRACE("\n"); + GT_TRACE(gt, "\n"); } int intel_gt_runtime_resume(struct intel_gt *gt) { - GEM_TRACE("\n"); - + GT_TRACE(gt, "\n"); intel_gt_init_swizzling(gt); return intel_uc_runtime_resume(>->uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 990efc27a4e4..60f0e2fbe55c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -22,6 +22,11 @@ static inline void intel_gt_pm_get(struct intel_gt *gt) intel_wakeref_get(>->wakeref); } +static inline void __intel_gt_pm_get(struct intel_gt *gt) +{ + __intel_wakeref_get(>->wakeref); +} + static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) { return intel_wakeref_get_if_active(>->wakeref); @@ -46,8 +51,6 @@ void intel_gt_pm_init_early(struct intel_gt *gt); void intel_gt_pm_init(struct intel_gt *gt); void intel_gt_pm_fini(struct intel_gt *gt); -void intel_gt_sanitize(struct intel_gt *gt, bool force); - void intel_gt_suspend_prepare(struct intel_gt *gt); void intel_gt_suspend_late(struct intel_gt *gt); int intel_gt_resume(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 3dc13ecf41bf..7ef1d37970f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -8,27 +8,40 @@ #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" +#include "intel_engine_heartbeat.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_timeline.h" -static void retire_requests(struct intel_timeline *tl) +static bool retire_requests(struct intel_timeline *tl) { struct 
i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &tl->requests, link) if (!i915_request_retire(rq)) - break; + return false; + + /* And check nothing new was submitted */ + return !i915_active_fence_isset(&tl->last_request); } -static void flush_submission(struct intel_gt *gt) +static bool flush_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + bool active = false; + + if (!intel_gt_pm_is_awake(gt)) + return false; - for_each_engine(engine, gt, id) + for_each_engine(engine, gt, id) { intel_engine_flush_submission(engine); + active |= flush_work(&engine->retire_work); + active |= flush_work(&engine->wakeref.work); + } + + return active; } static void engine_retire(struct work_struct *work) @@ -62,19 +75,16 @@ static void engine_retire(struct work_struct *work) static bool add_retire(struct intel_engine_cs *engine, struct intel_timeline *tl) { +#define STUB ((struct intel_timeline *)1) struct intel_timeline *first; /* * We open-code a llist here to include the additional tag [BIT(0)] * so that we know when the timeline is already on a * retirement queue: either this engine or another. - * - * However, we rely on that a timeline can only be active on a single - * engine at any one time and that add_retire() is called before the - * engine releases the timeline and transferred to another to retire. 
*/ - if (READ_ONCE(tl->retire)) /* already queued */ + if (cmpxchg(&tl->retire, NULL, STUB)) /* already queued */ return false; intel_timeline_get(tl); @@ -109,7 +119,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl, *tn; unsigned long active_count = 0; - unsigned long flags; bool interruptible; LIST_HEAD(free); @@ -118,8 +127,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) timeout = -timeout, interruptible = false; flush_submission(gt); /* kick the ksoftirqd tasklets */ - - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) { active_count++; /* report busy to caller, try again? */ @@ -129,7 +137,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) intel_timeline_get(tl); GEM_BUG_ON(!atomic_read(&tl->active_count)); atomic_inc(&tl->active_count); /* pin the list element */ - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); if (timeout > 0) { struct dma_fence *fence; @@ -143,16 +151,15 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } } - retire_requests(tl); + if (!retire_requests(tl) || flush_submission(gt)) + active_count++; - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); - else - active_count += !!rcu_access_pointer(tl->last_request.fence); mutex_unlock(&tl->mutex); @@ -162,7 +169,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) list_add(&tl->link, &free); } } - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); @@ 
-190,9 +197,9 @@ static void retire_work_handler(struct work_struct *work) struct intel_gt *gt = container_of(work, typeof(*gt), requests.retire_work.work); - intel_gt_retire_requests(gt); schedule_delayed_work(>->requests.retire_work, round_jiffies_up_relative(HZ)); + intel_gt_retire_requests(gt); } void intel_gt_init_requests(struct intel_gt *gt) @@ -210,3 +217,9 @@ void intel_gt_unpark_requests(struct intel_gt *gt) schedule_delayed_work(>->requests.retire_work, round_jiffies_up_relative(HZ)); } + +void intel_gt_fini_requests(struct intel_gt *gt) +{ + /* Wait until the work is marked as finished before unloading! */ + cancel_delayed_work_sync(>->requests.retire_work); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index d626fb115386..dbac53baf1cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -27,5 +27,6 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_init_requests(struct intel_gt *gt); void intel_gt_park_requests(struct intel_gt *gt); void intel_gt_unpark_requests(struct intel_gt *gt); +void intel_gt_fini_requests(struct intel_gt *gt); #endif /* INTEL_GT_REQUESTS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index d4e14dbd172e..96890dd12b5f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -90,6 +90,13 @@ struct intel_gt { struct intel_engine_cs *engine[I915_NUM_ENGINES]; struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] [MAX_ENGINE_INSTANCE + 1]; + + /* + * Default address space (either GGTT or ppGTT depending on arch). + * + * Reserved for exclusive use by the kernel. 
+ */ + struct i915_address_space *vm; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c new file mode 100644 index 000000000000..16acdc5d6734 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/slab.h> /* fault-inject.h is not standalone! */ + +#include <linux/fault-inject.h> + +#include "i915_trace.h" +#include "intel_gt.h" +#include "intel_gtt.h" + +void stash_init(struct pagestash *stash) +{ + pagevec_init(&stash->pvec); + spin_lock_init(&stash->lock); +} + +static struct page *stash_pop_page(struct pagestash *stash) +{ + struct page *page = NULL; + + spin_lock(&stash->lock); + if (likely(stash->pvec.nr)) + page = stash->pvec.pages[--stash->pvec.nr]; + spin_unlock(&stash->lock); + + return page; +} + +static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) +{ + unsigned int nr; + + spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); + + nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); + memcpy(stash->pvec.pages + stash->pvec.nr, + pvec->pages + pvec->nr - nr, + sizeof(pvec->pages[0]) * nr); + stash->pvec.nr += nr; + + spin_unlock(&stash->lock); + + pvec->nr -= nr; +} + +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) +{ + struct pagevec stack; + struct page *page; + + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); + + page = stash_pop_page(&vm->free_pages); + if (page) + return page; + + if (!vm->pt_kmap_wc) + return alloc_page(gfp); + + /* Look in our global stash of WC pages... */ + page = stash_pop_page(&vm->i915->mm.wc_stash); + if (page) + return page; + + /* + * Otherwise batch allocate pages to amortize cost of set_pages_wc. 
+ * + * We have to be careful as page allocation may trigger the shrinker + * (via direct reclaim) which will fill up the WC stash underneath us. + * So we add our WB pages into a temporary pvec on the stack and merge + * them into the WC stash after all the allocations are complete. + */ + pagevec_init(&stack); + do { + struct page *page; + + page = alloc_page(gfp); + if (unlikely(!page)) + break; + + stack.pages[stack.nr++] = page; + } while (pagevec_space(&stack)); + + if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { + page = stack.pages[--stack.nr]; + + /* Merge spare WC pages to the global stash */ + if (stack.nr) + stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); + + /* Push any surplus WC pages onto the local VM stash */ + if (stack.nr) + stash_push_pagevec(&vm->free_pages, &stack); + } + + /* Return unwanted leftovers */ + if (unlikely(stack.nr)) { + WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); + __pagevec_release(&stack); + } + + return page; +} + +static void vm_free_pages_release(struct i915_address_space *vm, + bool immediate) +{ + struct pagevec *pvec = &vm->free_pages.pvec; + struct pagevec stack; + + lockdep_assert_held(&vm->free_pages.lock); + GEM_BUG_ON(!pagevec_count(pvec)); + + if (vm->pt_kmap_wc) { + /* + * When we use WC, first fill up the global stash and then + * only if full immediately free the overflow. + */ + stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); + + /* + * As we have made some room in the VM's free_pages, + * we can wait for it to fill again. Unless we are + * inside i915_address_space_fini() and must + * immediately release the pages! + */ + if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) + return; + + /* + * We have to drop the lock to allow ourselves to sleep, + * so take a copy of the pvec and clear the stash for + * others to use it as we sleep. 
+ */ + stack = *pvec; + pagevec_reinit(pvec); + spin_unlock(&vm->free_pages.lock); + + pvec = &stack; + set_pages_array_wb(pvec->pages, pvec->nr); + + spin_lock(&vm->free_pages.lock); + } + + __pagevec_release(pvec); +} + +static void vm_free_page(struct i915_address_space *vm, struct page *page) +{ + /* + * On !llc, we need to change the pages back to WB. We only do so + * in bulk, so we rarely need to change the page attributes here, + * but doing so requires a stop_machine() from deep inside arch/x86/mm. + * To make detection of the possible sleep more likely, use an + * unconditional might_sleep() for everybody. + */ + might_sleep(); + spin_lock(&vm->free_pages.lock); + while (!pagevec_space(&vm->free_pages.pvec)) + vm_free_pages_release(vm, false); + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); + pagevec_add(&vm->free_pages.pvec, page); + spin_unlock(&vm->free_pages.lock); +} + +void __i915_vm_close(struct i915_address_space *vm) +{ + struct i915_vma *vma, *vn; + + mutex_lock(&vm->mutex); + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + /* Keep the obj (and hence the vma) alive as _we_ destroy it */ + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + __i915_vma_put(vma); + + i915_gem_object_put(obj); + } + GEM_BUG_ON(!list_empty(&vm->bound_list)); + mutex_unlock(&vm->mutex); +} + +void i915_address_space_fini(struct i915_address_space *vm) +{ + spin_lock(&vm->free_pages.lock); + if (pagevec_count(&vm->free_pages.pvec)) + vm_free_pages_release(vm, true); + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); + spin_unlock(&vm->free_pages.lock); + + drm_mm_takedown(&vm->mm); + + mutex_destroy(&vm->mutex); +} + +static void __i915_vm_release(struct work_struct *work) +{ + struct i915_address_space *vm = + container_of(work, struct i915_address_space, rcu.work); + + vm->cleanup(vm); + 
i915_address_space_fini(vm); + + kfree(vm); +} + +void i915_vm_release(struct kref *kref) +{ + struct i915_address_space *vm = + container_of(kref, struct i915_address_space, ref); + + GEM_BUG_ON(i915_is_ggtt(vm)); + trace_i915_ppgtt_release(vm); + + queue_rcu_work(vm->i915->wq, &vm->rcu); +} + +void i915_address_space_init(struct i915_address_space *vm, int subclass) +{ + kref_init(&vm->ref); + INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + atomic_set(&vm->open, 1); + + /* + * The vm->mutex must be reclaim safe (for use in the shrinker). + * Do a dummy acquire now under fs_reclaim so that any allocation + * attempt holding the lock is immediately reported by lockdep. + */ + mutex_init(&vm->mutex); + lockdep_set_subclass(&vm->mutex, subclass); + i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); + + GEM_BUG_ON(!vm->total); + drm_mm_init(&vm->mm, 0, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + + stash_init(&vm->free_pages); + + INIT_LIST_HEAD(&vm->bound_list); +} + +void clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); +} + +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); + if (unlikely(!p->page)) + return -ENOMEM; + + p->daddr = dma_map_page_attrs(vm->dma, + p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_WARN); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; + } + + return 0; +} + +int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +{ + return __setup_page_dma(vm, p, __GFP_HIGHMEM); +} + +void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +{ + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, 
PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); +} + +void +fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) +{ + kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); +} + +int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) +{ + unsigned long size; + + /* + * In order to utilize 64K pages for an object with a size < 2M, we will + * need to support a 64K scratch page, given that every 16th entry for a + * page-table operating in 64K mode must point to a properly aligned 64K + * region, including any PTEs which happen to point to scratch. + * + * This is only relevant for the 48b PPGTT where we support + * huge-gtt-pages, see also i915_vma_insert(). However, as we share the + * scratch (read-only) between all vm, we create one 64k scratch page + * for all. + */ + size = I915_GTT_PAGE_SIZE_4K; + if (i915_vm_is_4lvl(vm) && + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + size = I915_GTT_PAGE_SIZE_64K; + gfp |= __GFP_NOWARN; + } + gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; + + do { + unsigned int order = get_order(size); + struct page *page; + dma_addr_t addr; + + page = alloc_pages(gfp, order); + if (unlikely(!page)) + goto skip; + + addr = dma_map_page_attrs(vm->dma, + page, 0, size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_WARN); + if (unlikely(dma_mapping_error(vm->dma, addr))) + goto free_page; + + if (unlikely(!IS_ALIGNED(addr, size))) + goto unmap_page; + + vm->scratch[0].base.page = page; + vm->scratch[0].base.daddr = addr; + vm->scratch_order = order; + return 0; + +unmap_page: + dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); +free_page: + __free_pages(page, order); +skip: + if (size == I915_GTT_PAGE_SIZE_4K) + return -ENOMEM; + + size = I915_GTT_PAGE_SIZE_4K; + gfp &= ~__GFP_NOWARN; + } while (1); +} + +void cleanup_scratch_page(struct i915_address_space *vm) +{ + struct i915_page_dma *p = px_base(&vm->scratch[0]); + unsigned int order = vm->scratch_order; + + 
dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, + PCI_DMA_BIDIRECTIONAL); + __free_pages(p->page, order); +} + +void free_scratch(struct i915_address_space *vm) +{ + int i; + + if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ + return; + + for (i = 1; i <= vm->top; i++) { + if (!px_dma(&vm->scratch[i])) + break; + cleanup_page_dma(vm, px_base(&vm->scratch[i])); + } + + cleanup_scratch_page(vm); +} + +void gtt_write_workarounds(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + + /* + * This function is for gtt related workarounds. This function is + * called on driver load and after a GPU reset, so you can place + * workarounds here even if they get overwritten by GPU reset. + */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ + if (IS_BROADWELL(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); + else if (IS_CHERRYVIEW(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); + else if (IS_GEN9_LP(i915)) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) + intel_uncore_write(uncore, + GEN8_L3_LRA_1_GPGPU, + GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); + + /* + * To support 64K PTEs we need to first enable the use of the + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical + * mmio, otherwise the page-walker will simply ignore the IPS bit. This + * shouldn't be needed after GEN10. + * + * 64K pages were first introduced from BDW+, although technically they + * only *work* from gen9+. For pre-BDW we instead have the option for + * 32K pages, but we don't currently have any support for it in our + * driver. 
+ */ + if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) && + INTEL_GEN(i915) <= 10) + intel_uncore_rmw(uncore, + GEN8_GAMW_ECO_DEV_RW_IA, + 0, + GAMW_ECO_ENABLE_64K_IPS_FIELD); + + if (IS_GEN_RANGE(i915, 8, 11)) { + bool can_use_gtt_cache = true; + + /* + * According to the BSpec if we use 2M/1G pages then we also + * need to disable the GTT cache. At least on BDW we can see + * visual corruption when using 2M pages, and not disabling the + * GTT cache. + */ + if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M)) + can_use_gtt_cache = false; + + /* WaGttCachingOffByDefault */ + intel_uncore_write(uncore, + HSW_GTT_CACHE_EN, + can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); + WARN_ON_ONCE(can_use_gtt_cache && + intel_uncore_read(uncore, + HSW_GTT_CACHE_EN) == 0); + } +} + +u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; + + switch (level) { + case I915_CACHE_NONE: + pte |= PPAT_UNCACHED; + break; + case I915_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC; + break; + default: + pte |= PPAT_CACHED; + break; + } + + return pte; +} + +static void tgl_setup_private_ppat(struct intel_uncore *uncore) +{ + /* TGL doesn't support LLC or AGE settings */ + intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); + intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); +} + +static void cnl_setup_private_ppat(struct intel_uncore *uncore) +{ + intel_uncore_write(uncore, + GEN10_PAT_INDEX(0), + GEN8_PPAT_WB | GEN8_PPAT_LLC); + intel_uncore_write(uncore, + 
GEN10_PAT_INDEX(1), + GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(2), + GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(3), + GEN8_PPAT_UC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(4), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(5), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(6), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(7), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); +} + +/* + * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability + * bits. When using advanced contexts each context stores its own PAT, but + * writing this data shouldn't be harmful even in those cases. + */ +static void bdw_setup_private_ppat(struct intel_uncore *uncore) +{ + u64 pat; + + pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ + GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ + GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ + GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ + GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | + GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | + GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | + GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); + + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); +} + +static void chv_setup_private_ppat(struct intel_uncore *uncore) +{ + u64 pat; + + /* + * Map WB on BDW to snooped on CHV. + * + * Only the snoop bit has meaning for CHV, the rest is + * ignored. 
+ * + * The hardware will never snoop for certain types of accesses: + * - CPU GTT (GMADR->GGTT->no snoop->memory) + * - PPGTT page tables + * - some other special cycles + * + * As with BDW, we also need to consider the following for GT accesses: + * "For GGTT, there is NO pat_sel[2:0] from the entry, + * so RTL will always use the value corresponding to + * pat_sel = 000". + * Which means we must set the snoop bit in PAT entry 0 + * in order to keep the global status page working. + */ + + pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | + GEN8_PPAT(1, 0) | + GEN8_PPAT(2, 0) | + GEN8_PPAT(3, 0) | + GEN8_PPAT(4, CHV_PPAT_SNOOP) | + GEN8_PPAT(5, CHV_PPAT_SNOOP) | + GEN8_PPAT(6, CHV_PPAT_SNOOP) | + GEN8_PPAT(7, CHV_PPAT_SNOOP); + + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); +} + +void setup_private_pat(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + + GEM_BUG_ON(INTEL_GEN(i915) < 8); + + if (INTEL_GEN(i915) >= 12) + tgl_setup_private_ppat(uncore); + else if (INTEL_GEN(i915) >= 10) + cnl_setup_private_ppat(uncore); + else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) + chv_setup_private_ppat(uncore); + else + bdw_setup_private_ppat(uncore); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h new file mode 100644 index 000000000000..7da7681c20b1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -0,0 +1,587 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + * + * Please try to maintain the following order within this file unless it makes + * sense to do otherwise. From top to bottom: + * 1. typedefs + * 2. #defines, and macros + * 3. structure definitions + * 4. function prototypes + * + * Within each section, please try to order by generation in ascending order, + * from top to bottom (ie. 
gen6 on the top, gen8 on the bottom). + */ + +#ifndef __INTEL_GTT_H__ +#define __INTEL_GTT_H__ + +#include <linux/io-mapping.h> +#include <linux/kref.h> +#include <linux/mm.h> +#include <linux/pagevec.h> +#include <linux/scatterlist.h> +#include <linux/workqueue.h> + +#include <drm/drm_mm.h> + +#include "gt/intel_reset.h" +#include "i915_gem_fence_reg.h" +#include "i915_selftest.h" +#include "i915_vma_types.h" + +#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) +#define DBG(...) trace_printk(__VA_ARGS__) +#else +#define DBG(...) +#endif + +#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */ + +#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) +#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) +#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21) + +#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K +#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M + +#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE + +#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE + +#define I915_FENCE_REG_NONE -1 +#define I915_MAX_NUM_FENCES 32 +/* 32 fences + sign bit for FENCE_REG_NONE */ +#define I915_MAX_NUM_FENCE_BITS 6 + +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; + +#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT) + +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) +#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) +#define I915_PDES 512 +#define I915_PDE_MASK (I915_PDES - 1) + +/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ +#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) +#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) +#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) +#define GEN6_PTE_CACHE_LLC (2 << 1) +#define GEN6_PTE_UNCACHED (1 << 1) +#define GEN6_PTE_VALID REG_BIT(0) + +#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t)) +#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE) +#define GEN6_PD_ALIGN (PAGE_SIZE * 16) 
+#define GEN6_PDE_SHIFT 22 +#define GEN6_PDE_VALID REG_BIT(0) +#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT)) + +#define GEN7_PTE_CACHE_L3_LLC (3 << 1) + +#define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2) +#define BYT_PTE_WRITEABLE REG_BIT(1) + +/* + * Cacheability Control is a 4-bit value. The low three bits are stored in bits + * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. + */ +#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \ + (((bits) & 0x8) << (11 - 3))) +#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2) +#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) +#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8) +#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) +#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) +#define HSW_PTE_UNCACHED (0) +#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) +#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) + +/* + * GEN8 32b style address is defined as a 3 level page table: + * 31:30 | 29:21 | 20:12 | 11:0 + * PDPE | PDE | PTE | offset + * The difference as compared to normal x86 3 level page table is the PDPEs are + * programmed via register. 
+ * + * GEN8 48b style address is defined as a 4 level page table: + * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 + * PML4E | PDPE | PDE | PTE | offset + */ +#define GEN8_3LVL_PDPES 4 + +#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) +#define PPAT_CACHED_PDE 0 /* WB LLC */ +#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */ +#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */ + +#define CHV_PPAT_SNOOP REG_BIT(6) +#define GEN8_PPAT_AGE(x) ((x)<<4) +#define GEN8_PPAT_LLCeLLC (3<<2) +#define GEN8_PPAT_LLCELLC (2<<2) +#define GEN8_PPAT_LLC (1<<2) +#define GEN8_PPAT_WB (3<<0) +#define GEN8_PPAT_WT (2<<0) +#define GEN8_PPAT_WC (1<<0) +#define GEN8_PPAT_UC (0<<0) +#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) + +#define GEN8_PDE_IPS_64K BIT(11) +#define GEN8_PDE_PS_2M BIT(7) + +#define for_each_sgt_daddr(__dp, __iter, __sgt) \ + __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) + +struct i915_page_dma { + struct page *page; + union { + dma_addr_t daddr; + + /* + * For gen6/gen7 only. 
This is the offset in the GGTT + * where the page directory entries for PPGTT begin + */ + u32 ggtt_offset; + }; +}; + +struct i915_page_scratch { + struct i915_page_dma base; + u64 encode; +}; + +struct i915_page_table { + struct i915_page_dma base; + atomic_t used; +}; + +struct i915_page_directory { + struct i915_page_table pt; + spinlock_t lock; + void *entry[512]; +}; + +#define __px_choose_expr(x, type, expr, other) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), type) || \ + __builtin_types_compatible_p(typeof(x), const type), \ + ({ type __x = (type)(x); expr; }), \ + other) + +#define px_base(px) \ + __px_choose_expr(px, struct i915_page_dma *, __x, \ + __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \ + __px_choose_expr(px, struct i915_page_table *, &__x->base, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ + (void)0)))) +#define px_dma(px) (px_base(px)->daddr) + +#define px_pt(px) \ + __px_choose_expr(px, struct i915_page_table *, __x, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \ + (void)0)) +#define px_used(px) (&px_pt(px)->used) + +enum i915_cache_level; + +struct drm_i915_file_private; +struct drm_i915_gem_object; +struct i915_vma; +struct intel_gt; + +struct i915_vma_ops { + /* Map an object into an address space with the given cache flags. */ + int (*bind_vma)(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); + /* + * Unmap an object from an address space. This usually consists of + * setting the valid PTE entries to a reserved scratch page. 
+ */ + void (*unbind_vma)(struct i915_vma *vma); + + int (*set_pages)(struct i915_vma *vma); + void (*clear_pages)(struct i915_vma *vma); +}; + +struct pagestash { + spinlock_t lock; + struct pagevec pvec; +}; + +void stash_init(struct pagestash *stash); + +struct i915_address_space { + struct kref ref; + struct rcu_work rcu; + + struct drm_mm mm; + struct intel_gt *gt; + struct drm_i915_private *i915; + struct device *dma; + /* + * Every address space belongs to a struct file - except for the global + * GTT that is owned by the driver (and so @file is set to NULL). In + * principle, no information should leak from one context to another + * (or between files/processes etc) unless explicitly shared by the + * owner. Tracking the owner is important in order to free up per-file + * objects along with the file, to aide resource tracking, and to + * assign blame. + */ + struct drm_i915_file_private *file; + u64 total; /* size addr space maps (ex. 2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ + + unsigned int bind_async_flags; + + /* + * Each active user context has its own address space (in full-ppgtt). + * Since the vm may be shared between multiple contexts, we count how + * many contexts keep us "open". Once open hits zero, we are closed + * and do not allow any new attachments, and proceed to shutdown our + * vma and page directories. + */ + atomic_t open; + + struct mutex mutex; /* protects vma and our lists */ +#define VM_CLASS_GGTT 0 +#define VM_CLASS_PPGTT 1 + + struct i915_page_scratch scratch[4]; + unsigned int scratch_order; + unsigned int top; + + /** + * List of vma currently bound. 
+ */ + struct list_head bound_list; + + struct pagestash free_pages; + + /* Global GTT */ + bool is_ggtt:1; + + /* Some systems require uncached updates of the page directories */ + bool pt_kmap_wc:1; + + /* Some systems support read-only mappings for GGTT and/or PPGTT */ + bool has_read_only:1; + + u64 (*pte_encode)(dma_addr_t addr, + enum i915_cache_level level, + u32 flags); /* Create a valid PTE */ +#define PTE_READ_ONLY BIT(0) + + int (*allocate_va_range)(struct i915_address_space *vm, + u64 start, u64 length); + void (*clear_range)(struct i915_address_space *vm, + u64 start, u64 length); + void (*insert_page)(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 flags); + void (*insert_entries)(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); + void (*cleanup)(struct i915_address_space *vm); + + struct i915_vma_ops vma_ops; + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); + I915_SELFTEST_DECLARE(bool scrub_64K); +}; + +/* + * The Graphics Translation Table is the way in which GEN hardware translates a + * Graphics Virtual Address into a Physical Address. In addition to the normal + * collateral associated with any va->pa translations GEN hardware also has a + * portion of the GTT which can be mapped by the CPU and remain both coherent + * and correct (in cases like swizzling). That region is referred to as GMADR in + * the spec. 
+ */ +struct i915_ggtt { + struct i915_address_space vm; + + struct io_mapping iomap; /* Mapping to our CPU mappable region */ + struct resource gmadr; /* GMADR resource */ + resource_size_t mappable_end; /* End offset that we can CPU map */ + + /** "Graphics Stolen Memory" holds the global PTEs */ + void __iomem *gsm; + void (*invalidate)(struct i915_ggtt *ggtt); + + /** PPGTT used for aliasing the PPGTT with the GTT */ + struct i915_ppgtt *alias; + + bool do_idle_maps; + + int mtrr; + + /** Bit 6 swizzling required for X tiling */ + u32 bit_6_swizzle_x; + /** Bit 6 swizzling required for Y tiling */ + u32 bit_6_swizzle_y; + + u32 pin_bias; + + unsigned int num_fences; + struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; + struct list_head fence_list; + + /** + * List of all objects in gtt_space, currently mmaped by userspace. + * All objects within this list must also be on bound_list. + */ + struct list_head userfault_list; + + /* Manual runtime pm autosuspend delay for user GGTT mmaps */ + struct intel_wakeref_auto userfault_wakeref; + + struct mutex error_mutex; + struct drm_mm_node error_capture; + struct drm_mm_node uc_fw; +}; + +struct i915_ppgtt { + struct i915_address_space vm; + + struct i915_page_directory *pd; +}; + +#define i915_is_ggtt(vm) ((vm)->is_ggtt) + +static inline bool +i915_vm_is_4lvl(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); +} + +static inline bool +i915_vm_has_cache_coloring(struct i915_address_space *vm) +{ + return i915_is_ggtt(vm) && vm->mm.color_adjust; +} + +static inline struct i915_ggtt * +i915_vm_to_ggtt(struct i915_address_space *vm) +{ + BUILD_BUG_ON(offsetof(struct i915_ggtt, vm)); + GEM_BUG_ON(!i915_is_ggtt(vm)); + return container_of(vm, struct i915_ggtt, vm); +} + +static inline struct i915_ppgtt * +i915_vm_to_ppgtt(struct i915_address_space 
*vm) +{ + BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm)); + GEM_BUG_ON(i915_is_ggtt(vm)); + return container_of(vm, struct i915_ppgtt, vm); +} + +static inline struct i915_address_space * +i915_vm_get(struct i915_address_space *vm) +{ + kref_get(&vm->ref); + return vm; +} + +void i915_vm_release(struct kref *kref); + +static inline void i915_vm_put(struct i915_address_space *vm) +{ + kref_put(&vm->ref, i915_vm_release); +} + +static inline struct i915_address_space * +i915_vm_open(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + atomic_inc(&vm->open); + return i915_vm_get(vm); +} + +static inline bool +i915_vm_tryopen(struct i915_address_space *vm) +{ + if (atomic_add_unless(&vm->open, 1, 0)) + return i915_vm_get(vm); + + return false; +} + +void __i915_vm_close(struct i915_address_space *vm); + +static inline void +i915_vm_close(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + if (atomic_dec_and_test(&vm->open)) + __i915_vm_close(vm); + + i915_vm_put(vm); +} + +void i915_address_space_init(struct i915_address_space *vm, int subclass); +void i915_address_space_fini(struct i915_address_space *vm); + +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) +{ + const u32 mask = NUM_PTE(pde_shift) - 1; + + return (address >> PAGE_SHIFT) & mask; +} + +/* + * Helper to counts the number of PTEs within the given length. This count + * does not cross a page table boundary, so the max value would be + * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. 
+ */ +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) +{ + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; + + GEM_BUG_ON(length == 0); + GEM_BUG_ON(offset_in_page(addr | length)); + + end = addr + length; + + if ((addr & mask) != (end & mask)) + return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift); + + return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); +} + +static inline u32 i915_pde_index(u64 addr, u32 shift) +{ + return (addr >> shift) & I915_PDE_MASK; +} + +static inline struct i915_page_table * +i915_pt_entry(const struct i915_page_directory * const pd, + const unsigned short n) +{ + return pd->entry[n]; +} + +static inline struct i915_page_directory * +i915_pd_entry(const struct i915_page_directory * const pdp, + const unsigned short n) +{ + return pdp->entry[n]; +} + +static inline dma_addr_t +i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) +{ + struct i915_page_dma *pt = ppgtt->pd->entry[n]; + + return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top])); +} + +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt); + +int i915_ggtt_probe_hw(struct drm_i915_private *i915); +int i915_ggtt_init_hw(struct drm_i915_private *i915); +int i915_ggtt_enable_hw(struct drm_i915_private *i915); +void i915_ggtt_enable_guc(struct i915_ggtt *ggtt); +void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); +int i915_init_ggtt(struct drm_i915_private *i915); +void i915_ggtt_driver_release(struct drm_i915_private *i915); + +static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) +{ + return ggtt->mappable_end > 0; +} + +int i915_ppgtt_init_hw(struct intel_gt *gt); + +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); + +void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915); +void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915); + +u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags); + +int 
setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); +void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); + +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) + +void +fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count); + +#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64)) +#define fill32_px(px, v) do { \ + u64 v__ = lower_32_bits(v); \ + fill_px((px), v__ << 32 | v__); \ +} while (0) + +int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp); +void cleanup_scratch_page(struct i915_address_space *vm); +void free_scratch(struct i915_address_space *vm); + +struct i915_page_table *alloc_pt(struct i915_address_space *vm); +struct i915_page_directory *alloc_pd(struct i915_address_space *vm); +struct i915_page_directory *__alloc_pd(size_t sz); + +void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd); + +#define free_px(vm, px) free_pd(vm, px_base(px)) + +void +__set_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_dma * const to, + u64 (*encode)(const dma_addr_t, const enum i915_cache_level)); + +#define set_pd_entry(pd, idx, to) \ + __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) + +void +clear_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + const struct i915_page_scratch * const scratch); + +bool +release_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_table * const pt, + const struct i915_page_scratch * const scratch); +void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); + +int ggtt_set_pages(struct i915_vma *vma); +int ppgtt_set_pages(struct i915_vma *vma); +void clear_pages(struct i915_vma *vma); + +void gtt_write_workarounds(struct intel_gt *gt); + +void setup_private_pat(struct intel_uncore *uncore); + +static inline struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +} sgt_dma(struct 
i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + + return (struct sgt_dma){ sg, addr, addr + sg->length }; +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d925a1035c9d..0cf0f6fae675 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -133,12 +133,11 @@ */ #include <linux/interrupt.h> -#include "gem/i915_gem_context.h" - #include "i915_drv.h" #include "i915_perf.h" #include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" @@ -489,17 +488,23 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) return desc; } -static u32 *set_offsets(u32 *regs, +static inline unsigned int dword_in_page(void *addr) +{ + return offset_in_page(addr) / sizeof(u32); +} + +static void set_offsets(u32 *regs, const u8 *data, - const struct intel_engine_cs *engine) + const struct intel_engine_cs *engine, + bool clear) #define NOP(x) (BIT(7) | (x)) -#define LRI(count, flags) ((flags) << 6 | (count)) +#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) #define POSTED BIT(0) #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END() 0 +#define END(x) 0, (x) { const u32 base = engine->mmio_base; @@ -507,7 +512,10 @@ static u32 *set_offsets(u32 *regs, u8 count, flags; if (*data & BIT(7)) { /* skip */ - regs += *data++ & ~BIT(7); + count = *data++ & ~BIT(7); + if (clear) + memset32(regs, MI_NOOP, count); + regs += count; continue; } @@ -533,12 +541,25 @@ static u32 *set_offsets(u32 *regs, offset |= v & ~BIT(7); } while (v & BIT(7)); - *regs = base + (offset << 2); + regs[0] = base + (offset << 2); + if (clear) + regs[1] = 0; regs += 2; } while (--count); } - return regs; + if (clear) { + 
u8 count = *++data; + + /* Clear past the tail for HW access */ + GEM_BUG_ON(dword_in_page(regs) > count); + memset32(regs, MI_NOOP, count - dword_in_page(regs)); + + /* Close the batch; used mainly by live_lrc_layout() */ + *regs = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *regs |= BIT(0); + } } static const u8 gen8_xcs_offsets[] = { @@ -573,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = { REG16(0x200), REG(0x028), - END(), + END(80) }; static const u8 gen9_xcs_offsets[] = { @@ -657,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = { REG16(0x67c), REG(0x068), - END(), + END(176) }; static const u8 gen12_xcs_offsets[] = { @@ -689,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END(), + END(80) }; static const u8 gen8_rcs_offsets[] = { @@ -726,7 +747,91 @@ static const u8 gen8_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) +}; + +static const u8 gen9_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x34), + REG(0x30), + REG(0x38), + REG(0x3c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0xc8), + + NOP(13), + LRI(44, POSTED), + REG(0x28), + REG(0x9c), + REG(0xc0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + 
REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x68), + + END(176) }; static const u8 gen11_rcs_offsets[] = { @@ -767,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) }; static const u8 gen12_rcs_offsets[] = { @@ -808,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) }; #undef END @@ -833,6 +938,8 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) return gen12_rcs_offsets; else if (INTEL_GEN(engine->i915) >= 11) return gen11_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_rcs_offsets; else return gen8_rcs_offsets; } else { @@ -880,7 +987,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { - struct intel_engine_cs *owner = rq->hw_context->engine; + struct intel_engine_cs *owner = rq->context->engine; /* * Decouple the virtual breadcrumb before moving it @@ -983,6 +1090,58 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} + +static void +execlists_check_context(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + const struct intel_ring *ring = ce->ring; + u32 *regs = ce->lrc_reg_state; + bool valid = true; + int x; + + if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { + pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_START], + i915_ggtt_offset(ring->vma)); + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); + valid = false; + } + + if ((regs[CTX_RING_CTL] & ~(RING_WAIT | 
RING_WAIT_SEMAPHORE)) != + (RING_CTL_SIZE(ring->size) | RING_VALID)) { + pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_CTL], + (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + valid = false; + } + + x = lrc_ring_mi_mode(engine); + if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { + pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", + engine->name, regs[x + 1]); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; + valid = false; + } + + WARN_ONCE(!valid, "Invalid lrc state found before submission\n"); +} + static void restore_default_state(struct intel_context *ce, struct intel_engine_cs *engine) { @@ -999,7 +1158,7 @@ static void restore_default_state(struct intel_context *ce, static void reset_active(struct i915_request *rq, struct intel_engine_cs *engine) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; u32 head; /* @@ -1017,8 +1176,8 @@ static void reset_active(struct i915_request *rq, * remain correctly ordered. And we defer to __i915_request_submit() * so that all asynchronous waits are correctly handled. 
*/ - GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", - __func__, engine->name, rq->fence.context, rq->fence.seqno); + ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", + rq->fence.context, rq->fence.seqno); /* On resubmission of the active request, payload will be scrubbed */ if (i915_request_completed(rq)) @@ -1040,13 +1199,16 @@ static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { struct intel_engine_cs * const engine = rq->engine; - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; intel_context_get(ce); - if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + if (unlikely(intel_context_is_banned(ce))) reset_active(rq, engine); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + execlists_check_context(ce, engine); + if (ce->tag) { /* Use a fixed tag for OA and friends */ ce->lrc_desc |= (u64)ce->tag << 32; @@ -1054,12 +1216,12 @@ __execlists_schedule_in(struct i915_request *rq) /* We don't need a strict matching tag, just different values */ ce->lrc_desc &= ~GENMASK_ULL(47, 37); ce->lrc_desc |= - (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) << + (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << GEN11_SW_CTX_ID_SHIFT; BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); } - intel_gt_pm_get(engine->gt); + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1069,7 +1231,7 @@ __execlists_schedule_in(struct i915_request *rq) static inline struct i915_request * execlists_schedule_in(struct i915_request *rq, int idx) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; struct intel_engine_cs *old; GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); @@ -1100,7 +1262,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = 
rq->context; /* * NB process_csb() is not under the engine->active.lock and hence @@ -1138,7 +1300,7 @@ __execlists_schedule_out(struct i915_request *rq, static inline void execlists_schedule_out(struct i915_request *rq) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; trace_i915_request_out(rq); @@ -1155,7 +1317,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; u64 desc = ce->lrc_desc; u32 tail; @@ -1186,17 +1348,8 @@ static u64 execlists_update_context(struct i915_request *rq) * may not be visible to the HW prior to the completion of the UC * register write and that we may begin execution from the context * before its image is complete leading to invalid PD chasing. - * - * Furthermore, Braswell, at least, wants a full mb to be sure that - * the writes are coherent in memory (visible to the GPU) prior to - * execution, and not just visible to other CPUs (as is the result of - * wmb). */ - mb(); - - /* Wa_1607138340:tgl */ - if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) - desc |= CTX_DESC_FORCE_RESTORE; + wmb(); ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; return desc; @@ -1224,15 +1377,14 @@ trace_ports(const struct intel_engine_execlists *execlists, if (!ports[0]) return; - GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", - engine->name, msg, - ports[0]->fence.context, - ports[0]->fence.seqno, - i915_request_completed(ports[0]) ? "!" : - i915_request_started(ports[0]) ? "*" : - "", - ports[1] ? ports[1]->fence.context : 0, - ports[1] ? ports[1]->fence.seqno : 0); + ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg, + ports[0]->fence.context, + ports[0]->fence.seqno, + i915_request_completed(ports[0]) ? "!" : + i915_request_started(ports[0]) ? "*" : + "", + ports[1] ? ports[1]->fence.context : 0, + ports[1] ? 
ports[1]->fence.seqno : 0); } static __maybe_unused bool @@ -1256,33 +1408,56 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } for (port = execlists->pending; (rq = *port); port++) { - if (ce == rq->hw_context) { - GEM_TRACE_ERR("Duplicate context in pending[%zd]\n", + unsigned long flags; + bool ok = true; + + GEM_BUG_ON(!kref_read(&rq->fence.refcount)); + GEM_BUG_ON(!i915_request_is_active(rq)); + + if (ce == rq->context) { + GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); return false; } + ce = rq->context; - ce = rq->hw_context; - if (i915_request_completed(rq)) + /* Hold tightly onto the lock to prevent concurrent retires! */ + if (!spin_trylock_irqsave(&rq->lock, flags)) continue; - if (i915_active_is_idle(&ce->active)) { - GEM_TRACE_ERR("Inactive context in pending[%zd]\n", + if (i915_request_completed(rq)) + goto unlock; + + if (i915_active_is_idle(&ce->active) && + !intel_context_is_barrier(ce)) { + GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); - return false; + ok = false; + goto unlock; } if (!i915_vma_is_pinned(ce->state)) { - GEM_TRACE_ERR("Unpinned context in pending[%zd]\n", + GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); - return false; + ok = false; + goto unlock; } if (!i915_vma_is_pinned(ce->ring->vma)) { - GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n", + GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n", + ce->timeline->fence_context, port - execlists->pending); - return false; + ok = false; + goto unlock; } + +unlock: + spin_unlock_irqrestore(&rq->lock, flags); + if (!ok) + return false; } return ce; @@ -1327,7 +1502,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - 
i915_gem_context_force_single_submission(ce->gem_context)); + intel_context_force_single_submission(ce)); } static bool can_merge_ctx(const struct intel_context *prev, @@ -1359,11 +1534,11 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; - if (unlikely((prev->flags ^ next->flags) & - (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) + if (unlikely((prev->fence.flags ^ next->fence.flags) & + (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL))) return false; - if (!can_merge_ctx(prev->hw_context, next->hw_context)) + if (!can_merge_ctx(prev->context, next->context)) return false; return true; @@ -1372,7 +1547,7 @@ static bool can_merge_rq(const struct i915_request *prev, static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - set_offsets(regs, reg_offsets(engine), engine); + set_offsets(regs, reg_offsets(engine), engine, false); } static bool virtual_matches(const struct virtual_engine *ve, @@ -1411,7 +1586,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, if (!list_empty(&ve->context.signal_link)) { list_move_tail(&ve->context.signal_link, &engine->breadcrumbs.signalers); - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); } spin_unlock(&old->breadcrumbs.irq_lock); } @@ -1519,7 +1694,7 @@ active_timeslice(const struct intel_engine_cs *engine) { const struct i915_request *rq = *engine->execlists.active; - if (i915_request_completed(rq)) + if (!rq || i915_request_completed(rq)) return 0; if (engine->execlists.switch_priority_hint < effective_prio(rq)) @@ -1550,7 +1725,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) return 0; /* Force a fast reset for terminated contexts (ignoring sysfs!) 
*/ - if (unlikely(i915_gem_context_is_banned(rq->gem_context))) + if (unlikely(intel_context_is_banned(rq->context))) return 1; return READ_ONCE(engine->props.preempt_timeout_ms); @@ -1565,6 +1740,11 @@ static void set_preempt_timeout(struct intel_engine_cs *engine) active_preempt_timeout(engine)); } +static inline void clear_ports(struct i915_request **ports, int count) +{ + memset_p((void **)ports, NULL, count); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1627,12 +1807,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = last_active(execlists); if (last) { if (need_preempt(engine, last, rb)) { - GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n", - engine->name, - last->fence.context, - last->fence.seqno, - last->sched.attr.priority, - execlists->queue_priority_hint); + ENGINE_TRACE(engine, + "preempting last=%llx:%lld, prio=%d, hint=%d\n", + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); record_preemption(execlists); /* @@ -1658,16 +1838,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * tendency to ignore us rewinding the TAIL to the * end of an earlier request. 
*/ - last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; + last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && timer_expired(&engine->execlists.timer)) { - GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", - engine->name, - last->fence.context, - last->fence.seqno, - last->sched.attr.priority, - execlists->queue_priority_hint); + ENGINE_TRACE(engine, + "expired last=%llx:%lld, prio=%d, hint=%d\n", + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); ring_set_paused(engine, 1); defer_active(engine); @@ -1730,7 +1910,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq != ve->request); GEM_BUG_ON(rq->engine != &ve->base); - GEM_BUG_ON(rq->hw_context != &ve->context); + GEM_BUG_ON(rq->context != &ve->context); if (rq_prio(rq) >= queue_prio(execlists)) { if (!virtual_matches(ve, rq, engine)) { @@ -1744,14 +1924,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) return; /* leave this for another */ } - GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", - engine->name, - rq->fence.context, - rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - "", - yesno(engine != ve->siblings[0])); + ENGINE_TRACE(engine, + "virtual rq=%llx:%lld%s, new engine? %s\n", + rq->fence.context, + rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + yesno(engine != ve->siblings[0])); ve->request = NULL; ve->base.execlists.queue_priority_hint = INT_MIN; @@ -1849,7 +2029,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * same LRCA, i.e. we must submit 2 different * contexts if we submit 2 ELSP. 
*/ - if (last->hw_context == rq->hw_context) + if (last->context == rq->context) goto done; if (i915_request_has_sentinel(last)) @@ -1862,8 +2042,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the same context (even though a different * request) to the second port. */ - if (ctx_single_port_submission(last->hw_context) || - ctx_single_port_submission(rq->hw_context)) + if (ctx_single_port_submission(last->context) || + ctx_single_port_submission(rq->context)) goto done; merge = false; @@ -1877,8 +2057,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } GEM_BUG_ON(last && - !can_merge_ctx(last->hw_context, - rq->hw_context)); + !can_merge_ctx(last->context, + rq->context)); submit = true; last = rq; @@ -1907,9 +2087,6 @@ done: * interrupt for secondary ports). */ execlists->queue_priority_hint = queue_prio(execlists); - GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n", - engine->name, execlists->queue_priority_hint, - yesno(submit)); if (submit) { *port = execlists_schedule_in(last, port - execlists->pending); @@ -1928,10 +2105,9 @@ done: goto skip_submit; } + clear_ports(port + 1, last_port - port); - memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); - set_preempt_timeout(engine); } else { skip_submit: @@ -1946,13 +2122,14 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) for (port = execlists->pending; *port; port++) execlists_schedule_out(*port); - memset(execlists->pending, 0, sizeof(execlists->pending)); + clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); /* Mark the end of active before we overwrite *active */ for (port = xchg(&execlists->active, execlists->pending); *port; port++) execlists_schedule_out(*port); - WRITE_ONCE(execlists->active, - memset(execlists->inflight, 0, sizeof(execlists->inflight))); + clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); + + WRITE_ONCE(execlists->active, execlists->inflight); } static inline 
void @@ -2058,7 +2235,7 @@ static void process_csb(struct intel_engine_cs *engine) */ head = execlists->csb_head; tail = READ_ONCE(*execlists->csb_write); - GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); + ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); if (unlikely(head == tail)) return; @@ -2096,9 +2273,8 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n", - engine->name, head, - buf[2 * head + 0], buf[2 * head + 1]); + ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", + head, buf[2 * head + 0], buf[2 * head + 1]); if (INTEL_GEN(engine->i915) >= 12) promote = gen12_csb_parse(execlists, buf + 2 * head); @@ -2109,7 +2285,6 @@ static void process_csb(struct intel_engine_cs *engine) /* Point active to the new ELSP; prevent overwriting */ WRITE_ONCE(execlists->active, execlists->pending); - set_timeslice(engine); if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); @@ -2150,6 +2325,7 @@ static void process_csb(struct intel_engine_cs *engine) } while (head != tail); execlists->csb_head = head; + set_timeslice(engine); /* * Gen11 has proven to fail wrt global observation point between @@ -2189,10 +2365,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine) /* Mark this tasklet as disabled to avoid waiting for it to complete */ tasklet_disable_nosync(&engine->execlists.tasklet); - GEM_TRACE("%s: preempt timeout %lu+%ums\n", - engine->name, - READ_ONCE(engine->props.preempt_timeout_ms), - jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); + ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", + READ_ONCE(engine->props.preempt_timeout_ms), + jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); intel_engine_reset(engine, "preemption time out"); tasklet_enable(&engine->execlists.tasklet); @@ -2333,7 +2508,7 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine) vaddr += engine->context_size; - 
memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); + memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); } static void @@ -2344,7 +2519,7 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) vaddr += engine->context_size; - if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) + if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) dev_err_once(engine->i915->drm.dev, "%s context redzone overwritten!\n", engine->name); @@ -2369,7 +2544,7 @@ __execlists_update_reg_state(const struct intel_context *ce, GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); regs[CTX_RING_HEAD] = ring->head; regs[CTX_RING_TAIL] = ring->tail; @@ -2387,33 +2562,21 @@ __execlists_context_pin(struct intel_context *ce, struct intel_engine_cs *engine) { void *vaddr; - int ret; GEM_BUG_ON(!ce->state); - - ret = intel_context_active_acquire(ce); - if (ret) - goto err; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); vaddr = i915_gem_object_pin_map(ce->state->obj, i915_coherent_map_type(engine->i915) | I915_MAP_OVERRIDE); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto unpin_active; - } + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine); + ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); return 0; - -unpin_active: - intel_context_active_release(ce); -err: - return ret; } static int execlists_context_pin(struct intel_context *ce) @@ -2428,6 +2591,9 @@ static int execlists_context_alloc(struct intel_context *ce) static void execlists_context_reset(struct intel_context *ce) { + CE_TRACE(ce, "reset\n"); + GEM_BUG_ON(!intel_context_is_pinned(ce)); + /* * Because we emit WA_TAIL_DWORDS there may be a disparity * between our bookkeeping in ce->ring->head 
and ce->ring->tail and @@ -2444,8 +2610,14 @@ static void execlists_context_reset(struct intel_context *ce) * So to avoid that we reset the context images upon resume. For * simplicity, we just zero everything out. */ - intel_ring_reset(ce->ring, 0); + intel_ring_reset(ce->ring, ce->ring->emit); + + /* Scrub away the garbage */ + execlists_init_reg_state(ce->lrc_reg_state, + ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine); + + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -2497,7 +2669,7 @@ static int execlists_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); /* * Flush enough space to reduce the likelihood of waiting after @@ -2867,6 +3039,8 @@ static void enable_execlists(struct intel_engine_cs *engine) RING_HWS_PGA, i915_ggtt_offset(engine->status_page.vma)); ENGINE_POSTING_READ(engine, RING_HWS_PGA); + + engine->context_tag = 0; } static bool unexpected_starting_state(struct intel_engine_cs *engine) @@ -2906,8 +3080,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; unsigned long flags; - GEM_TRACE("%s: depth<-%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth<-%d\n", + atomic_read(&execlists->tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -2960,26 +3134,20 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ + /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. 
+ */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + invalidate_csb_entries(&execlists->csb_status[0], &execlists->csb_status[reset_value]); } -static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x60; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x54; - else if (engine->class == RENDER_CLASS) - return 0x58; - else - return -1; -} - -static void __execlists_reset_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine) +static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) { - u32 *regs = ce->lrc_reg_state; int x; x = lrc_ring_mi_mode(engine); @@ -2989,6 +3157,14 @@ static void __execlists_reset_reg_state(const struct intel_context *ce, } } +static void __execlists_reset_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + __reset_stop_ring(regs, engine); +} + static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -3016,7 +3192,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) /* We still have requests in-flight; the engine should be active */ GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - ce = rq->hw_context; + ce = rq->context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); if (i915_request_completed(rq)) { @@ -3073,8 +3249,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) restore_default_state(ce, engine); out_replay: - GEM_TRACE("%s replay {head:%04x, tail:%04x}\n", - engine->name, ce->ring->head, ce->ring->tail); + ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", + ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); @@ -3086,11 
+3262,11 @@ unwind: __unwind_incomplete_requests(engine); } -static void execlists_reset(struct intel_engine_cs *engine, bool stalled) +static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) { unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); spin_lock_irqsave(&engine->active.lock, flags); @@ -3104,14 +3280,14 @@ static void nop_submission_tasklet(unsigned long data) /* The driver is wedged; don't process any more events. */ } -static void execlists_cancel_requests(struct intel_engine_cs *engine) +static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -3198,13 +3374,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) if (__tasklet_enable(&execlists->tasklet)) /* And kick in case we missed a new request submission. 
*/ tasklet_hi_schedule(&execlists->tasklet); - GEM_TRACE("%s: depth->%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth->%d\n", + atomic_read(&execlists->tasklet.count)); } -static int gen8_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) +static int gen8_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) { u32 *cs; @@ -3238,7 +3414,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, return 0; } -static int gen9_emit_bb_start(struct i915_request *rq, +static int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) { @@ -3693,12 +3869,12 @@ static void execlists_park(struct intel_engine_cs *engine) void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; - engine->cancel_requests = execlists_cancel_requests; engine->schedule = i915_schedule; engine->execlists.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; + engine->reset.rewind = execlists_reset_rewind; + engine->reset.cancel = execlists_reset_cancel; engine->reset.finish = execlists_reset_finish; engine->park = execlists_park; @@ -3713,13 +3889,27 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; + + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen8_emit_bb_start_noarb; } -static void execlists_destroy(struct intel_engine_cs *engine) +static void execlists_shutdown(struct intel_engine_cs *engine) { + /* Synchronise with residual timers and any softirq they raise */ + del_timer_sync(&engine->execlists.timer); + del_timer_sync(&engine->execlists.preempt); + tasklet_kill(&engine->execlists.tasklet); +} + 
+static void execlists_release(struct intel_engine_cs *engine) +{ + execlists_shutdown(engine); + intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); - kfree(engine); } static void @@ -3727,13 +3917,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ - engine->destroy = execlists_destroy; engine->resume = execlists_resume; - engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; - engine->reset.finish = execlists_reset_finish; - engine->cops = &execlists_context_ops; engine->request_alloc = execlists_request_alloc; @@ -3756,10 +3941,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) * until a more refined solution exists. */ } - if (IS_GEN(engine->i915, 8)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen9_emit_bb_start; } static inline void @@ -3803,6 +3984,11 @@ static void rcs_submission_override(struct intel_engine_cs *engine) int intel_execlists_submission_setup(struct intel_engine_cs *engine) { + struct intel_engine_execlists * const execlists = &engine->execlists; + struct drm_i915_private *i915 = engine->i915; + struct intel_uncore *uncore = engine->uncore; + u32 base = engine->mmio_base; + tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); timer_setup(&engine->execlists.timer, execlists_timeslice, 0); @@ -3814,21 +4000,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) if (engine->class == RENDER_CLASS) rcs_submission_override(engine); - return 0; -} - -int intel_execlists_submission_init(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - u32 base = engine->mmio_base; - int ret; - - ret = intel_engine_init_common(engine); - if (ret) - return ret; - if 
(intel_init_workaround_bb(engine)) /* * We continue even if we fail to initialize WA batch @@ -3860,6 +4031,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) reset_csb_pointers(engine); + /* Finally, take ownership and responsibility for cleanup! */ + engine->release = execlists_release; + return 0; } @@ -3899,18 +4073,21 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) static void init_common_reg_state(u32 * const regs, const struct intel_engine_cs *engine, - const struct intel_ring *ring) + const struct intel_ring *ring, + bool inhibit) { - regs[CTX_CONTEXT_CONTROL] = - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + u32 ctl; + + ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (inhibit) + ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; if (INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + regs[CTX_CONTEXT_CONTROL] = ctl; - regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; - regs[CTX_BB_STATE] = RING_BB_PPGTT; + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; } static void init_wa_bb_reg_state(u32 * const regs, @@ -3966,7 +4143,7 @@ static void execlists_init_reg_state(u32 *regs, const struct intel_context *ce, const struct intel_engine_cs *engine, const struct intel_ring *ring, - bool close) + bool inhibit) { /* * A context is actually a big batch buffer with several @@ -3978,21 +4155,17 @@ static void execlists_init_reg_state(u32 *regs, * * Must keep consistent with virtual_update_register_offsets(). 
*/ - u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); - - if (close) { /* Close the batch; used mainly by live_lrc_layout() */ - *bbe = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - *bbe |= BIT(0); - } + set_offsets(regs, reg_offsets(engine), engine, inhibit); - init_common_reg_state(regs, engine, ring); + init_common_reg_state(regs, engine, ring, inhibit); init_ppgtt_reg_state(regs, vm_alias(ce->vm)); init_wa_bb_reg_state(regs, engine, INTEL_GEN(engine->i915) >= 12 ? GEN12_CTX_BB_PER_CTX_PTR : CTX_BB_PER_CTX_PTR); + + __reset_stop_ring(regs, engine); } static int @@ -4003,7 +4176,6 @@ populate_lr_context(struct intel_context *ce, { bool inhibit = true; void *vaddr; - u32 *regs; int ret; vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); @@ -4027,16 +4199,14 @@ populate_lr_context(struct intel_context *ce, memcpy(vaddr, defaults, engine->context_size); i915_gem_object_unpin_map(engine->default_state); + __set_bit(CONTEXT_VALID_BIT, &ce->flags); inhibit = false; } /* The second page of the context object contains some fields which must * be set up prior to the first execution. 
*/ - regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring, inhibit); - if (inhibit) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + ce, engine, ring, inhibit); ret = 0; err_unpin_ctx: @@ -4174,6 +4344,13 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) ve->siblings[0]); } +static int virtual_context_alloc(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + + return __execlists_context_alloc(ce, ve->siblings[0]); +} + static int virtual_context_pin(struct intel_context *ce) { struct virtual_engine *ve = container_of(ce, typeof(*ve), context); @@ -4211,6 +4388,8 @@ static void virtual_context_exit(struct intel_context *ce) } static const struct intel_context_ops virtual_context_ops = { + .alloc = virtual_context_alloc, + .pin = virtual_context_pin, .unpin = execlists_context_unpin, @@ -4237,10 +4416,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) mask = ve->siblings[0]->mask; } - GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n", - ve->base.name, - rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", + rq->fence.context, rq->fence.seqno, + mask, ve->base.execlists.queue_priority_hint); return mask; } @@ -4331,10 +4509,9 @@ static void virtual_submit_request(struct i915_request *rq) struct i915_request *old; unsigned long flags; - GEM_TRACE("%s: rq=%llx:%lld\n", - ve->base.name, - rq->fence.context, - rq->fence.seqno); + ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", + rq->fence.context, + rq->fence.seqno); GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); @@ -4402,8 +4579,7 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) } struct intel_context * -intel_execlists_create_virtual(struct 
i915_gem_context *ctx, - struct intel_engine_cs **siblings, +intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) { struct virtual_engine *ve; @@ -4414,13 +4590,13 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, return ERR_PTR(-EINVAL); if (count == 1) - return intel_context_create(ctx, siblings[0]); + return intel_context_create(siblings[0]); ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); if (!ve) return ERR_PTR(-ENOMEM); - ve->base.i915 = ctx->i915; + ve->base.i915 = siblings[0]->i915; ve->base.gt = siblings[0]->gt; ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; @@ -4449,7 +4625,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_breadcrumbs(&ve->base); - intel_engine_init_execlists(&ve->base); ve->base.cops = &virtual_context_ops; @@ -4465,7 +4640,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, virtual_submission_tasklet, (unsigned long)ve); - intel_context_init(&ve->context, ctx, &ve->base); + intel_context_init(&ve->context, &ve->base); for (n = 0; n < count; n++) { struct intel_engine_cs *sibling = siblings[n]; @@ -4532,12 +4707,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.flags |= I915_ENGINE_IS_VIRTUAL; - err = __execlists_context_alloc(&ve->context, siblings[0]); - if (err) - goto err_put; - - __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags); - return &ve->context; err_put: @@ -4546,14 +4715,12 @@ err_put: } struct intel_context * -intel_execlists_clone_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs *src) +intel_execlists_clone_virtual(struct intel_engine_cs *src) { struct virtual_engine *se = to_virtual_engine(src); struct intel_context *dst; - dst = intel_execlists_create_virtual(ctx, - se->siblings, + dst = intel_execlists_create_virtual(se->siblings, se->num_siblings); if (IS_ERR(dst)) return dst; diff --git 
a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 04511d8ebdc1..dfbc214e14f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -83,7 +83,6 @@ enum { void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int intel_execlists_submission_setup(struct intel_engine_cs *engine); -int intel_execlists_submission_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ /* At the start of the context image is its per-process HWS page */ @@ -111,13 +110,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max); struct intel_context * -intel_execlists_create_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs **siblings, +intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); struct intel_context * -intel_execlists_clone_virtual(struct i915_gem_context *ctx, - struct intel_engine_cs *src); +intel_execlists_clone_virtual(struct intel_engine_cs *src); int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 06ab0276e10e..08a3be65f700 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -13,8 +13,8 @@ #define CTX_CONTEXT_CONTROL (0x02 + 1) #define CTX_RING_HEAD (0x04 + 1) #define CTX_RING_TAIL (0x06 + 1) -#define CTX_RING_BUFFER_START (0x08 + 1) -#define CTX_RING_BUFFER_CONTROL (0x0a + 1) +#define CTX_RING_START (0x08 + 1) +#define CTX_RING_CTL (0x0a + 1) #define CTX_BB_STATE (0x10 + 1) #define CTX_BB_PER_CTX_PTR (0x18 + 1) #define CTX_PDP3_UDW (0x24 + 1) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 2b977991b785..eeef90b55c64 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -127,7 +127,7 @@ struct drm_i915_mocs_table { 
LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ L3_3_WB) -static const struct drm_i915_mocs_entry skylake_mocs_table[] = { +static const struct drm_i915_mocs_entry skl_mocs_table[] = { GEN9_MOCS_ENTRIES, MOCS_ENTRY(I915_MOCS_CACHED, LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), @@ -233,7 +233,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ L3_1_UC) -static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = { +static const struct drm_i915_mocs_entry tgl_mocs_table[] = { /* Base - Error (Reserved for Non-Use) */ MOCS_ENTRY(0, 0x0, 0x0), /* Base - Reserved */ @@ -267,7 +267,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = { L3_3_WB), }; -static const struct drm_i915_mocs_entry icelake_mocs_table[] = { +static const struct drm_i915_mocs_entry icl_mocs_table[] = { /* Base - Uncached (Deprecated) */ MOCS_ENTRY(I915_MOCS_UNCACHED, LE_1_UC | LE_TC_1_LLC, @@ -283,65 +283,42 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = { static bool get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { - bool result = false; - if (INTEL_GEN(i915) >= 12) { - table->size = ARRAY_SIZE(tigerlake_mocs_table); - table->table = tigerlake_mocs_table; + table->size = ARRAY_SIZE(tgl_mocs_table); + table->table = tgl_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; - result = true; } else if (IS_GEN(i915, 11)) { - table->size = ARRAY_SIZE(icelake_mocs_table); - table->table = icelake_mocs_table; + table->size = ARRAY_SIZE(icl_mocs_table); + table->table = icl_mocs_table; table->n_entries = GEN11_NUM_MOCS_ENTRIES; - result = true; } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { - table->size = ARRAY_SIZE(skylake_mocs_table); + table->size = ARRAY_SIZE(skl_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; - table->table = skylake_mocs_table; - result = true; + table->table = skl_mocs_table; } else if (IS_GEN9_LP(i915)) { table->size = 
ARRAY_SIZE(broxton_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; - result = true; } else { WARN_ONCE(INTEL_GEN(i915) >= 9, "Platform that should have a MOCS table does not.\n"); + return false; } + if (GEM_DEBUG_WARN_ON(table->size > table->n_entries)) + return false; + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN(i915, 9)) { int i; for (i = 0; i < table->size; i++) - if (WARN_ON(table->table[i].l3cc_value & - (L3_ESC(1) | L3_SCC(0x7)))) + if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value & + (L3_ESC(1) | L3_SCC(0x7)))) return false; } - return result; -} - -static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) -{ - switch (engine->id) { - case RCS0: - return GEN9_GFX_MOCS(index); - case VCS0: - return GEN9_MFX0_MOCS(index); - case BCS0: - return GEN9_BLT_MOCS(index); - case VECS0: - return GEN9_VEBOX_MOCS(index); - case VCS1: - return GEN9_MFX1_MOCS(index); - case VCS2: - return GEN11_MFX2_MOCS(index); - default: - MISSING_CASE(engine->id); - return INVALID_MMIO_REG; - } + return true; } /* @@ -351,29 +328,47 @@ static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].control_value; return table->table[I915_MOCS_PTE].control_value; } -static void init_mocs_table(struct intel_engine_cs *engine, - const struct drm_i915_mocs_table *table) +#define for_each_mocs(mocs, t, i) \ + for (i = 0; \ + i < (t)->n_entries ? 
(mocs = get_entry_control((t), i)), 1 : 0;\ + i++) + +static void __init_mocs_table(struct intel_uncore *uncore, + const struct drm_i915_mocs_table *table, + u32 addr) { - struct intel_uncore *uncore = engine->uncore; - u32 unused_value = table->table[I915_MOCS_PTE].control_value; unsigned int i; + u32 mocs; + + for_each_mocs(mocs, table, i) + intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); +} - for (i = 0; i < table->size; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - get_entry_control(table, i)); +static u32 mocs_offset(const struct intel_engine_cs *engine) +{ + static const u32 offset[] = { + [RCS0] = __GEN9_RCS0_MOCS0, + [VCS0] = __GEN9_VCS0_MOCS0, + [VCS1] = __GEN9_VCS1_MOCS0, + [VECS0] = __GEN9_VECS0_MOCS0, + [BCS0] = __GEN9_BCS0_MOCS0, + [VCS2] = __GEN11_VCS2_MOCS0, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset)); + return offset[engine->id]; +} - /* All remaining entries are unused */ - for (; i < table->n_entries; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - unused_value); +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) +{ + __init_mocs_table(engine->uncore, table, mocs_offset(engine)); } /* @@ -383,51 +378,34 @@ static void init_mocs_table(struct intel_engine_cs *engine, static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; return table->table[I915_MOCS_PTE].l3cc_value; } -static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, - u16 low, - u16 high) +static inline u32 l3cc_combine(u16 low, u16 high) { return low | (u32)high << 16; } +#define for_each_l3cc(l3cc, t, i) \ + for (i = 0; \ + i < ((t)->n_entries + 1) / 2 ? 
\ + (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \ + get_entry_l3cc((t), 2 * i + 1))), 1 : \ + 0; \ + i++) + static void init_l3cc_table(struct intel_engine_cs *engine, const struct drm_i915_mocs_table *table) { struct intel_uncore *uncore = engine->uncore; - u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value; unsigned int i; + u32 l3cc; - for (i = 0; i < table->size / 2; i++) { - u16 low = get_entry_l3cc(table, 2 * i); - u16 high = get_entry_l3cc(table, 2 * i + 1); - - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(table, low, high)); - } - - /* Odd table size - 1 left over */ - if (table->size & 1) { - u16 low = get_entry_l3cc(table, 2 * i); - - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(table, low, unused_value)); - i++; - } - - /* All remaining entries are also unused */ - for (; i < table->n_entries / 2; i++) - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(table, unused_value, - unused_value)); + for_each_l3cc(l3cc, table, i) + intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc); } void intel_mocs_init_engine(struct intel_engine_cs *engine) @@ -448,11 +426,14 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_l3cc_table(engine, &table); } -static void intel_mocs_init_global(struct intel_gt *gt) +static u32 global_mocs_offset(void) +{ + return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); +} + +static void init_global_mocs(struct intel_gt *gt) { - struct intel_uncore *uncore = gt->uncore; struct drm_i915_mocs_table table; - unsigned int index; /* * LLC and eDRAM control values are not applicable to dgfx @@ -460,32 +441,18 @@ static void intel_mocs_init_global(struct intel_gt *gt) if (IS_DGFX(gt->i915)) return; - GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); - if (!get_mocs_settings(gt->i915, &table)) return; - if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) - return; - - for (index = 0; index < table.size; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), 
- table.table[index].control_value); - - /* - * Ok, now set the unused entries to the invalid entry (index 0). These - * entries are officially undefined and no contract for the contents and - * settings is given for these entries. - */ - for (; index < table.n_entries; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), - table.table[0].control_value); + __init_mocs_table(gt->uncore, &table, global_mocs_offset()); } void intel_mocs_init(struct intel_gt *gt) { if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) - intel_mocs_init_global(gt); + init_global_mocs(gt); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_mocs.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c new file mode 100644 index 000000000000..f86f7e68ce5e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/slab.h> + +#include "i915_trace.h" +#include "intel_gtt.h" +#include "gen6_ppgtt.h" +#include "gen8_ppgtt.h" + +struct i915_page_table *alloc_pt(struct i915_address_space *vm) +{ + struct i915_page_table *pt; + + pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL); + if (unlikely(!pt)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, &pt->base))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + + atomic_set(&pt->used, 0); + return pt; +} + +struct i915_page_directory *__alloc_pd(size_t sz) +{ + struct i915_page_directory *pd; + + pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); + if (unlikely(!pd)) + return NULL; + + spin_lock_init(&pd->lock); + return pd; +} + +struct i915_page_directory *alloc_pd(struct i915_address_space *vm) +{ + struct i915_page_directory *pd; + + pd = __alloc_pd(sizeof(*pd)); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, px_base(pd)))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + return pd; +} + +void free_pd(struct i915_address_space *vm, 
struct i915_page_dma *pd) +{ + cleanup_page_dma(vm, pd); + kfree(pd); +} + +static inline void +write_dma_entry(struct i915_page_dma * const pdma, + const unsigned short idx, + const u64 encoded_entry) +{ + u64 * const vaddr = kmap_atomic(pdma->page); + + vaddr[idx] = encoded_entry; + kunmap_atomic(vaddr); +} + +void +__set_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_dma * const to, + u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) +{ + /* Each thread pre-pins the pd, and we may have a thread per pde. */ + GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry)); + + atomic_inc(px_used(pd)); + pd->entry[idx] = to; + write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); +} + +void +clear_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + const struct i915_page_scratch * const scratch) +{ + GEM_BUG_ON(atomic_read(px_used(pd)) == 0); + + write_dma_entry(px_base(pd), idx, scratch->encode); + pd->entry[idx] = NULL; + atomic_dec(px_used(pd)); +} + +bool +release_pd_entry(struct i915_page_directory * const pd, + const unsigned short idx, + struct i915_page_table * const pt, + const struct i915_page_scratch * const scratch) +{ + bool free = false; + + if (atomic_add_unless(&pt->used, -1, 1)) + return false; + + spin_lock(&pd->lock); + if (atomic_dec_and_test(&pt->used)) { + clear_pd_entry(pd, idx, scratch); + free = true; + } + spin_unlock(&pd->lock); + + return free; +} + +int i915_ppgtt_init_hw(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + gtt_write_workarounds(gt); + + if (IS_GEN(i915, 6)) + gen6_ppgtt_enable(gt); + else if (IS_GEN(i915, 7)) + gen7_ppgtt_enable(gt); + + return 0; +} + +static struct i915_ppgtt * +__ppgtt_create(struct intel_gt *gt) +{ + if (INTEL_GEN(gt->i915) < 8) + return gen6_ppgtt_create(gt); + else + return gen8_ppgtt_create(gt); +} + +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) +{ + struct 
i915_ppgtt *ppgtt; + + ppgtt = __ppgtt_create(gt); + if (IS_ERR(ppgtt)) + return ppgtt; + + trace_i915_ppgtt_create(&ppgtt->vm); + + return ppgtt; +} + +static int ppgtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + u32 pte_flags; + int err; + + if (flags & I915_VMA_ALLOC) { + err = vma->vm->allocate_va_range(vma->vm, + vma->node.start, vma->size); + if (err) + return err; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + } + + /* Applicable to VLV, and gen8+ */ + pte_flags = 0; + if (i915_gem_object_is_readonly(vma->obj)) + pte_flags |= PTE_READ_ONLY; + + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + wmb(); + + return 0; +} + +static void ppgtt_unbind_vma(struct i915_vma *vma) +{ + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); +} + +int ppgtt_set_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->pages); + + vma->pages = vma->obj->mm.pages; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + ppgtt->vm.gt = gt; + ppgtt->vm.i915 = i915; + ppgtt->vm.dma = &i915->drm.pdev->dev; + ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); + + i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); + + ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; + ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; + ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; + ppgtt->vm.vma_ops.clear_pages = clear_pages; +} diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 700104b90163..9e303c29d6e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -88,21 +88,18 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) * do not want the enable hysteresis to less than the wakeup 
latency. * * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). + * service latency, and puts it under 10us for Icelake, similar to + * Broadwell+, To be conservative, we want to factor in a context + * switch on top (due to ksoftirqd). */ - set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); /* 3a: Enable RC6 */ - set(uncore, GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1)); + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); set(uncore, GEN9_PG_ENABLE, GEN9_RENDER_PG_ENABLE | @@ -173,10 +170,10 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) else rc6_mode = GEN6_RC_CTL_EI_MODE(1); - set(uncore, GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - rc6_mode); + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode; /* * WaRsDisableCoarsePowerGating:skl,cnl @@ -203,10 +200,10 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ - set(uncore, GEN6_RC_CONTROL, + rc6->ctl_enable = GEN6_RC_CTL_HW_ENABLE | GEN7_RC_CTL_TO_MODE | - GEN6_RC_CTL_RC6_ENABLE); + GEN6_RC_CTL_RC6_ENABLE; } static void gen6_rc6_enable(struct intel_rc6 *rc6) @@ -242,10 +239,10 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; if (HAS_RC6pp(i915)) rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - set(uncore, 
GEN6_RC_CONTROL, + rc6->ctl_enable = rc6_mask | GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); + GEN6_RC_CTL_HW_ENABLE; rc6vids = 0; ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, @@ -363,7 +360,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6) VLV_RENDER_RC6_COUNT_EN)); /* 3: Enable RC6 */ - set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE); + rc6->ctl_enable = GEN7_RC_CTL_TO_MODE; } static void vlv_rc6_enable(struct intel_rc6 *rc6) @@ -389,8 +386,8 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6) VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); - set(uncore, GEN6_RC_CONTROL, - GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); + rc6->ctl_enable = + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; } static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) @@ -491,64 +488,19 @@ static void rpm_put(struct intel_rc6 *rc6) rc6->wakeref = false; } -static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6) -{ - return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO); -} - -static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6) +static bool pctx_corrupted(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) - return; - - if (intel_rc6_ctx_corrupted(rc6)) { - DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); - rc6->ctx_corrupted = true; - } -} - -/** - * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA - * @rc6: rc6 state - * - * Perform any steps needed to re-init the RC6 CTX WA after system resume. 
- */ -void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6) -{ - if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) { - DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); - rc6->ctx_corrupted = false; - } -} - -/** - * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption - * @rc6: rc6 state - * - * Check if an RC6 CTX corruption has happened since the last check and if so - * disable RC6 and runtime power management. -*/ -void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6) -{ - struct drm_i915_private *i915 = rc6_to_i915(rc6); - - if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) - return; - - if (rc6->ctx_corrupted) - return; - - if (!intel_rc6_ctx_corrupted(rc6)) - return; - - DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + return false; - intel_rc6_disable(rc6); - rc6->ctx_corrupted = true; + if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO)) + return false; - return; + dev_notice(i915->drm.dev, + "RC6 context corruption, disabling runtime power management\n"); + return true; } static void __intel_rc6_disable(struct intel_rc6 *rc6) @@ -575,8 +527,6 @@ void intel_rc6_init(struct intel_rc6 *rc6) if (!rc6_supported(rc6)) return; - intel_rc6_ctx_wa_init(rc6); - if (IS_CHERRYVIEW(i915)) err = chv_rc6_init(rc6); else if (IS_VALLEYVIEW(i915)) @@ -611,9 +561,6 @@ void intel_rc6_enable(struct intel_rc6 *rc6) GEM_BUG_ON(rc6->enabled); - if (rc6->ctx_corrupted) - return; - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (IS_CHERRYVIEW(i915)) @@ -629,13 +576,51 @@ void intel_rc6_enable(struct intel_rc6 *rc6) else if (INTEL_GEN(i915) >= 6) gen6_rc6_enable(rc6); + rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE; + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + rc6->ctl_enable = 0; + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + if (unlikely(pctx_corrupted(rc6))) + return; + /* rc6 is ready, runtime-pm is go! 
*/ rpm_put(rc6); rc6->enabled = true; } +void intel_rc6_unpark(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->enabled) + return; + + /* Restore HW timers for automatic RC6 entry while busy */ + set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable); +} + +void intel_rc6_park(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->enabled) + return; + + if (unlikely(pctx_corrupted(rc6))) { + intel_rc6_disable(rc6); + return; + } + + if (!rc6->manual) + return; + + /* Turn off the HW timers and go directly to rc6 */ + set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); + set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT); +} + void intel_rc6_disable(struct intel_rc6 *rc6) { if (!rc6->enabled) @@ -785,3 +770,7 @@ u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) { return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_rc6.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index 1370f6834a4c..9f0f23fca8af 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -15,6 +15,9 @@ struct intel_rc6; void intel_rc6_init(struct intel_rc6 *rc6); void intel_rc6_fini(struct intel_rc6 *rc6); +void intel_rc6_unpark(struct intel_rc6 *rc6); +void intel_rc6_park(struct intel_rc6 *rc6); + void intel_rc6_sanitize(struct intel_rc6 *rc6); void intel_rc6_enable(struct intel_rc6 *rc6); void intel_rc6_disable(struct intel_rc6 *rc6); @@ -22,7 +25,4 @@ void intel_rc6_disable(struct intel_rc6 *rc6); u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); -void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6); -void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6); - #endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h 
b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index 89ad5697a8d4..bfbb623f7a4f 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -18,12 +18,14 @@ struct intel_rc6 { u64 prev_hw_residency[4]; u64 cur_residency[4]; + u32 ctl_enable; + struct drm_i915_gem_object *pctx; bool supported : 1; bool enabled : 1; + bool manual : 1; bool wakeref : 1; - bool ctx_corrupted : 1; }; #endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index c4edc35e7d89..5954ecc3207f 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -29,16 +29,6 @@ #include "intel_renderstate.h" #include "intel_ring.h" -struct intel_renderstate { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 batch_offset; - u32 batch_size; - u32 aux_offset; - u32 aux_size; -}; - static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { @@ -84,11 +74,11 @@ static int render_state_setup(struct intel_renderstate *so, u32 *d; int ret; - ret = i915_gem_object_prepare_write(so->obj, &needs_clflush); + ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush); if (ret) return ret; - d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); + d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -166,7 +156,7 @@ static int render_state_setup(struct intel_renderstate *so, ret = 0; out: - i915_gem_object_finish_access(so->obj); + i915_gem_object_finish_access(so->vma->obj); return ret; err: @@ -177,61 +167,84 @@ err: #undef OUT_BATCH -int intel_renderstate_emit(struct i915_request *rq) +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = rq->engine; - struct 
intel_renderstate so = {}; /* keep the compiler happy */ + struct drm_i915_gem_object *obj; int err; - so.rodata = render_state_get_rodata(engine); - if (!so.rodata) + memset(so, 0, sizeof(*so)); + + so->rodata = render_state_get_rodata(engine); + if (!so->rodata) return 0; - if (so.rodata->batch_items * 4 > PAGE_SIZE) + if (so->rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(so.obj)) - return PTR_ERR(so.obj); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); - so.vma = i915_vma_instance(so.obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(so.vma)) { - err = PTR_ERR(so.vma); + so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(so->vma)) { + err = PTR_ERR(so->vma); goto err_obj; } - err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) goto err_vma; - err = render_state_setup(&so, rq->i915); + err = render_state_setup(so, engine->i915); if (err) goto err_unpin; + return 0; + +err_unpin: + i915_vma_unpin(so->vma); +err_vma: + i915_vma_close(so->vma); +err_obj: + i915_gem_object_put(obj); + so->vma = NULL; + return err; +} + +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + int err; + + if (!so->vma) + return 0; + err = engine->emit_bb_start(rq, - so.batch_offset, so.batch_size, + so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; - if (so.aux_size > 8) { + if (so->aux_size > 8) { err = engine->emit_bb_start(rq, - so.aux_offset, so.aux_size, + so->aux_offset, so->aux_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; } - i915_vma_lock(so.vma); - err = i915_request_await_object(rq, so.vma->obj, false); + i915_vma_lock(so->vma); + err = i915_request_await_object(rq, so->vma->obj, 
false); if (err == 0) - err = i915_vma_move_to_active(so.vma, rq, 0); - i915_vma_unlock(so.vma); -err_unpin: - i915_vma_unpin(so.vma); -err_vma: - i915_vma_close(so.vma); -err_obj: - i915_gem_object_put(so.obj); + err = i915_vma_move_to_active(so->vma, rq, 0); + i915_vma_unlock(so->vma); + return err; } + +void intel_renderstate_fini(struct intel_renderstate *so) +{ + i915_vma_unpin_and_release(&so->vma, 0); +} diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 8d5079145054..5700be69a05a 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -27,6 +27,8 @@ #include <linux/types.h> struct i915_request; +struct intel_engine_cs; +struct i915_vma; struct intel_renderstate_rodata { const u32 *reloc; @@ -46,6 +48,19 @@ extern const struct intel_renderstate_rodata gen7_null_state; extern const struct intel_renderstate_rodata gen8_null_state; extern const struct intel_renderstate_rodata gen9_null_state; -int intel_renderstate_emit(struct i915_request *rq); +struct intel_renderstate { + const struct intel_renderstate_rodata *rodata; + struct i915_vma *vma; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; +}; + +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine); +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq); +void intel_renderstate_fini(struct intel_renderstate *so); #endif /* _INTEL_RENDERSTATE_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index c97423a76642..beee0cf89bce 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -21,6 +21,7 @@ #include "intel_reset.h" #include "uc/intel_guc.h" +#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -40,27 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) static void 
engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *hung_ctx = rq->gem_context; + struct intel_context *hung_ctx = rq->context; if (!i915_request_is_active(rq)) return; lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) - if (rq->gem_context == hung_ctx) + if (rq->context == hung_ctx) i915_request_skip(rq, -EIO); } -static void client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) +static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { - unsigned int score; + struct drm_i915_file_private *file_priv = ctx->file_priv; unsigned long prev_hang; + unsigned int score; + + if (IS_ERR_OR_NULL(file_priv)) + return; - if (i915_gem_context_is_banned(ctx)) + score = 0; + if (banned) score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; prev_hang = xchg(&file_priv->hang_timestamp, jiffies); if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) @@ -75,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static bool context_mark_guilty(struct i915_gem_context *ctx) +static bool mark_guilty(struct i915_request *rq) { + struct i915_gem_context *ctx; unsigned long prev_hang; bool banned; int i; + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (!ctx) + return false; + + if (i915_gem_context_is_closed(ctx)) { + intel_context_set_banned(rq->context); + banned = true; + goto out; + } + atomic_inc(&ctx->guilty_count); /* Cool contexts are too cool to be banned! (Used for reset testing.) 
*/ - if (!i915_gem_context_is_bannable(ctx)) - return false; + if (!i915_gem_context_is_bannable(ctx)) { + banned = false; + goto out; + } + + dev_notice(ctx->i915->drm.dev, + "%s context reset due to GPU hang\n", + ctx->name); /* Record the timestamp for the last N hangs */ prev_hang = ctx->hang_timestamp[0]; @@ -100,38 +124,43 @@ static bool context_mark_guilty(struct i915_gem_context *ctx) if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - i915_gem_context_set_banned(ctx); + intel_context_set_banned(rq->context); } - if (!IS_ERR_OR_NULL(ctx->file_priv)) - client_mark_guilty(ctx->file_priv, ctx); + client_mark_guilty(ctx, banned); +out: + i915_gem_context_put(ctx); return banned; } -static void context_mark_innocent(struct i915_gem_context *ctx) +static void mark_innocent(struct i915_request *rq) { - atomic_inc(&ctx->active_count); + struct i915_gem_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx) + atomic_inc(&ctx->active_count); + rcu_read_unlock(); } void __i915_request_reset(struct i915_request *rq, bool guilty) { - GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n", - rq->engine->name, - rq->fence.context, - rq->fence.seqno, - yesno(guilty)); + RQ_TRACE(rq, "guilty? 
%s\n", yesno(guilty)); GEM_BUG_ON(i915_request_completed(rq)); + rcu_read_lock(); /* protect the GEM context */ if (guilty) { i915_request_skip(rq, -EIO); - if (context_mark_guilty(rq->gem_context)) + if (mark_guilty(rq)) engine_skip_context(rq); } else { dma_fence_set_error(&rq->fence, -EAGAIN); - context_mark_innocent(rq->gem_context); + mark_innocent(rq); } + rcu_read_unlock(); } static bool i915_in_reset(struct pci_dev *pdev) @@ -218,9 +247,8 @@ out: return ret; } -static int ironlake_do_reset(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_uncore *uncore = gt->uncore; int ret; @@ -564,7 +592,7 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt) else if (INTEL_GEN(i915) >= 6) return gen6_reset_engines; else if (INTEL_GEN(i915) >= 5) - return ironlake_do_reset; + return ilk_do_reset; else if (IS_G4X(i915)) return g4x_do_reset; else if (IS_G33(i915) || IS_PINEVIEW(i915)) @@ -592,7 +620,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { - GEM_TRACE("engine_mask=%x\n", engine_mask); + GT_TRACE(gt, "engine_mask=%x\n", engine_mask); preempt_disable(); ret = reset(gt, engine_mask, retry); preempt_enable(); @@ -647,7 +675,8 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * GPU state upon resume, i.e. fail to restart after a reset. 
*/ intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - engine->reset.prepare(engine); + if (engine->reset.prepare) + engine->reset.prepare(engine); } static void revoke_mmaps(struct intel_gt *gt) @@ -667,8 +696,13 @@ static void revoke_mmaps(struct intel_gt *gt) continue; GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]); - node = &vma->obj->base.vma_node; + + if (!vma->mmo) + continue; + + node = &vma->mmo->vma_node; vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, vma->size, @@ -722,10 +756,11 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) static void reset_finish_engine(struct intel_engine_cs *engine) { - engine->reset.finish(engine); + if (engine->reset.finish) + engine->reset.finish(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - intel_engine_breadcrumbs_irq(engine); + intel_engine_signal_breadcrumbs(engine); } static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) @@ -745,8 +780,7 @@ static void nop_submit_request(struct i915_request *request) struct intel_engine_cs *engine = request->engine; unsigned long flags; - GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - engine->name, request->fence.context, request->fence.seqno); + RQ_TRACE(request, "-EIO\n"); dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&engine->active.lock, flags); @@ -754,7 +788,7 @@ static void nop_submit_request(struct i915_request *request) i915_request_mark_complete(request); spin_unlock_irqrestore(&engine->active.lock, flags); - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); } static void __intel_gt_set_wedged(struct intel_gt *gt) @@ -773,7 +807,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) intel_engine_dump(engine, &p, "%s\n", engine->name); } - GEM_TRACE("start\n"); + GT_TRACE(gt, "start\n"); /* * First, stop submission to hw, but do not
yet complete requests by @@ -799,11 +833,12 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) /* Mark all executing requests as skipped */ for_each_engine(engine, gt, id) - engine->cancel_requests(engine); + if (engine->reset.cancel) + engine->reset.cancel(engine); reset_finish(gt, awake); - GEM_TRACE("end\n"); + GT_TRACE(gt, "end\n"); } void intel_gt_set_wedged(struct intel_gt *gt) @@ -820,7 +855,6 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) { struct intel_gt_timelines *timelines = &gt->timelines; struct intel_timeline *tl; - unsigned long flags; bool ok; if (!test_bit(I915_WEDGED, &gt->reset.flags)) @@ -830,7 +864,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags)) return false; - GEM_TRACE("start\n"); + GT_TRACE(gt, "start\n"); /* * Before unwedging, make sure that all pending operations @@ -842,7 +876,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * * No more can be submitted until we reset the wedged bit.
*/ - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); list_for_each_entry(tl, &timelines->active_list, link) { struct dma_fence *fence; @@ -850,7 +884,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (!fence) continue; - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); /* * All internal dependencies (i915_requests) will have @@ -863,10 +897,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) dma_fence_put(fence); /* Restart iteration after droping lock */ - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); tl = list_entry(&timelines->active_list, typeof(*tl), link); } - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); /* We must reset pending GPU events before restoring our submission */ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ @@ -892,7 +926,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) */ intel_engines_reset_default_submission(gt); - GEM_TRACE("end\n"); + GT_TRACE(gt, "end\n"); smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &gt->reset.flags); @@ -967,7 +1001,7 @@ void intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t awake; int ret; - GEM_TRACE("flags=%lx\n", gt->reset.flags); + GT_TRACE(gt, "flags=%lx\n", gt->reset.flags); might_sleep(); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags)); @@ -1070,9 +1104,10 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; + bool uses_guc = intel_engine_in_guc_submission_mode(engine); int ret; - GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); + ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags)); if (!intel_engine_pm_get_if_awake(engine)) @@ -1085,14 +1120,14 @@ int
intel_engine_reset(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!engine->gt->uc.guc.execbuf_client) + if (!uses_guc) ret = intel_gt_reset_engine(engine); else ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->gt->uc.guc.execbuf_client ? "GuC " : "", + uses_guc ? "GuC " : "", engine->name, ret); goto out; } @@ -1195,7 +1230,7 @@ void intel_gt_handle_error(struct intel_gt *gt, engine_mask &= INTEL_INFO(gt->i915)->engine_mask; if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(gt->i915, engine_mask, msg); + i915_capture_error_state(gt->i915); intel_gt_clear_error_registers(gt, engine_mask); } @@ -1288,10 +1323,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) if (!intel_gt_is_wedged(gt)) return 0; - /* Reset still in progress? Maybe we will recover? */ - if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) + if (intel_gt_has_init_error(gt)) return -EIO; + /* Reset still in progress? Maybe we will recover? */ if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, &gt->reset.flags))) @@ -1313,6 +1348,9 @@ void intel_gt_init_reset(struct intel_gt *gt) init_waitqueue_head(&gt->reset.queue); mutex_init(&gt->reset.mutex); init_srcu_struct(&gt->reset.backoff_srcu); + + /* no GPU until we are ready!
*/ + __set_bit(I915_WEDGED, &gt->reset.flags); } void intel_gt_fini_reset(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 93026217c121..bc44fe8e5ffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -33,6 +33,7 @@ #include "gem/i915_gem_context.h" +#include "gen6_ppgtt.h" #include "i915_drv.h" #include "i915_trace.h" #include "intel_context.h" @@ -362,6 +363,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) */ flags |= PIPE_CONTROL_CS_STALL; + /* + * CS_STALL suggests at least a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + /* Just flush everything. Experiments have shown that reducing the * number of bits based on the write domains has little performance * impact. @@ -380,13 +387,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; - /* - * TLB invalidate requires a post-sync write.
- */ - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache @@ -454,7 +454,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; @@ -496,14 +497,13 @@ static void set_hwstam(struct intel_engine_cs *engine, u32 mask) static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) { - struct drm_i915_private *dev_priv = engine->i915; u32 addr; addr = lower_32_bits(phys); - if (INTEL_GEN(dev_priv) >= 4) + if (INTEL_GEN(engine->i915) >= 4) addr |= (phys >> 28) & 0xf0; - I915_WRITE(HWS_PGA, addr); + intel_uncore_write(engine->uncore, HWS_PGA, addr); } static struct page *status_page(struct intel_engine_cs *engine) @@ -522,14 +522,13 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine) static void set_hwsp(struct intel_engine_cs *engine, u32 offset) { - struct drm_i915_private *dev_priv = engine->i915; i915_reg_t hwsp; /* * The ring status page addresses are no longer next to the rest of * the ring registers as of gen7. */ - if (IS_GEN(dev_priv, 7)) { + if (IS_GEN(engine->i915, 7)) { switch (engine->id) { /* * No more rings exist on Gen7. 
Default case is only to shut up @@ -551,14 +550,14 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) hwsp = VEBOX_HWS_PGA_GEN7; break; } - } else if (IS_GEN(dev_priv, 6)) { + } else if (IS_GEN(engine->i915, 6)) { hwsp = RING_HWS_PGA_GEN6(engine->mmio_base); } else { hwsp = RING_HWS_PGA(engine->mmio_base); } - I915_WRITE(hwsp, offset); - POSTING_READ(hwsp); + intel_uncore_write(engine->uncore, hwsp, offset); + intel_uncore_posting_read(engine->uncore, hwsp); } static void flush_cs_tlb(struct intel_engine_cs *engine) @@ -633,8 +632,8 @@ static int xcs_resume(struct intel_engine_cs *engine) struct intel_ring *ring = engine->legacy.ring; int ret = 0; - GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n", - engine->name, ring->head, ring->tail); + ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", + ring->head, ring->tail); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); @@ -721,7 +720,7 @@ static int xcs_resume(struct intel_engine_cs *engine) } /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); out: intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); @@ -747,10 +746,10 @@ static void reset_prepare(struct intel_engine_cs *engine) * * FIXME: Wa for more modern gens needs to be validated */ - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); if (intel_engine_stop_cs(engine)) - GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); + ENGINE_TRACE(engine, "timed out on STOP_RING\n"); intel_uncore_write_fw(uncore, RING_HEAD(base), @@ -766,12 +765,11 @@ static void reset_prepare(struct intel_engine_cs *engine) /* Check acts as a post */ if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - GEM_TRACE("%s: ring head [%x] not parked\n", - engine->name, - intel_uncore_read_fw(uncore, RING_HEAD(base))); + ENGINE_TRACE(engine, "ring head [%x] not parked\n", + intel_uncore_read_fw(uncore, RING_HEAD(base))); } -static void 
reset_ring(struct intel_engine_cs *engine, bool stalled) +static void reset_rewind(struct intel_engine_cs *engine, bool stalled) { struct i915_request *pos, *rq; unsigned long flags; @@ -842,7 +840,8 @@ static void reset_finish(struct intel_engine_cs *engine) static int rcs_resume(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *i915 = engine->i915; + struct intel_uncore *uncore = engine->uncore; /* * Disable CONSTANT_BUFFER before it is loaded from the context @@ -854,13 +853,14 @@ static int rcs_resume(struct intel_engine_cs *engine) * they are already accustomed to from before contexts were * enabled. */ - if (IS_GEN(dev_priv, 4)) - I915_WRITE(ECOSKPD, + if (IS_GEN(i915, 4)) + intel_uncore_write(uncore, ECOSKPD, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE)); /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ - if (IS_GEN_RANGE(dev_priv, 4, 6)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); + if (IS_GEN_RANGE(i915, 4, 6)) + intel_uncore_write(uncore, MI_MODE, + _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. 
It should already be @@ -868,38 +868,40 @@ static int rcs_resume(struct intel_engine_cs *engine) * * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv */ - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + if (IS_GEN_RANGE(i915, 6, 7)) + intel_uncore_write(uncore, MI_MODE, + _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); /* Required for the hardware to program scanline values for waiting */ /* WaEnableFlushTlbInvalidationMode:snb */ - if (IS_GEN(dev_priv, 6)) - I915_WRITE(GFX_MODE, + if (IS_GEN(i915, 6)) + intel_uncore_write(uncore, GFX_MODE, _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ - if (IS_GEN(dev_priv, 7)) - I915_WRITE(GFX_MODE_GEN7, + if (IS_GEN(i915, 7)) + intel_uncore_write(uncore, GFX_MODE_GEN7, _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(i915, 6)) { /* From the Sandybridge PRM, volume 1 part 3, page 24: * "If this bit is set, STCunit will have LRA as replacement * policy. [...] This bit must be reset. LRA replacement * policy is not supported." 
*/ - I915_WRITE(CACHE_MODE_0, + intel_uncore_write(uncore, CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); } - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + if (IS_GEN_RANGE(i915, 6, 7)) + intel_uncore_write(uncore, INSTPM, + _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); return xcs_resume(engine); } -static void cancel_requests(struct intel_engine_cs *engine) +static void reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; unsigned long flags; @@ -1318,6 +1320,8 @@ static int ring_context_alloc(struct intel_context *ce) return PTR_ERR(vma); ce->state = vma; + if (engine->default_state) + __set_bit(CONTEXT_VALID_BIT, &ce->flags); } return 0; @@ -1325,26 +1329,12 @@ static int ring_context_alloc(struct intel_context *ce) static int ring_context_pin(struct intel_context *ce) { - int err; - - err = intel_context_active_acquire(ce); - if (err) - return err; - - err = __context_pin_ppgtt(ce); - if (err) - goto err_active; - - return 0; - -err_active: - intel_context_active_release(ce); - return err; + return __context_pin_ppgtt(ce); } static void ring_context_reset(struct intel_context *ce) { - intel_ring_reset(ce->ring, 0); + intel_ring_reset(ce->ring, ce->ring->emit); } static const struct intel_context_ops ring_context_ops = { @@ -1360,46 +1350,38 @@ static const struct intel_context_ops ring_context_ops = { .destroy = ring_context_destroy, }; -static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) +static int load_pd_dir(struct i915_request *rq, + const struct i915_ppgtt *ppgtt, + u32 valid) { const struct intel_engine_cs * const engine = rq->engine; u32 *cs; - cs = intel_ring_begin(rq, 6); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); - *cs++ = PP_DIR_DCLV_2G; + *cs++ = valid; *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = 
i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; - intel_ring_advance(rq, cs); - - return 0; -} - -static int flush_pd_dir(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Stall until the page table load is complete */ + /* Stall until the page table load is complete? */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = intel_gt_scratch_offset(rq->engine->gt, + *cs++ = intel_gt_scratch_offset(engine->gt, INTEL_GT_SCRATCH_FIELD_DEFAULT); - *cs++ = MI_NOOP; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE); intel_ring_advance(rq, cs); - return 0; + + return rq->engine->emit_flush(rq, EMIT_FLUSH); } static inline int mi_set_context(struct i915_request *rq, u32 flags) @@ -1477,7 +1459,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; + *cs++ = i915_ggtt_offset(rq->context->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1547,10 +1529,10 @@ static int remap_l3_slice(struct i915_request *rq, int slice) static int remap_l3(struct i915_request *rq) { - struct i915_gem_context *ctx = rq->gem_context; + struct i915_gem_context *ctx = i915_request_gem_context(rq); int i, err; - if (!ctx->remap_slice) + if (!ctx || !ctx->remap_slice) return 0; for (i = 0; i < MAX_L3_SLICES; i++) { @@ -1566,19 +1548,42 @@ static int remap_l3(struct i915_request *rq) return 0; } +static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) +{ + int ret; + + if (!vm) + return 0; + + ret = rq->engine->emit_flush(rq, EMIT_FLUSH); 
+ if (ret) + return ret; + + /* + * Not only do we need a full barrier (post-sync write) after + * invalidating the TLBs, but we need to wait a little bit + * longer. Whether this is merely delaying us, or the + * subsequent flush is a key part of serialising with the + * post-sync op, this extra pass appears vital before a + * mm switch! + */ + ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G); + if (ret) + return ret; + + return rq->engine->emit_flush(rq, EMIT_INVALIDATE); +} + static int switch_context(struct i915_request *rq) { - struct intel_context *ce = rq->hw_context; - struct i915_address_space *vm = vm_alias(ce); + struct intel_context *ce = rq->context; int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - if (vm) { - ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm)); - if (ret) - return ret; - } + ret = switch_mm(rq, vm_alias(ce)); + if (ret) + return ret; if (ce->state) { u32 flags; @@ -1590,7 +1595,7 @@ static int switch_context(struct i915_request *rq) BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN); flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT; - if (!i915_gem_context_is_kernel(rq->gem_context)) + if (test_bit(CONTEXT_VALID_BIT, &ce->flags)) flags |= MI_RESTORE_EXT_STATE_EN; else flags |= MI_RESTORE_INHIBIT; @@ -1600,34 +1605,6 @@ static int switch_context(struct i915_request *rq) return ret; } - if (vm) { - struct intel_engine_cs *engine = rq->engine; - - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - return ret; - - ret = flush_pd_dir(rq); - if (ret) - return ret; - - /* - * Not only do we need a full barrier (post-sync write) after - * invalidating the TLBs, but we need to wait a little bit - * longer. Whether this is merely delaying us, or the - * subsequent flush is a key part of serialising with the - * post-sync op, this extra pass appears vital before a - * mm switch! 
- */ - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - return ret; - - ret = engine->emit_flush(rq, EMIT_FLUSH); - if (ret) - return ret; - } - ret = remap_l3(rq); if (ret) return ret; @@ -1639,7 +1616,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* @@ -1795,7 +1772,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode) static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; - engine->cancel_requests = cancel_requests; engine->park = NULL; engine->unpark = NULL; @@ -1807,7 +1783,7 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = gen6_bsd_submit_request; } -static void ring_destroy(struct intel_engine_cs *engine) +static void ring_release(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1821,8 +1797,6 @@ static void ring_destroy(struct intel_engine_cs *engine) intel_timeline_unpin(engine->legacy.timeline); intel_timeline_put(engine->legacy.timeline); - - kfree(engine); } static void setup_irq(struct intel_engine_cs *engine) @@ -1853,11 +1827,10 @@ static void setup_common(struct intel_engine_cs *engine) setup_irq(engine); - engine->destroy = ring_destroy; - engine->resume = xcs_resume; engine->reset.prepare = reset_prepare; - engine->reset.reset = reset_ring; + engine->reset.rewind = reset_rewind; + engine->reset.cancel = reset_cancel; engine->reset.finish = reset_finish; engine->cops = &ring_context_ops; @@ -1968,6 +1941,10 @@ static void setup_vecs(struct intel_engine_cs *engine) int intel_ring_submission_setup(struct intel_engine_cs *engine) { + struct intel_timeline *timeline; + struct intel_ring *ring; + int err; + setup_common(engine); switch (engine->class) { @@ 
-1988,15 +1965,6 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) return -ENODEV; } - return 0; -} - -int intel_ring_submission_init(struct intel_engine_cs *engine) -{ - struct intel_timeline *timeline; - struct intel_ring *ring; - int err; - timeline = intel_timeline_create(engine->gt, engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); @@ -2022,16 +1990,13 @@ int intel_ring_submission_init(struct intel_engine_cs *engine) engine->legacy.ring = ring; engine->legacy.timeline = timeline; - err = intel_engine_init_common(engine); - if (err) - goto err_ring_unpin; - GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + /* Finally, take ownership and responsibility for cleanup! */ + engine->release = ring_release; + return 0; -err_ring_unpin: - intel_ring_unpin(ring); err_ring: intel_ring_put(ring); err_timeline_unpin: diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 20d6ee148afc..d2a3d935d186 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,11 @@ static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) return mask & ~rps->pm_intrmsk_mbz; } +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + static u32 rps_pm_mask(struct intel_rps *rps, u8 val) { u32 mask = 0; @@ -78,8 +83,7 @@ static void rps_enable_interrupts(struct intel_rps *rps) gen6_gt_pm_enable_irq(gt, rps->pm_events); spin_unlock_irq(&gt->irq_lock); - intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, - rps_pm_mask(rps, rps->cur_freq)); + set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq)); } static void gen6_rps_reset_interrupts(struct intel_rps *rps) @@ -113,8 +117,7 @@ static void rps_disable_interrupts(struct intel_rps *rps) rps->pm_events = 0; - intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, - rps_pm_sanitize_mask(rps, ~0u)); + set(gt->uncore, GEN6_PMINTRMSK,
rps_pm_sanitize_mask(rps, ~0u)); spin_lock_irq(&gt->irq_lock); gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); @@ -573,25 +576,21 @@ static void rps_set_power(struct intel_rps *rps, int new_power) if (IS_VALLEYVIEW(i915)) goto skip_hw_write; - intel_uncore_write(uncore, GEN6_RP_UP_EI, - GT_INTERVAL_FROM_US(i915, ei_up)); - intel_uncore_write(uncore, GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(i915, - ei_up * threshold_up / 100)); - - intel_uncore_write(uncore, GEN6_RP_DOWN_EI, - GT_INTERVAL_FROM_US(i915, ei_down)); - intel_uncore_write(uncore, GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(i915, - ei_down * threshold_down / 100)); - - intel_uncore_write(uncore, GEN6_RP_CONTROL, - (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); + set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up)); + set(uncore, GEN6_RP_UP_THRESHOLD, + GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100)); + + set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down)); + set(uncore, GEN6_RP_DOWN_THRESHOLD, + GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100)); + + set(uncore, GEN6_RP_CONTROL, + (INTEL_GEN(i915) > 9 ?
0 : GEN6_RP_MEDIA_TURBO) | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); skip_hw_write: rps->power.mode = new_power; @@ -666,7 +665,7 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) swreq = (GEN6_FREQUENCY(val) | GEN6_OFFSET(0) | GEN6_AGGRESSIVE_TURBO); - intel_uncore_write(uncore, GEN6_RPNSWREQ, swreq); + set(uncore, GEN6_RPNSWREQ, swreq); return 0; } @@ -683,7 +682,7 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val) return err; } -static int rps_set(struct intel_rps *rps, u8 val) +static int rps_set(struct intel_rps *rps, u8 val, bool update) { struct drm_i915_private *i915 = rps_to_i915(rps); int err; @@ -701,7 +700,8 @@ static int rps_set(struct intel_rps *rps, u8 val) if (err) return err; - gen6_rps_set_thresholds(rps, val); + if (update) + gen6_rps_set_thresholds(rps, val); rps->last_freq = val; return 0; @@ -761,7 +761,7 @@ void intel_rps_park(struct intel_rps *rps) * power than the render powerwell. 
*/ intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); - rps_set(rps, rps->idle_freq); + rps_set(rps, rps->idle_freq, false); intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); } @@ -777,7 +777,7 @@ void intel_rps_boost(struct i915_request *rq) spin_lock_irqsave(&rq->lock, flags); if (!i915_request_has_waitboost(rq) && !dma_fence_is_signaled_locked(&rq->fence)) { - rq->flags |= I915_REQUEST_WAITBOOST; + set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); if (!atomic_fetch_inc(&rps->num_waiters) && READ_ONCE(rps->cur_freq) < rps->boost_freq) @@ -790,14 +790,16 @@ void intel_rps_boost(struct i915_request *rq) int intel_rps_set(struct intel_rps *rps, u8 val) { - int err = 0; + int err; lockdep_assert_held(&rps->lock); GEM_BUG_ON(val > rps->max_freq); GEM_BUG_ON(val < rps->min_freq); if (rps->active) { - err = rps_set(rps, val); + err = rps_set(rps, val, true); + if (err) + return err; /* * Make sure we continue to get interrupts @@ -806,18 +808,15 @@ int intel_rps_set(struct intel_rps *rps, u8 val) if (INTEL_GEN(rps_to_i915(rps)) >= 6) { struct intel_uncore *uncore = rps_to_uncore(rps); - intel_uncore_write(uncore, GEN6_RP_INTERRUPT_LIMITS, - rps_limits(rps, val)); + set(uncore, + GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); - intel_uncore_write(uncore, GEN6_PMINTRMSK, - rps_pm_mask(rps, val)); + set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); } } - if (err == 0) - rps->cur_freq = val; - - return err; + rps->cur_freq = val; + return 0; } static void gen6_rps_init(struct intel_rps *rps) @@ -878,7 +877,7 @@ static bool rps_reset(struct intel_rps *rps) rps->power.mode = -1; rps->last_freq = -1; - if (rps_set(rps, rps->min_freq)) { + if (rps_set(rps, rps->min_freq, true)) { DRM_ERROR("Failed to reset RPS to initial values\n"); return false; } @@ -1201,7 +1200,7 @@ void intel_rps_enable(struct intel_rps *rps) static void gen6_rps_disable(struct intel_rps *rps) { - intel_uncore_write(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); + 
set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); } void intel_rps_disable(struct intel_rps *rps) @@ -1566,7 +1565,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) return; if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(gt->engine[VECS0]); + intel_engine_signal_breadcrumbs(gt->engine[VECS0]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -1663,23 +1662,53 @@ void intel_rps_init(struct intel_rps *rps) if (INTEL_GEN(i915) <= 7) rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; - if (INTEL_GEN(i915) >= 8) + if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } -u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat) +u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (INTEL_GEN(i915) >= 9) + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + cagf = (rpstat >> 8) & 0xff; + else if (INTEL_GEN(i915) >= 9) cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; else cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - return cagf; + return cagf; +} + +static u32 read_cagf(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 freq; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + vlv_punit_get(i915); + freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + } else { + freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1); + } + + return intel_rps_get_cagf(rps, freq); +} + +u32 intel_rps_read_actual_frequency(struct intel_rps *rps) +{ + struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; + + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_gpu_freq(rps, read_cagf(rps)); + + return freq; } /* External interface for intel_ips.ko */ @@ 
-1715,6 +1744,7 @@ void intel_rps_driver_register(struct intel_rps *rps) * set up, to avoid intel-ips sneaking in and reading bogus values. */ if (IS_GEN(gt->i915, 5)) { + GEM_BUG_ON(ips_mchdev); rcu_assign_pointer(ips_mchdev, gt->i915); ips_ping_for_i915_load(); } @@ -1722,7 +1752,8 @@ void intel_rps_driver_register(struct intel_rps *rps) void intel_rps_driver_unregister(struct intel_rps *rps) { - rcu_assign_pointer(ips_mchdev, NULL); + if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) + rcu_assign_pointer(ips_mchdev, NULL); } static struct drm_i915_private *mchdev_get(void) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 9518c66c9792..dfa98194f3b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -29,7 +29,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); -u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat1); +u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); +u32 intel_rps_read_actual_frequency(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 649798c184fb..87716529cd2f 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -15,6 +15,9 @@ #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS + struct intel_timeline_hwsp { struct intel_gt *gt; struct intel_gt_timelines *gt_timelines; @@ -23,14 +26,6 @@ struct intel_timeline_hwsp { u64 free_bitmap; }; -struct intel_timeline_cacheline { - struct i915_active active; - struct 
intel_timeline_hwsp *hwsp; - void *vaddr; -#define CACHELINE_BITS 6 -#define CACHELINE_FREE CACHELINE_BITS -}; - static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); i915_active_fini(&cl->active); - kfree(cl); + kfree_rcu(cl, rcu); } __i915_active_call @@ -254,7 +249,7 @@ int intel_timeline_init(struct intel_timeline *timeline, mutex_init(&timeline->mutex); - INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -262,7 +257,7 @@ int intel_timeline_init(struct intel_timeline *timeline, return 0; } -static void timelines_init(struct intel_gt *gt) +void intel_gt_init_timelines(struct intel_gt *gt) { struct intel_gt_timelines *timelines = &gt->timelines; @@ -273,11 +268,6 @@ static void timelines_init(struct intel_gt *gt) INIT_LIST_HEAD(&timelines->hwsp_free_list); } -void intel_timelines_init(struct drm_i915_private *i915) -{ - timelines_init(&i915->gt); -} - void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); @@ -338,7 +328,6 @@ int intel_timeline_pin(struct intel_timeline *tl) void intel_timeline_enter(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; - unsigned long flags; /* * Pretend we are serialised by the timeline->mutex. @@ -359,21 +348,19 @@ void intel_timeline_enter(struct intel_timeline *tl) * use atomic to manipulate tl->active_count. 
*/ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); if (atomic_add_unless(&tl->active_count, 1, 0)) return; - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); if (!atomic_fetch_inc(&tl->active_count)) list_add_tail(&tl->link, &timelines->active_list); - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); } void intel_timeline_exit(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; - unsigned long flags; /* See intel_timeline_enter() */ lockdep_assert_held(&tl->mutex); @@ -382,10 +369,10 @@ void intel_timeline_exit(struct intel_timeline *tl) if (atomic_add_unless(&tl->active_count, -1, 1)) return; - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); /* * Since this timeline is idle, all bariers upon which we were waiting @@ -521,46 +508,35 @@ int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *to, u32 *hwsp) { - struct intel_timeline *tl; + struct intel_timeline_cacheline *cl; int err; + GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline)); + rcu_read_lock(); - tl = rcu_dereference(from->timeline); - if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref)) - tl = NULL; + cl = rcu_dereference(from->hwsp_cacheline); + if (unlikely(!i915_active_acquire_if_busy(&cl->active))) + goto unlock; /* seqno wrapped and completed! 
*/ + if (unlikely(i915_request_completed(from))) + goto release; rcu_read_unlock(); - if (!tl) /* already completed */ - return 1; - - GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl); - - err = -EBUSY; - if (mutex_trylock(&tl->mutex)) { - struct intel_timeline_cacheline *cl = from->hwsp_cacheline; - if (i915_request_completed(from)) { - err = 1; - goto unlock; - } + err = cacheline_ref(cl, to); + if (err) + goto out; - err = cacheline_ref(cl, to); - if (err) - goto unlock; + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; - if (likely(cl == tl->hwsp_cacheline)) { - *hwsp = tl->hwsp_offset; - } else { /* across a seqno wrap, recover the original offset */ - *hwsp = i915_ggtt_offset(cl->hwsp->vma) + - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * - CACHELINE_BYTES; - } +out: + i915_active_release(&cl->active); + return err; +release: + i915_active_release(&cl->active); unlock: - mutex_unlock(&tl->mutex); - } - intel_timeline_put(tl); - - return err; + rcu_read_unlock(); + return 1; } void intel_timeline_unpin(struct intel_timeline *tl) @@ -583,7 +559,7 @@ void __intel_timeline_free(struct kref *kref) kfree_rcu(timeline, rcu); } -static void timelines_fini(struct intel_gt *gt) +void intel_gt_fini_timelines(struct intel_gt *gt) { struct intel_gt_timelines *timelines = &gt->timelines; @@ -591,11 +567,6 @@ static void timelines_fini(struct intel_gt *gt) GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); } -void intel_timelines_fini(struct drm_i915_private *i915) -{ - timelines_fini(&i915->gt); -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "gt/selftests/mock_timeline.c" #include "gt/selftest_timeline.c" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index f583af1ba18d..f5b7eade3809 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -88,7 +88,7 @@ int intel_timeline_read_hwsp(struct i915_request *from, 
struct i915_request *until, u32 *hwsp_offset); -void intel_timelines_init(struct drm_i915_private *i915); -void intel_timelines_fini(struct drm_i915_private *i915); +void intel_gt_init_timelines(struct intel_gt *gt); +void intel_gt_fini_timelines(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index aaf15cbe1ce1..02181c5020db 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -10,14 +10,15 @@ #include <linux/list.h> #include <linux/kref.h> #include <linux/mutex.h> +#include <linux/rcupdate.h> #include <linux/types.h> #include "i915_active_types.h" -struct drm_i915_private; struct i915_vma; -struct intel_timeline_cacheline; struct i915_syncmap; +struct intel_gt; +struct intel_timeline_hwsp; struct intel_timeline { u64 fence_context; @@ -87,4 +88,13 @@ struct intel_timeline { struct rcu_head rcu; }; +struct intel_timeline_cacheline { + struct i915_active active; + + struct intel_timeline_hwsp *hwsp; + void *vaddr; + + struct rcu_head rcu; +}; + #endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e4bccc14602f..4e292d4bf7b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_ring.h" #include "intel_workarounds.h" @@ -146,21 +147,27 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } } -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) +static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val, u32 read_mask) { struct i915_wa wa = { .reg = reg, .mask = mask, .val = val, - .read = mask, + .read = read_mask, }; _wa_add(wal, &wa); } static void 
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) +{ + wa_add(wal, reg, mask, val, mask); +} + +static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); @@ -247,7 +254,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, /* WaDisableDopClockGating:bdw * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, @@ -568,9 +575,24 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + u32 val; + /* Wa_1409142259:tgl */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + + /* Wa_1604555607:tgl */ + val = intel_uncore_read(engine->uncore, FF_MODE2); + val &= ~FF_MODE2_TDS_TIMER_MASK; + val |= FF_MODE2_TDS_TIMER_128; + /* + * FIXME: FF_MODE2 register is not readable till TGL B0. We can + * enable verification of WA from the later steppings, which enables + * the read of FF_MODE2. + */ + wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val, + IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 
0 : + FF_MODE2_TDS_TIMER_MASK); } static void @@ -1315,6 +1337,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_RC_SLEEP_PSMI_CONTROL, GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | GEN8_RC_SEMA_IDLE_MSG_DISABLE); + + /* + * Wa_1606679103:tgl + * (see also Wa_1606682166:icl) + */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); } if (IS_GEN(i915, 11)) { @@ -1574,7 +1604,9 @@ static int engine_wa_list_verify(struct intel_context *ce, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_engine_pm_get(ce->engine); rq = intel_context_create_request(ce); + intel_engine_pm_put(ce->engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_vma; @@ -1584,16 +1616,17 @@ static int engine_wa_list_verify(struct intel_context *ce, if (err) goto err_vma; + i915_request_get(rq); i915_request_add(rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { err = -ETIME; - goto err_vma; + goto err_rq; } results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(results)) { err = PTR_ERR(results); - goto err_vma; + goto err_rq; } err = 0; @@ -1607,6 +1640,8 @@ static int engine_wa_list_verify(struct intel_context *ce, i915_gem_object_unpin_map(vma->obj); +err_rq: + i915_request_put(rq); err_vma: i915_vma_unpin(vma); i915_vma_put(vma); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 83f549d203a0..a560b7eee2cd 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -77,7 +77,7 @@ static void advance(struct i915_request *request) i915_request_mark_complete(request); GEM_BUG_ON(!i915_request_completed(request)); - intel_engine_queue_breadcrumbs(request->engine); + intel_engine_signal_breadcrumbs(request->engine); } static void hw_delay_complete(struct timer_list *t) @@ -149,7 +149,11 @@ static int mock_context_alloc(struct intel_context *ce) static int mock_context_pin(struct intel_context *ce) { - return intel_context_active_acquire(ce); + return 0; +} + 
+static void mock_context_reset(struct intel_context *ce) +{ } static const struct intel_context_ops mock_context_ops = { @@ -161,6 +165,7 @@ static const struct intel_context_ops mock_context_ops = { .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, + .reset = mock_context_reset, .destroy = mock_context_destroy, }; @@ -207,16 +212,12 @@ static void mock_reset_prepare(struct intel_engine_cs *engine) { } -static void mock_reset(struct intel_engine_cs *engine, bool stalled) +static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled) { GEM_BUG_ON(stalled); } -static void mock_reset_finish(struct intel_engine_cs *engine) -{ -} - -static void mock_cancel_requests(struct intel_engine_cs *engine) +static void mock_reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; unsigned long flags; @@ -234,6 +235,24 @@ static void mock_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->active.lock, flags); } +static void mock_reset_finish(struct intel_engine_cs *engine) +{ +} + +static void mock_engine_release(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); + + intel_engine_fini_retire(engine); + intel_engine_fini_breadcrumbs(engine); +} + struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, const char *name, int id) @@ -265,9 +284,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.submit_request = mock_submit_request; engine->base.reset.prepare = mock_reset_prepare; - engine->base.reset.reset = mock_reset; + engine->base.reset.rewind = mock_reset_rewind; + engine->base.reset.cancel = mock_reset_cancel; engine->base.reset.finish = mock_reset_finish; - engine->base.cancel_requests = mock_cancel_requests; + + engine->base.release = 
mock_engine_release; i915->gt.engine[id] = &engine->base; i915->gt.engine_class[0][id] = &engine->base; @@ -290,6 +311,7 @@ int mock_engine_init(struct intel_engine_cs *engine) intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); ce = create_kernel_context(engine); @@ -321,18 +343,3 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { } - -void mock_engine_free(struct intel_engine_cs *engine) -{ - struct mock_engine *mock = - container_of(engine, typeof(*mock), base); - - GEM_BUG_ON(timer_pending(&mock->hw_delay)); - - intel_context_unpin(engine->kernel_context); - intel_context_put(engine->kernel_context); - - intel_engine_fini_breadcrumbs(engine); - - kfree(engine); -} diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index bc720defc6b8..e874dfaa5316 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -5,6 +5,7 @@ */ #include "i915_selftest.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_gt.h" @@ -47,35 +48,36 @@ static int context_sync(struct intel_context *ce) mutex_lock(&tl->mutex); do { - struct dma_fence *fence; + struct i915_request *rq; long timeout; - fence = i915_active_fence_get(&tl->last_request); - if (!fence) + if (list_empty(&tl->requests)) break; - timeout = dma_fence_wait_timeout(fence, false, HZ / 10); + rq = list_last_entry(&tl->requests, typeof(*rq), link); + i915_request_get(rq); + + timeout = i915_request_wait(rq, 0, HZ / 10); if (timeout < 0) err = timeout; else - i915_request_retire_upto(to_request(fence)); + i915_request_retire_upto(rq); - dma_fence_put(fence); + i915_request_put(rq); } while (!err); mutex_unlock(&tl->mutex); return err; } -static int __live_context_size(struct intel_engine_cs *engine, - struct 
i915_gem_context *fixme) +static int __live_context_size(struct intel_engine_cs *engine) { struct intel_context *ce; struct i915_request *rq; void *vaddr; int err; - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); @@ -118,7 +120,7 @@ static int __live_context_size(struct intel_engine_cs *engine, goto err_unpin; /* Force the context switch */ - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -143,7 +145,6 @@ static int live_context_size(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; enum intel_engine_id id; int err = 0; @@ -152,10 +153,6 @@ static int live_context_size(void *arg) * HW tries to write past the end of one. */ - fixme = kernel_context(gt->i915); - if (IS_ERR(fixme)) - return PTR_ERR(fixme); - for_each_engine(engine, gt, id) { struct { struct drm_i915_gem_object *state; @@ -180,7 +177,7 @@ static int live_context_size(void *arg) /* Overlaps with the execlists redzone */ engine->context_size += I915_GTT_PAGE_SIZE; - err = __live_context_size(engine, fixme); + err = __live_context_size(engine); engine->context_size -= I915_GTT_PAGE_SIZE; @@ -193,13 +190,12 @@ static int live_context_size(void *arg) break; } - kernel_context_close(fixme); return err; } -static int __live_active_context(struct intel_engine_cs *engine, - struct i915_gem_context *fixme) +static int __live_active_context(struct intel_engine_cs *engine) { + unsigned long saved_heartbeat; struct intel_context *ce; int pass; int err; @@ -223,40 +219,55 @@ static int __live_active_context(struct intel_engine_cs *engine, return -EINVAL; } - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); + saved_heartbeat = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + for (pass = 0; 
pass <= 2; pass++) { struct i915_request *rq; + intel_engine_pm_get(engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err; + goto out_engine; } err = request_sync(rq); if (err) - goto err; + goto out_engine; /* Context will be kept active until after an idle-barrier. */ if (i915_active_is_idle(&ce->active)) { pr_err("context is not active; expected idle-barrier (%s pass %d)\n", engine->name, pass); err = -EINVAL; - goto err; + goto out_engine; } if (!intel_engine_pm_is_awake(engine)) { pr_err("%s is asleep before idle-barrier\n", engine->name); err = -EINVAL; - goto err; + goto out_engine; } + +out_engine: + intel_engine_pm_put(engine); + if (err) + goto err; } /* Now make sure our idle-barriers are flushed */ + err = intel_engine_flush_barriers(engine); + if (err) + goto err; + + /* Wait for the barrier and in the process wait for engine to park */ err = context_sync(engine->kernel_context); if (err) goto err; @@ -266,12 +277,15 @@ static int __live_active_context(struct intel_engine_cs *engine, err = -EINVAL; } + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { struct drm_printer p = drm_debug_printer(__func__); intel_engine_dump(engine, &p, - "%s is still awake after idle-barriers\n", - engine->name); + "%s is still awake:%d after idle-barriers\n", + engine->name, + atomic_read(&engine->wakeref.count)); GEM_TRACE_DUMP(); err = -EINVAL; @@ -279,6 +293,7 @@ static int __live_active_context(struct intel_engine_cs *engine, } err: + engine->props.heartbeat_interval_ms = saved_heartbeat; intel_context_put(ce); return err; } @@ -287,23 +302,11 @@ static int live_active_context(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; enum intel_engine_id id; - struct drm_file *file; int err = 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - fixme = live_context(gt->i915, file); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); 
- goto out_file; - } - for_each_engine(engine, gt, id) { - err = __live_active_context(engine, fixme); + err = __live_active_context(engine); if (err) break; @@ -312,8 +315,6 @@ static int live_active_context(void *arg) break; } -out_file: - mock_file_free(gt->i915, file); return err; } @@ -345,10 +346,10 @@ unpin: return err; } -static int __live_remote_context(struct intel_engine_cs *engine, - struct i915_gem_context *fixme) +static int __live_remote_context(struct intel_engine_cs *engine) { struct intel_context *local, *remote; + unsigned long saved_heartbeat; int pass; int err; @@ -360,16 +361,26 @@ static int __live_remote_context(struct intel_engine_cs *engine, * clobber the idle-barrier. */ - remote = intel_context_create(fixme, engine); + if (intel_engine_pm_is_awake(engine)) { + pr_err("%s is awake before starting %s!\n", + engine->name, __func__); + return -EINVAL; + } + + remote = intel_context_create(engine); if (IS_ERR(remote)) return PTR_ERR(remote); - local = intel_context_create(fixme, engine); + local = intel_context_create(engine); if (IS_ERR(local)) { err = PTR_ERR(local); goto err_remote; } + saved_heartbeat = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + intel_engine_pm_get(engine); + for (pass = 0; pass <= 2; pass++) { err = __remote_sync(local, remote); if (err) @@ -387,6 +398,9 @@ static int __live_remote_context(struct intel_engine_cs *engine, } } + intel_engine_pm_put(engine); + engine->props.heartbeat_interval_ms = saved_heartbeat; + intel_context_put(local); err_remote: intel_context_put(remote); @@ -397,23 +411,11 @@ static int live_remote_context(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; enum intel_engine_id id; - struct drm_file *file; int err = 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - fixme = live_context(gt->i915, file); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); - goto out_file; - } - 
for_each_engine(engine, gt, id) { - err = __live_remote_context(engine, fixme); + err = __live_remote_context(engine); if (err) break; @@ -422,8 +424,6 @@ static int live_remote_context(void *arg) break; } -out_file: - mock_file_free(gt->i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 3880f07c29b8..f88e445a1cae 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -4,7 +4,365 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_selftest.h" +#include <linux/sort.h> + +#include "intel_gt_pm.h" +#include "intel_rps.h" + +#include "i915_selftest.h" +#include "selftests/igt_flush_test.h" + +#define COUNT 5 + +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + return *a - *b; +} + +static void perf_begin(struct intel_gt *gt) +{ + intel_gt_pm_get(gt); + + /* Boost gpufreq to max [waitboost] and keep it fixed */ + atomic_inc(&gt->rps.num_waiters); + schedule_work(&gt->rps.work); + flush_work(&gt->rps.work); +} + +static int perf_end(struct intel_gt *gt) +{ + atomic_dec(&gt->rps.num_waiters); + intel_gt_pm_put(gt); + + return igt_flush_test(gt->i915); +} + +static int write_timestamp(struct i915_request *rq, int slot) +{ + u32 cmd; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; + if (INTEL_GEN(rq->i915) >= 8) + cmd++; + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32); + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_vma *create_empty_batch(struct intel_context *ce) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cs = 
i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_put; + } + + cs[0] = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_unpin; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_unpin; + + i915_gem_object_unpin_map(obj); + return vma; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static u32 trifilter(u32 *a) +{ + u64 sum; + + sort(a, COUNT, sizeof(*a), cmp_u32, NULL); + + sum = mul_u32_u32(a[2], 2); + sum += a[1]; + sum += a[3]; + + return sum >> 2; +} + +static int perf_mi_bb_start(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + return 0; + + perf_begin(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce = engine->kernel_context; + struct i915_vma *batch; + u32 cycles[COUNT]; + int i; + + intel_engine_pm_get(engine); + + batch = create_empty_batch(ce); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(batch); + if (err) { + intel_engine_pm_put(engine); + i915_vma_put(batch); + break; + } + + for (i = 0; i < ARRAY_SIZE(cycles); i++) { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = write_timestamp(rq, 2); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, 8, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 3); + if (err) + goto out; + +out: + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + if (err) + break; + + cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; + } + i915_vma_put(batch); + 
intel_engine_pm_put(engine); + if (err) + break; + + pr_info("%s: MI_BB_START cycles: %u\n", + engine->name, trifilter(cycles)); + } + if (perf_end(gt)) + err = -EIO; + + return err; +} + +static struct i915_vma *create_nop_batch(struct intel_context *ce) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_put; + } + + memset(cs, 0, SZ_64K); + cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_unpin; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_unpin; + + i915_gem_object_unpin_map(obj); + return vma; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int perf_mi_noop(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + return 0; + + perf_begin(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce = engine->kernel_context; + struct i915_vma *base, *nop; + u32 cycles[COUNT]; + int i; + + intel_engine_pm_get(engine); + + base = create_empty_batch(ce); + if (IS_ERR(base)) { + err = PTR_ERR(base); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(base); + if (err) { + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + nop = create_nop_batch(ce); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + err = i915_vma_sync(nop); + if (err) { + i915_vma_put(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + break; + } + + for (i = 0; i < 
ARRAY_SIZE(cycles); i++) { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = write_timestamp(rq, 2); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + base->node.start, 8, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 3); + if (err) + goto out; + + err = rq->engine->emit_bb_start(rq, + nop->node.start, + nop->node.size, + 0); + if (err) + goto out; + + err = write_timestamp(rq, 4); + if (err) + goto out; + +out: + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + if (err) + break; + + cycles[i] = + (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - + (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); + } + i915_vma_put(nop); + i915_vma_put(base); + intel_engine_pm_put(engine); + if (err) + break; + + pr_info("%s: 16K MI_NOOP cycles: %u\n", + engine->name, trifilter(cycles)); + } + if (perf_end(gt)) + err = -EIO; + + return err; +} + +int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_mi_bb_start), + SUBTEST(perf_mi_noop), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} static int intel_mmio_bases_check(void *arg) { diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index e864406bd2d9..43d4d589749f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -11,6 +11,28 @@ #include "intel_gt_requests.h" #include "i915_selftest.h" +static int timeline_sync(struct intel_timeline *tl) +{ + struct dma_fence *fence; + long timeout; + + fence = i915_active_fence_get(&tl->last_request); + if (!fence) + return 0; + + timeout = dma_fence_wait_timeout(fence, true, HZ / 2); + dma_fence_put(fence); + if (timeout < 0) + return timeout; + + 
return 0; +} + +static int engine_sync_barrier(struct intel_engine_cs *engine) +{ + return timeline_sync(engine->kernel_context->timeline); +} + struct pulse { struct i915_active active; struct kref kref; @@ -53,9 +75,7 @@ static struct pulse *pulse_create(void) static void pulse_unlock_wait(struct pulse *p) { - mutex_lock(&p->active.mutex); - mutex_unlock(&p->active.mutex); - flush_work(&p->active.work); + i915_active_unlock_wait(&p->active); } static int __live_idle_pulse(struct intel_engine_cs *engine, @@ -92,7 +112,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); - if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) { + if (engine_sync_barrier(engine)) { + struct drm_printer m = drm_err_printer("pulse"); + + pr_err("%s: no heartbeat pulse?\n", engine->name); + intel_engine_dump(engine, &m, "%s", engine->name); + err = -ETIME; goto out; } @@ -175,8 +200,7 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine) int err; int i; - ce = intel_context_create(engine->kernel_context->gem_context, - engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index d1752f15702a..09ff8e4f88af 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -6,6 +6,7 @@ */ #include "selftest_llc.h" +#include "selftest_rc6.h" static int live_gt_resume(void *arg) { @@ -50,6 +51,7 @@ static int live_gt_resume(void *arg) int intel_gt_pm_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(live_rc6_manual), SUBTEST(live_gt_resume), }; @@ -58,3 +60,20 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915) return intel_gt_live_subtests(tests, &i915->gt); } + +int intel_gt_pm_late_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + /* + * 
These tests may leave the system in an undesirable state. + * They are intended to be run last in CI and the system + * rebooted afterwards. + */ + SUBTEST(live_rc6_ctx_wa), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 85e9ccf5c304..3e5e6c86e843 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -25,7 +25,9 @@ #include <linux/kthread.h> #include "gem/i915_gem_context.h" -#include "gt/intel_gt.h" + +#include "intel_gt.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "i915_selftest.h" @@ -308,6 +310,24 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int igt_hang_sanitycheck(void *arg) { struct intel_gt *gt = arg; @@ -377,36 +397,30 @@ static int igt_reset_nop(void *arg) struct intel_gt *gt = arg; struct i915_gpu_error *global = &gt->i915->gpu_error; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; unsigned int reset_count, count; enum intel_engine_id id; - struct drm_file *file; IGT_TIMEOUT(end_time); int err = 0; /* Check that we can reset during non-user portions of requests */ - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); reset_count = 
i915_reset_count(global); count = 0; do { for_each_engine(engine, gt, id) { + struct intel_context *ce; int i; + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -414,6 +428,8 @@ static int igt_reset_nop(void *arg) i915_request_add(rq); } + + intel_context_put(ce); } igt_global_reset_lock(gt); @@ -437,10 +453,7 @@ static int igt_reset_nop(void *arg) } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - err = igt_flush_test(gt->i915); -out: - mock_file_free(gt->i915, file); - if (intel_gt_is_wedged(gt)) + if (igt_flush_test(gt->i915)) err = -EIO; return err; } @@ -450,36 +463,29 @@ static int igt_reset_nop_engine(void *arg) struct intel_gt *gt = arg; struct i915_gpu_error *global = &gt->i915->gpu_error; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; - struct drm_file *file; - int err = 0; /* Check that we can engine-reset during non-user portions */ if (!intel_has_reset_engine(gt)) return 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); for_each_engine(engine, gt, id) { - unsigned int reset_count, reset_engine_count; - unsigned int count; + unsigned int reset_count, reset_engine_count, count; + struct intel_context *ce; + unsigned long heartbeat; IGT_TIMEOUT(end_time); + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); count = 0; + engine_heartbeat_disable(engine, &heartbeat); set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); do { int i; @@ -494,7 +500,7 @@ static int 
igt_reset_nop_engine(void *arg) for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -523,22 +529,18 @@ static int igt_reset_nop_engine(void *arg) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); - pr_info("%s(%s): %d resets\n", __func__, engine->name, count); + engine_heartbeat_enable(engine, heartbeat); - if (err) - break; + pr_info("%s(%s): %d resets\n", __func__, engine->name, count); - err = igt_flush_test(gt->i915); + intel_context_put(ce); + if (igt_flush_test(gt->i915)) + err = -EIO; if (err) - break; + return err; } - err = igt_flush_test(gt->i915); -out: - mock_file_free(gt->i915, file); - if (intel_gt_is_wedged(gt)) - err = -EIO; - return err; + return 0; } static int __igt_reset_engine(struct intel_gt *gt, bool active) @@ -562,6 +564,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; + unsigned long heartbeat; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -577,7 +580,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); - intel_engine_pm_get(engine); + engine_heartbeat_disable(engine, &heartbeat); set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); do { if (active) { @@ -629,7 +632,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); if (err) break; @@ -699,43 +702,43 @@ static int active_engine(void *data) struct active_engine *arg = data; struct intel_engine_cs *engine = arg->engine; struct i915_request *rq[8] = {}; - struct i915_gem_context 
*ctx[ARRAY_SIZE(rq)]; - struct drm_file *file; - unsigned long count = 0; + struct intel_context *ce[ARRAY_SIZE(rq)]; + unsigned long count; int err = 0; - file = mock_file(engine->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - for (count = 0; count < ARRAY_SIZE(ctx); count++) { - ctx[count] = live_context(engine->i915, file); - if (IS_ERR(ctx[count])) { - err = PTR_ERR(ctx[count]); + for (count = 0; count < ARRAY_SIZE(ce); count++) { + ce[count] = intel_context_create(engine); + if (IS_ERR(ce[count])) { + err = PTR_ERR(ce[count]); while (--count) - i915_gem_context_put(ctx[count]); - goto err_file; + intel_context_put(ce[count]); + return err; } } + count = 0; while (!kthread_should_stop()) { unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; - new = igt_request_alloc(ctx[idx], engine); + new = intel_context_create_request(ce[idx]); if (IS_ERR(new)) { err = PTR_ERR(new); break; } - if (arg->flags & TEST_PRIORITY) - ctx[idx]->sched.priority = - i915_prandom_u32_max_state(512, &prng); - rq[idx] = i915_request_get(new); i915_request_add(new); + if (engine->schedule && arg->flags & TEST_PRIORITY) { + struct i915_sched_attr attr = { + .priority = + i915_prandom_u32_max_state(512, &prng), + }; + engine->schedule(rq[idx], &attr); + } + err = active_request_put(old); if (err) break; @@ -749,10 +752,10 @@ static int active_engine(void *data) /* Keep the first error */ if (!err) err = err__; + + intel_context_put(ce[count]); } -err_file: - mock_file_free(engine->i915, file); return err; } @@ -786,6 +789,7 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; + unsigned long heartbeat; IGT_TIMEOUT(end_time); if (flags & TEST_ACTIVE && @@ -828,7 +832,7 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ - 
intel_engine_pm_get(engine); + engine_heartbeat_disable(engine, &heartbeat); set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); do { struct i915_request *rq = NULL; @@ -902,7 +906,8 @@ static int __igt_reset_engines(struct intel_gt *gt, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); + pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); @@ -1300,32 +1305,21 @@ static int igt_reset_evict_ggtt(void *arg) static int igt_reset_evict_ppgtt(void *arg) { struct intel_gt *gt = arg; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct drm_file *file; + struct i915_ppgtt *ppgtt; int err; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); + /* aliasing == global gtt locking, covered above */ + if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) + return 0; - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } + ppgtt = i915_ppgtt_create(gt); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); - err = 0; - vm = i915_gem_context_get_vm_rcu(ctx); - if (!i915_is_ggtt(vm)) { - /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(gt, vm, - evict_vma, EXEC_OBJECT_WRITE); - } - i915_vm_put(vm); + err = __igt_reset_evict_vma(gt, &ppgtt->vm, + evict_vma, EXEC_OBJECT_WRITE); + i915_vm_put(&ppgtt->vm); -out: - mock_file_free(gt->i915, file); return err; } @@ -1504,7 +1498,7 @@ static int igt_handle_error(void *arg) struct intel_engine_cs *engine = gt->engine[RCS0]; struct hang h; struct i915_request *rq; - struct i915_gpu_state *error; + struct i915_gpu_coredump *error; int err; /* Check that we can issue a global GPU and engine reset */ diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index eb71ac2f992c..15cda024e3e4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -50,14 +50,31 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) return vma; } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int live_sanitycheck(void *arg) { struct intel_gt *gt = arg; - struct i915_gem_engines_iter it; - struct i915_gem_context *ctx; - struct intel_context *ce; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct igt_spinner spin; - int err = -ENOMEM; + int err = 0; if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) return 0; @@ -65,17 +82,20 @@ static int live_sanitycheck(void *arg) if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ctx = kernel_context(gt->i915); - if (!ctx) - goto err_spin; - - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + for_each_engine(engine, gt, id) { + struct intel_context *ce; struct i915_request *rq; + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_ctx; + goto out_ctx; } i915_request_add(rq); @@ -84,21 +104,21 @@ static int live_sanitycheck(void *arg) GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - goto err_ctx; + goto out_ctx; } igt_spinner_end(&spin); if (igt_flush_test(gt->i915)) { err = -EIO; - goto err_ctx; + goto out_ctx; } + +out_ctx: + intel_context_put(ce); + if (err) + break; } - err = 0; -err_ctx: - i915_gem_context_unlock_engines(ctx); - kernel_context_close(ctx); -err_spin: igt_spinner_fini(&spin); return err; } @@ -106,7 +126,6 @@ err_spin: static int 
live_unlite_restore(struct intel_gt *gt, int prio) { struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; struct igt_spinner spin; int err = -ENOMEM; @@ -119,15 +138,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) if (igt_spinner_init(&spin, gt)) return err; - ctx = kernel_context(gt->i915); - if (!ctx) - goto err_spin; - err = 0; for_each_engine(engine, gt, id) { struct intel_context *ce[2] = {}; struct i915_request *rq[2]; struct igt_live_test t; + unsigned long saved; int n; if (prio && !intel_engine_has_preemption(engine)) @@ -140,11 +156,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) err = -EIO; break; } + engine_heartbeat_disable(engine, &saved); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; - tmp = intel_context_create(ctx, engine); + tmp = intel_context_create(engine); if (IS_ERR(tmp)) { err = PTR_ERR(tmp); goto err_ce; @@ -247,14 +264,13 @@ err_ce: intel_context_put(ce[n]); } + engine_heartbeat_enable(engine, saved); if (igt_live_test_end(&t)) err = -EIO; if (err) break; } - kernel_context_close(ctx); -err_spin: igt_spinner_fini(&spin); return err; } @@ -309,17 +325,17 @@ emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) static struct i915_request * semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) { - struct i915_gem_context *ctx; + struct intel_context *ce; struct i915_request *rq; int err; - ctx = kernel_context(engine->i915); - if (!ctx) - return ERR_PTR(-ENOMEM); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) - goto out_ctx; + goto out_ce; err = 0; if (rq->engine->emit_init_breadcrumb) @@ -332,8 +348,8 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) if (err) rq = ERR_PTR(err); -out_ctx: - kernel_context_close(ctx); +out_ce: + 
intel_context_put(ce); return rq; } @@ -348,7 +364,7 @@ release_queue(struct intel_engine_cs *engine, struct i915_request *rq; u32 *cs; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -468,12 +484,16 @@ static int live_timeslice_preempt(void *arg) enum intel_engine_id id; for_each_engine(engine, gt, id) { + unsigned long saved; + if (!intel_engine_has_preemption(engine)) continue; memset(vaddr, 0, PAGE_SIZE); + engine_heartbeat_disable(engine, &saved); err = slice_semaphore_queue(engine, vma, count); + engine_heartbeat_enable(engine, saved); if (err) goto err_pin; @@ -497,7 +517,7 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine) { struct i915_request *rq; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) return rq; @@ -507,13 +527,19 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine) return rq; } -static void wait_for_submit(struct intel_engine_cs *engine, - struct i915_request *rq) +static int wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) { + timeout += jiffies; do { cond_resched(); intel_engine_flush_submission(engine); - } while (!i915_request_is_active(rq)); + if (i915_request_is_active(rq)) + return 0; + } while (time_before(jiffies, timeout)); + + return -ETIME; } static long timeslice_threshold(const struct intel_engine_cs *engine) @@ -566,40 +592,49 @@ static int live_timeslice_queue(void *arg) .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; struct i915_request *rq, *nop; + unsigned long saved; if (!intel_engine_has_preemption(engine)) continue; + engine_heartbeat_disable(engine, &saved); memset(vaddr, 0, PAGE_SIZE); /* ELSP[0]: semaphore wait */ rq = semaphore_queue(engine, vma, 0); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_pin; + goto err_heartbeat; } engine->schedule(rq, &attr); - 
wait_for_submit(engine, rq); + err = wait_for_submit(engine, rq, HZ / 2); + if (err) { + pr_err("%s: Timed out trying to submit semaphores\n", + engine->name); + goto err_rq; + } /* ELSP[1]: nop request */ nop = nop_request(engine); if (IS_ERR(nop)) { err = PTR_ERR(nop); - i915_request_put(rq); - goto err_pin; + goto err_rq; } - wait_for_submit(engine, nop); + err = wait_for_submit(engine, nop, HZ / 2); i915_request_put(nop); + if (err) { + pr_err("%s: Timed out trying to submit nop\n", + engine->name); + goto err_rq; + } GEM_BUG_ON(i915_request_completed(rq)); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Queue: semaphore signal, matching priority as semaphore */ err = release_queue(engine, vma, 1, effective_prio(rq)); - if (err) { - i915_request_put(rq); - goto err_pin; - } + if (err) + goto err_rq; intel_engine_flush_submission(engine); if (!READ_ONCE(engine->execlists.timer.expires) && @@ -630,12 +665,14 @@ static int live_timeslice_queue(void *arg) memset(vaddr, 0xff, PAGE_SIZE); err = -EIO; } +err_rq: i915_request_put(rq); +err_heartbeat: + engine_heartbeat_enable(engine, saved); if (err) break; } -err_pin: i915_vma_unpin(vma); err_map: i915_gem_object_unpin_map(obj); @@ -748,15 +785,19 @@ static int live_busywait_preempt(void *arg) *cs++ = 0; intel_ring_advance(lo, cs); + + i915_request_get(lo); i915_request_add(lo); if (wait_for(READ_ONCE(*map), 10)) { + i915_request_put(lo); err = -ETIMEDOUT; goto err_vma; } /* Low priority request should be busywaiting now */ if (i915_request_wait(lo, 0, 1) != -ETIME) { + i915_request_put(lo); pr_err("%s: Busywaiting request did not!\n", engine->name); err = -EIO; @@ -766,6 +807,7 @@ static int live_busywait_preempt(void *arg) hi = igt_request_alloc(ctx_hi, engine); if (IS_ERR(hi)) { err = PTR_ERR(hi); + i915_request_put(lo); goto err_vma; } @@ -773,6 +815,7 @@ static int live_busywait_preempt(void *arg) if (IS_ERR(cs)) { err = PTR_ERR(cs); i915_request_add(hi); + i915_request_put(lo); goto err_vma; } @@ 
-793,11 +836,13 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); + i915_request_put(lo); intel_gt_set_wedged(gt); err = -EIO; goto err_vma; } GEM_BUG_ON(READ_ONCE(*map)); + i915_request_put(lo); if (igt_live_test_end(&t)) { err = -EIO; @@ -1108,7 +1153,7 @@ static int live_nopreempt(void *arg) } /* Low priority client, but unpreemptable! */ - rq_a->flags |= I915_REQUEST_NOPREEMPT; + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { @@ -1187,13 +1232,13 @@ static int __cancel_active0(struct live_preempt_cancel *arg) __func__, arg->engine->name)) return -EIO; - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_ARB_CHECK); if (IS_ERR(rq)) return PTR_ERR(rq); + clear_bit(CONTEXT_BANNED, &rq->context->flags); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -1201,7 +1246,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) goto out; } - i915_gem_context_set_banned(arg->a.ctx); + intel_context_set_banned(rq->context); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -1236,13 +1281,13 @@ static int __cancel_active1(struct live_preempt_cancel *arg) __func__, arg->engine->name)) return -EIO; - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq[0] = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_NOOP); /* no preemption */ if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -1250,7 +1295,6 @@ static int __cancel_active1(struct live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); rq[1] = spinner_create_request(&arg->b.spin, arg->b.ctx, arg->engine, MI_ARB_CHECK); @@ -1259,13 +1303,14 @@ static int 
__cancel_active1(struct live_preempt_cancel *arg) goto out; } + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); if (err) goto out; - i915_gem_context_set_banned(arg->b.ctx); + intel_context_set_banned(rq[1]->context); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -1308,13 +1353,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg) __func__, arg->engine->name)) return -EIO; - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq[0] = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_ARB_CHECK); if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -1322,13 +1367,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); if (IS_ERR(rq[1])) { err = PTR_ERR(rq[1]); goto out; } + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); @@ -1349,7 +1394,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (err) goto out; - i915_gem_context_set_banned(arg->a.ctx); + intel_context_set_banned(rq[2]->context); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -1396,13 +1441,13 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) return 0; GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_NOOP); /* preemption disabled */ if (IS_ERR(rq)) return PTR_ERR(rq); + clear_bit(CONTEXT_BANNED, &rq->context->flags); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -1410,7 +1455,7 @@ 
static int __cancel_hostile(struct live_preempt_cancel *arg) goto out; } - i915_gem_context_set_banned(arg->a.ctx); + intel_context_set_banned(rq->context); err = intel_engine_pulse(arg->engine); /* force reset */ if (err) goto out; @@ -1665,6 +1710,7 @@ static int live_suppress_wait_preempt(void *arg) { struct intel_gt *gt = arg; struct preempt_client client[4]; + struct i915_request *rq[ARRAY_SIZE(client)] = {}; struct intel_engine_cs *engine; enum intel_engine_id id; int err = -ENOMEM; @@ -1698,7 +1744,6 @@ static int live_suppress_wait_preempt(void *arg) continue; for (depth = 0; depth < ARRAY_SIZE(client); depth++) { - struct i915_request *rq[ARRAY_SIZE(client)]; struct i915_request *dummy; engine->execlists.preempt_hang.count = 0; @@ -1708,18 +1753,22 @@ static int live_suppress_wait_preempt(void *arg) goto err_client_3; for (i = 0; i < ARRAY_SIZE(client); i++) { - rq[i] = spinner_create_request(&client[i].spin, - client[i].ctx, engine, - MI_NOOP); - if (IS_ERR(rq[i])) { - err = PTR_ERR(rq[i]); + struct i915_request *this; + + this = spinner_create_request(&client[i].spin, + client[i].ctx, engine, + MI_NOOP); + if (IS_ERR(this)) { + err = PTR_ERR(this); goto err_wedged; } /* Disable NEWCLIENT promotion */ - __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request, + __i915_active_fence_set(&i915_request_timeline(this)->last_request, &dummy->fence); - i915_request_add(rq[i]); + + rq[i] = i915_request_get(this); + i915_request_add(this); } dummy_request_free(dummy); @@ -1740,8 +1789,11 @@ static int live_suppress_wait_preempt(void *arg) goto err_wedged; } - for (i = 0; i < ARRAY_SIZE(client); i++) + for (i = 0; i < ARRAY_SIZE(client); i++) { igt_spinner_end(&client[i].spin); + i915_request_put(rq[i]); + rq[i] = NULL; + } if (igt_flush_test(gt->i915)) goto err_wedged; @@ -1769,8 +1821,10 @@ err_client_0: return err; err_wedged: - for (i = 0; i < ARRAY_SIZE(client); i++) + for (i = 0; i < ARRAY_SIZE(client); i++) { igt_spinner_end(&client[i].spin); + 
i915_request_put(rq[i]); + } intel_gt_set_wedged(gt); err = -EIO; goto err_client_3; @@ -1815,6 +1869,8 @@ static int live_chain_preempt(void *arg) MI_ARB_CHECK); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); ring_size = rq->wa_tail - rq->head; @@ -1827,8 +1883,10 @@ static int live_chain_preempt(void *arg) igt_spinner_end(&lo.spin); if (i915_request_wait(rq, 0, HZ / 2) < 0) { pr_err("Timed out waiting to flush %s\n", engine->name); + i915_request_put(rq); goto err_wedged; } + i915_request_put(rq); if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; @@ -1862,6 +1920,8 @@ static int live_chain_preempt(void *arg) rq = igt_request_alloc(hi.ctx, engine); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); engine->schedule(rq, &attr); @@ -1874,14 +1934,19 @@ static int live_chain_preempt(void *arg) count); intel_engine_dump(engine, &p, "%s\n", engine->name); + i915_request_put(rq); goto err_wedged; } igt_spinner_end(&lo.spin); + i915_request_put(rq); rq = igt_request_alloc(lo.ctx, engine); if (IS_ERR(rq)) goto err_wedged; + + i915_request_get(rq); i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1890,8 +1955,11 @@ static int live_chain_preempt(void *arg) count); intel_engine_dump(engine, &p, "%s\n", engine->name); + + i915_request_put(rq); goto err_wedged; } + i915_request_put(rq); } if (igt_live_test_end(&t)) { @@ -1915,6 +1983,201 @@ err_wedged: goto err_client_lo; } +static int create_gang(struct intel_engine_cs *engine, + struct i915_request **prev) +{ + struct drm_i915_gem_object *obj; + struct intel_context *ce; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_ce; + } + + vma = 
i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_obj; + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) + goto err_obj; + + /* Semaphore target: spin until zero */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = lower_32_bits(vma->node.start); + *cs++ = upper_32_bits(vma->node.start); + + if (*prev) { + u64 offset = (*prev)->batch->node.start; + + /* Terminate the spinner in the next lower priority batch. */ + *cs++ = MI_STORE_DWORD_IMM_GEN4; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = 0; + } + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + goto err_obj; + + rq->batch = vma; + i915_request_get(rq); + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_rq; + + i915_gem_object_put(obj); + intel_context_put(ce); + + rq->client_link.next = &(*prev)->client_link; + *prev = rq; + return 0; + +err_rq: + i915_request_put(rq); +err_obj: + i915_gem_object_put(obj); +err_ce: + intel_context_put(ce); + return err; +} + +static int live_preempt_gang(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + /* + * Build as long a chain of preempters as we can, with each + * request higher priority than the last. Once we are ready, we release + * the last batch which then percolates down the chain, each releasing + * the next oldest in turn. 
The intent is to simply push as hard as we + * can with the number of preemptions, trying to exceed narrow HW + * limits. At a minimum, we insist that we can sort all the user + * high priority levels into execution order. + */ + + for_each_engine(engine, gt, id) { + struct i915_request *rq = NULL; + struct igt_live_test t; + IGT_TIMEOUT(end_time); + int prio = 0; + int err = 0; + u32 *cs; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) + return -EIO; + + do { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(prio++), + }; + + err = create_gang(engine, &rq); + if (err) + break; + + /* Submit each spinner at increasing priority */ + engine->schedule(rq, &attr); + + if (prio <= I915_PRIORITY_MAX) + continue; + + if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) + break; + + if (__igt_timeout(end_time, NULL)) + break; + } while (1); + pr_debug("%s: Preempt chain of %d requests\n", + engine->name, prio); + + /* + * Such that the last spinner is the highest priority and + * should execute first. When that spinner completes, + * it will terminate the next lowest spinner until there + * are no more spinners and the gang is complete. 
+ */ + cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); + if (!IS_ERR(cs)) { + *cs = 0; + i915_gem_object_unpin_map(rq->batch->obj); + } else { + err = PTR_ERR(cs); + intel_gt_set_wedged(gt); + } + + while (rq) { /* wait for each rq from highest to lowest prio */ + struct i915_request *n = + list_next_entry(rq, client_link); + + if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(engine->i915->drm.dev); + + pr_err("Failed to flush chain of %d requests, at %d\n", + prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -ETIME; + } + + i915_request_put(rq); + rq = n; + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + static int live_preempt_hang(void *arg) { struct intel_gt *gt = arg; @@ -2391,28 +2654,18 @@ static int nop_virtual_engine(struct intel_gt *gt, #define CHAIN BIT(0) { IGT_TIMEOUT(end_time); - struct i915_request *request[16]; - struct i915_gem_context *ctx[16]; + struct i915_request *request[16] = {}; struct intel_context *ve[16]; unsigned long n, prime, nc; struct igt_live_test t; ktime_t times[2] = {}; int err; - GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx)); + GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ctx[n] = kernel_context(gt->i915); - if (!ctx[n]) { - err = -ENOMEM; - nctx = n; - goto out; - } - - ve[n] = intel_execlists_create_virtual(ctx[n], - siblings, nsibling); + ve[n] = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve[n])) { - kernel_context_close(ctx[n]); err = PTR_ERR(ve[n]); nctx = n; goto out; @@ -2421,7 +2674,6 @@ static int nop_virtual_engine(struct intel_gt *gt, err = intel_context_pin(ve[n]); if (err) { intel_context_put(ve[n]); - kernel_context_close(ctx[n]); nctx = n; goto out; } @@ -2437,27 +2689,35 @@ static int nop_virtual_engine(struct intel_gt *gt, if (flags & CHAIN) { for (nc = 0; nc < nctx; nc++) { for (n = 0; n < 
prime; n++) { - request[nc] = - i915_request_create(ve[nc]); - if (IS_ERR(request[nc])) { - err = PTR_ERR(request[nc]); + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto out; } - i915_request_add(request[nc]); + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); } } } else { for (n = 0; n < prime; n++) { for (nc = 0; nc < nctx; nc++) { - request[nc] = - i915_request_create(ve[nc]); - if (IS_ERR(request[nc])) { - err = PTR_ERR(request[nc]); + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto out; } - i915_request_add(request[nc]); + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); } } } @@ -2483,6 +2743,11 @@ static int nop_virtual_engine(struct intel_gt *gt, if (prime == 1) times[0] = times[1]; + for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); + request[nc] = NULL; + } + if (__igt_timeout(end_time, NULL)) break; } @@ -2500,9 +2765,9 @@ out: err = -EIO; for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); intel_context_unpin(ve[nc]); intel_context_put(ve[nc]); - kernel_context_close(ctx[nc]); } return err; } @@ -2561,7 +2826,6 @@ static int mask_virtual_engine(struct intel_gt *gt, unsigned int nsibling) { struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; - struct i915_gem_context *ctx; struct intel_context *ve; struct igt_live_test t; unsigned int n; @@ -2572,11 +2836,7 @@ static int mask_virtual_engine(struct intel_gt *gt, * restrict it to our desired engine within the virtual engine. 
*/ - ctx = kernel_context(gt->i915); - if (!ctx) - return -ENOMEM; - - ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -2644,7 +2904,6 @@ out_unpin: out_put: intel_context_put(ve); out_close: - kernel_context_close(ctx); return err; } @@ -2684,7 +2943,6 @@ static int preserved_virtual_engine(struct intel_gt *gt, unsigned int nsibling) { struct i915_request *last = NULL; - struct i915_gem_context *ctx; struct intel_context *ve; struct i915_vma *scratch; struct igt_live_test t; @@ -2692,17 +2950,11 @@ static int preserved_virtual_engine(struct intel_gt *gt, int err = 0; u32 *cs; - ctx = kernel_context(gt->i915); - if (!ctx) - return -ENOMEM; - scratch = create_scratch(siblings[0]->gt); - if (IS_ERR(scratch)) { - err = PTR_ERR(scratch); - goto out_close; - } + if (IS_ERR(scratch)) + return PTR_ERR(scratch); - ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_scratch; @@ -2785,8 +3037,6 @@ out_put: intel_context_put(ve); out_scratch: i915_vma_unpin_and_release(&scratch, 0); -out_close: - kernel_context_close(ctx); return err; } @@ -2838,16 +3088,54 @@ static int bond_virtual_engine(struct intel_gt *gt, #define BOND_SCHEDULE BIT(0) { struct intel_engine_cs *master; - struct i915_gem_context *ctx; struct i915_request *rq[16]; enum intel_engine_id id; + struct igt_spinner spin; unsigned long n; int err; + /* + * A set of bonded requests is intended to be run concurrently + * across a number of engines. We use one request per-engine + * and a magic fence to schedule each of the bonded requests + * at the same time. A consequence of our current scheduler is that + * we only move requests to the HW ready queue when the request + * becomes ready, that is when all of its prerequisite fences have + * been signaled. 
As one of those fences is the master submit fence, + * there is a delay on all secondary fences as the HW may be + * currently busy. Equally, as all the requests are independent, + * they may have other fences that delay individual request + * submission to HW. Ergo, we do not guarantee that all requests are + * immediately submitted to HW at the same time, just that if the + * rules are abided by, they are ready at the same time as the + * first is submitted. Userspace can embed semaphores in its batch + * to ensure parallel execution of its phases as it requires. + * Though naturally it gets requested that perhaps the scheduler should + * take care of parallel execution, even across preemption events on + * different HW. (The proper answer is of course "lalalala".) + * + * With the submit-fence, we have identified three possible phases + * of synchronisation depending on the master fence: queued (not + * ready), executing, and signaled. The first two are quite simple + * and checked below. However, the signaled master fence handling is + * contentious. Currently we do not distinguish between a signaled + * fence and an expired fence, as once signaled it does not convey + * any information about the previous execution. It may even be freed + * and hence checking later it may not exist at all. Ergo we currently + * do not apply the bonding constraint for an already signaled fence, + * as our expectation is that it should not constrain the secondaries + * and is outside of the scope of the bonded request API (i.e. all + * userspace requests are meant to be running in parallel). As + * it imposes no constraint, and is effectively a no-op, we do not + * check below as normal execution flows are checked extensively above. + * + * XXX Is the degenerate handling of signaled submit fences the + * expected behaviour for userspace? 
+ */ + GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - ctx = kernel_context(gt->i915); - if (!ctx) + if (igt_spinner_init(&spin, gt)) return -ENOMEM; err = 0; @@ -2860,7 +3148,9 @@ static int bond_virtual_engine(struct intel_gt *gt, memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - rq[0] = igt_request_alloc(ctx, master); + rq[0] = igt_spinner_create_request(&spin, + master->kernel_context, + MI_NOOP); if (IS_ERR(rq[0])) { err = PTR_ERR(rq[0]); goto out; @@ -2873,16 +3163,21 @@ static int bond_virtual_engine(struct intel_gt *gt, &fence, GFP_KERNEL); } + i915_request_add(rq[0]); if (err < 0) goto out; + if (!(flags & BOND_SCHEDULE) && + !igt_wait_for_spinner(&spin, rq[0])) { + err = -EIO; + goto out; + } + for (n = 0; n < nsibling; n++) { struct intel_context *ve; - ve = intel_execlists_create_virtual(ctx, - siblings, - nsibling); + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); onstack_fence_fini(&fence); @@ -2924,6 +3219,8 @@ static int bond_virtual_engine(struct intel_gt *gt, } } onstack_fence_fini(&fence); + intel_engine_flush_submission(master); + igt_spinner_end(&spin); if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { pr_err("Master request did not execute (on %s)!\n", @@ -2960,7 +3257,7 @@ out: if (igt_flush_test(gt->i915)) err = -EIO; - kernel_context_close(ctx); + igt_spinner_fini(&spin); return err; } @@ -3028,6 +3325,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), + SUBTEST(live_preempt_gang), SUBTEST(live_preempt_hang), SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_smoke), @@ -3080,7 +3378,7 @@ static int live_lrc_layout(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - u32 *mem; + u32 *lrc; int err; /* @@ -3088,13 +3386,13 @@ static int live_lrc_layout(void *arg) * match the layout saved by HW. 
*/ - mem = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!mem) + lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!lrc) return -ENOMEM; err = 0; for_each_engine(engine, gt, id) { - u32 *hw, *lrc; + u32 *hw; int dw; if (!engine->default_state) @@ -3108,8 +3406,7 @@ static int live_lrc_layout(void *arg) } hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); - lrc = memset(mem, 0, PAGE_SIZE); - execlists_init_reg_state(lrc, + execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), engine->kernel_context, engine, engine->kernel_context->ring, @@ -3124,6 +3421,13 @@ static int live_lrc_layout(void *arg) continue; } + if (lrc[dw] == 0) { + pr_debug("%s: skipped instruction %x at dword %d\n", + engine->name, lri, dw); + dw++; + continue; + } + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { pr_err("%s: Expected LRI command at dword %d, found %08x\n", engine->name, dw, lri); @@ -3172,7 +3476,7 @@ static int live_lrc_layout(void *arg) break; } - kfree(mem); + kfree(lrc); return err; } @@ -3207,12 +3511,12 @@ static int live_lrc_fixed(void *arg) } tbl[] = { { i915_mmio_reg_offset(RING_START(engine->mmio_base)), - CTX_RING_BUFFER_START - 1, + CTX_RING_START - 1, "RING_START" }, { i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), - CTX_RING_BUFFER_CONTROL - 1, + CTX_RING_CTL - 1, "RING_CTL" }, { @@ -3231,7 +3535,7 @@ static int live_lrc_fixed(void *arg) "RING_MI_MODE" }, { - engine->mmio_base + 0x110, + i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), CTX_BB_STATE - 1, "BB_STATE" }, @@ -3270,8 +3574,7 @@ static int live_lrc_fixed(void *arg) return err; } -static int __live_lrc_state(struct i915_gem_context *fixme, - struct intel_engine_cs *engine, +static int __live_lrc_state(struct intel_engine_cs *engine, struct i915_vma *scratch) { struct intel_context *ce; @@ -3286,7 +3589,7 @@ static int __live_lrc_state(struct i915_gem_context *fixme, int err; int n; - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); @@ 
-3360,7 +3663,6 @@ static int live_lrc_state(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; struct i915_vma *scratch; enum intel_engine_id id; int err = 0; @@ -3370,18 +3672,12 @@ static int live_lrc_state(void *arg) * intel_context. */ - fixme = kernel_context(gt->i915); - if (!fixme) - return -ENOMEM; - scratch = create_scratch(gt); - if (IS_ERR(scratch)) { - err = PTR_ERR(scratch); - goto out_close; - } + if (IS_ERR(scratch)) + return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - err = __live_lrc_state(fixme, engine, scratch); + err = __live_lrc_state(engine, scratch); if (err) break; } @@ -3390,8 +3686,6 @@ static int live_lrc_state(void *arg) err = -EIO; i915_vma_unpin_and_release(&scratch, 0); -out_close: - kernel_context_close(fixme); return err; } @@ -3401,7 +3695,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine) u32 *cs; int n; - rq = i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -3424,8 +3718,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine) return 0; } -static int __live_gpr_clear(struct i915_gem_context *fixme, - struct intel_engine_cs *engine, +static int __live_gpr_clear(struct intel_engine_cs *engine, struct i915_vma *scratch) { struct intel_context *ce; @@ -3441,7 +3734,7 @@ static int __live_gpr_clear(struct i915_gem_context *fixme, if (err) return err; - ce = intel_context_create(fixme, engine); + ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); @@ -3503,7 +3796,6 @@ static int live_gpr_clear(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *fixme; struct i915_vma *scratch; enum intel_engine_id id; int err = 0; @@ -3513,18 +3805,12 @@ static int live_gpr_clear(void *arg) * to avoid leaking any information from previous contexts. 
*/ - fixme = kernel_context(gt->i915); - if (!fixme) - return -ENOMEM; - scratch = create_scratch(gt); - if (IS_ERR(scratch)) { - err = PTR_ERR(scratch); - goto out_close; - } + if (IS_ERR(scratch)) + return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - err = __live_gpr_clear(fixme, engine, scratch); + err = __live_gpr_clear(engine, scratch); if (err) break; } @@ -3533,8 +3819,6 @@ static int live_gpr_clear(void *arg) err = -EIO; i915_vma_unpin_and_release(&scratch, 0); -out_close: - kernel_context_close(fixme); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c new file mode 100644 index 000000000000..de1f83100fb6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -0,0 +1,419 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gt/intel_engine_pm.h" +#include "i915_selftest.h" + +#include "gem/selftests/mock_context.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" + +struct live_mocs { + struct drm_i915_mocs_table table; + struct i915_vma *scratch; + void *vaddr; +}; + +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL); + if (IS_ERR(vma)) { + 
i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) +{ + int err; + + if (!get_mocs_settings(gt->i915, &arg->table)) + return -EINVAL; + + arg->scratch = create_scratch(gt); + if (IS_ERR(arg->scratch)) + return PTR_ERR(arg->scratch); + + arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB); + if (IS_ERR(arg->vaddr)) { + err = PTR_ERR(arg->vaddr); + goto err_scratch; + } + + return 0; + +err_scratch: + i915_vma_unpin_and_release(&arg->scratch, 0); + return err; +} + +static void live_mocs_fini(struct live_mocs *arg) +{ + i915_vma_unpin_and_release(&arg->scratch, I915_VMA_RELEASE_MAP); +} + +static int read_regs(struct i915_request *rq, + u32 addr, unsigned int count, + uint32_t *offset) +{ + unsigned int i; + u32 *cs; + + GEM_BUG_ON(!IS_ALIGNED(*offset, sizeof(u32))); + + cs = intel_ring_begin(rq, 4 * count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0; i < count; i++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = *offset; + *cs++ = 0; + + addr += sizeof(u32); + *offset += sizeof(u32); + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static int read_mocs_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table, + uint32_t *offset) +{ + u32 addr; + + if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915)) + addr = global_mocs_offset(); + else + addr = mocs_offset(rq->engine); + + return read_regs(rq, addr, table->n_entries, offset); +} + +static int read_l3cc_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table, + uint32_t *offset) +{ + u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); + + return read_regs(rq, addr, (table->n_entries + 1) / 2, offset); +} + +static int check_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table, + uint32_t **vaddr) +{ 
+ unsigned int i; + u32 expect; + + for_each_mocs(expect, table, i) { + if (**vaddr != expect) { + pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n", + engine->name, i, **vaddr, expect); + return -EINVAL; + } + ++*vaddr; + } + + return 0; +} + +static bool mcr_range(struct drm_i915_private *i915, u32 offset) +{ + /* + * Registers in this range are affected by the MCR selector + * which only controls CPU initiated MMIO. Routing does not + * work for CS access so we cannot verify them on this path. + */ + return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff; +} + +static int check_l3cc_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table, + uint32_t **vaddr) +{ + /* Can we read the MCR range 0xb00 directly? See intel_workarounds! */ + u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); + unsigned int i; + u32 expect; + + for_each_l3cc(expect, table, i) { + if (!mcr_range(engine->i915, reg) && **vaddr != expect) { + pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n", + engine->name, i, **vaddr, expect); + return -EINVAL; + } + ++*vaddr; + reg += 4; + } + + return 0; +} + +static int check_mocs_engine(struct live_mocs *arg, + struct intel_context *ce) +{ + struct i915_vma *vma = arg->scratch; + struct i915_request *rq; + u32 offset; + u32 *vaddr; + int err; + + memset32(arg->vaddr, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (!err) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + + /* Read the mocs tables back using SRM */ + offset = i915_ggtt_offset(vma); + if (!err) + err = read_mocs_table(rq, &arg->table, &offset); + if (!err && ce->engine->class == RENDER_CLASS) + err = read_l3cc_table(rq, &arg->table, &offset); + offset -= i915_ggtt_offset(vma); + GEM_BUG_ON(offset > PAGE_SIZE); + + err = 
request_add_sync(rq, err); + if (err) + return err; + + /* Compare the results against the expected tables */ + vaddr = arg->vaddr; + if (!err) + err = check_mocs_table(ce->engine, &arg->table, &vaddr); + if (!err && ce->engine->class == RENDER_CLASS) + err = check_l3cc_table(ce->engine, &arg->table, &vaddr); + if (err) + return err; + + GEM_BUG_ON(arg->vaddr + offset != vaddr); + return 0; +} + +static int live_mocs_kernel(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err; + + /* Basic check the system is configured with the expected mocs table */ + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = check_mocs_engine(&mocs, engine->kernel_context); + intel_engine_pm_put(engine); + if (err) + break; + } + + live_mocs_fini(&mocs); + return err; +} + +static int live_mocs_clean(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err; + + /* Every new context should see the same mocs table */ + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + err = check_mocs_engine(&mocs, ce); + intel_context_put(ce); + if (err) + break; + } + + live_mocs_fini(&mocs); + return err; +} + +static int active_engine_reset(struct intel_context *ce, + const char *reason) +{ + struct igt_spinner spin; + struct i915_request *rq; + int err; + + err = igt_spinner_init(&spin, ce->engine->gt); + if (err) + return err; + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + if (IS_ERR(rq)) { + igt_spinner_fini(&spin); + return PTR_ERR(rq); + } + + err = request_add_spin(rq, &spin); + if (err == 0) + err = intel_engine_reset(ce->engine, reason); + + 
igt_spinner_end(&spin); + igt_spinner_fini(&spin); + + return err; +} + +static int __live_mocs_reset(struct live_mocs *mocs, + struct intel_context *ce) +{ + int err; + + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + err = active_engine_reset(ce, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs"); + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + + return 0; +} + +static int live_mocs_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct live_mocs mocs; + int err = 0; + + /* Check the mocs setup is retained over per-engine and global resets */ + + if (!intel_has_reset_engine(gt)) + return 0; + + err = live_mocs_init(&mocs, gt); + if (err) + return err; + + igt_global_reset_lock(gt); + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + intel_engine_pm_get(engine); + err = __live_mocs_reset(&mocs, ce); + intel_engine_pm_put(engine); + + intel_context_put(ce); + if (err) + break; + } + igt_global_reset_unlock(gt); + + live_mocs_fini(&mocs); + return err; +} + +int intel_mocs_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_mocs_kernel), + SUBTEST(live_mocs_clean), + SUBTEST(live_mocs_reset), + }; + struct drm_i915_mocs_table table; + + if (!get_mocs_settings(i915, &table)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c new file mode 100644 index 000000000000..8cc55a0e9e06 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -0,0 +1,203 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 
Intel Corporation + */ + +#include "intel_context.h" +#include "intel_engine_pm.h" +#include "intel_gt_requests.h" +#include "intel_ring.h" +#include "selftest_rc6.h" + +#include "selftests/i915_random.h" + +int live_rc6_manual(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rc6 *rc6 = &gt->rc6; + intel_wakeref_t wakeref; + u64 res[2]; + int err = 0; + + /* + * Our claim is that we can "encourage" the GPU to enter rc6 at will. + * Let's try it! + */ + + if (!rc6->enabled) + return 0; + + /* bsw/byt use a PCU and decouple RC6 from our manual control */ + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) + return 0; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + /* Force RC6 off for starters */ + __intel_rc6_disable(rc6); + msleep(1); /* wakeup is not immediate, takes about 100us on icl */ + + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + msleep(250); + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + if ((res[1] - res[0]) >> 10) { + pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", + (res[1] - res[0]) >> 10); + err = -EINVAL; + goto out_unlock; + } + + /* Manually enter RC6 */ + intel_rc6_park(rc6); + + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + msleep(100); + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + + if (res[1] == res[0]) { + pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n", + intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE), + intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL)); + err = -EINVAL; + } + + /* Restore what should have been the original state! 
*/ + intel_rc6_unpark(rc6); + +out_unlock: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + return err; +} + +static const u32 *__live_rc6_ctx(struct intel_context *ce) +{ + struct i915_request *rq; + const u32 *result; + u32 cmd; + u32 *cs; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return ERR_CAST(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return cs; + } + + cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; + if (INTEL_GEN(rq->i915) >= 8) + cmd++; + + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO); + *cs++ = ce->timeline->hwsp_offset + 8; + *cs++ = 0; + intel_ring_advance(rq, cs); + + result = rq->hwsp_seqno + 2; + i915_request_add(rq); + + return result; +} + +static struct intel_engine_cs ** +randomised_engines(struct intel_gt *gt, + struct rnd_state *prng, + unsigned int *count) +{ + struct intel_engine_cs *engine, **engines; + enum intel_engine_id id; + int n; + + n = 0; + for_each_engine(engine, gt, id) + n++; + if (!n) + return NULL; + + engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL); + if (!engines) + return NULL; + + n = 0; + for_each_engine(engine, gt, id) + engines[n++] = engine; + + i915_prandom_shuffle(engines, sizeof(*engines), n, prng); + + *count = n; + return engines; +} + +int live_rc6_ctx_wa(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs **engines; + unsigned int n, count; + I915_RND_STATE(prng); + int err = 0; + + /* A read of CTX_INFO upsets rc6. Poke the bear! 
*/ + if (INTEL_GEN(gt->i915) < 8) + return 0; + + engines = randomised_engines(gt, &prng, &count); + if (!engines) + return 0; + + for (n = 0; n < count; n++) { + struct intel_engine_cs *engine = engines[n]; + int pass; + + for (pass = 0; pass < 2; pass++) { + struct intel_context *ce; + unsigned int resets = + i915_reset_engine_count(&gt->i915->gpu_error, + engine); + const u32 *res; + + /* Use a sacrificial context */ + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + intel_engine_pm_get(engine); + res = __live_rc6_ctx(ce); + intel_engine_pm_put(engine); + intel_context_put(ce); + if (IS_ERR(res)) { + err = PTR_ERR(res); + goto out; + } + + if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + intel_gt_pm_wait_for_idle(gt); + pr_debug("%s: CTX_INFO=%0x\n", + engine->name, READ_ONCE(*res)); + + if (resets != + i915_reset_engine_count(&gt->i915->gpu_error, + engine)) { + pr_err("%s: GPU reset required\n", + engine->name); + add_taint_for_CI(TAINT_WARN); + err = -EIO; + goto out; + } + } + } + +out: + kfree(engines); + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h new file mode 100644 index 000000000000..762fd442d7b2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_RC6_H +#define SELFTEST_RC6_H + +int live_rc6_ctx_wa(void *arg); +int live_rc6_manual(void *arg); + +#endif /* SELFTEST_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index f04a59fe5d2c..e2d78cc22fb4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -458,7 +458,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) goto out; } - rq = 
i915_request_create(engine->kernel_context); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) goto out_unpin; @@ -675,9 +675,7 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - intel_engine_pm_get(engine); - rq = i915_request_create(engine->kernel_context); - intel_engine_pm_put(engine); + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index abce6e4ec9c0..ac1921854cbf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -264,22 +264,15 @@ static int switch_to_scratch_context(struct intel_engine_cs *engine, struct igt_spinner *spin) { - struct i915_gem_context *ctx; struct intel_context *ce; struct i915_request *rq; int err = 0; - ctx = kernel_context(engine->i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - GEM_BUG_ON(i915_gem_context_is_bannable(ctx)); - - ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); - GEM_BUG_ON(IS_ERR(ce)); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); rq = igt_spinner_create_request(spin, ce, MI_NOOP); - intel_context_put(ce); if (IS_ERR(rq)) { @@ -293,7 +286,6 @@ err: if (err && spin) igt_spinner_end(spin); - kernel_context_close(ctx); return err; } @@ -367,20 +359,17 @@ out_ctx: return err; } -static struct i915_vma *create_batch(struct i915_gem_context *ctx) +static struct i915_vma *create_batch(struct i915_address_space *vm) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm; struct i915_vma *vma; int err; - obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, 16 * PAGE_SIZE); if (IS_ERR(obj)) return ERR_CAST(obj); - vm = i915_gem_context_get_vm_rcu(ctx); vma = i915_vma_instance(obj, vm, NULL); - i915_vm_put(vm); if (IS_ERR(vma)) { err = 
PTR_ERR(vma); goto err_obj; @@ -452,8 +441,7 @@ static int whitelist_writable_count(struct intel_engine_cs *engine) return count; } -static int check_dirty_whitelist(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int check_dirty_whitelist(struct intel_context *ce) { const u32 values[] = { 0x00000000, @@ -481,19 +469,17 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, 0xffff00ff, 0xffffffff, }; - struct i915_address_space *vm; + struct intel_engine_cs *engine = ce->engine; struct i915_vma *scratch; struct i915_vma *batch; int err = 0, i, v; u32 *cs, *results; - vm = i915_gem_context_get_vm_rcu(ctx); - scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1); - i915_vm_put(vm); + scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1); if (IS_ERR(scratch)) return PTR_ERR(scratch); - batch = create_batch(ctx); + batch = create_batch(ce->vm); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_scratch; @@ -518,7 +504,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; - if (INTEL_GEN(ctx->i915) >= 8) + if (INTEL_GEN(engine->i915) >= 8) lrm++, srm++; pr_debug("%s: Writing garbage to %x\n", @@ -577,7 +563,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, i915_gem_object_unpin_map(batch->obj); intel_gt_chipset_flush(engine->gt); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -696,7 +682,7 @@ out_unpin: break; } - if (igt_flush_test(ctx->i915)) + if (igt_flush_test(engine->i915)) err = -EIO; out_batch: i915_vma_unpin_and_release(&batch, 0); @@ -709,38 +695,31 @@ static int live_dirty_whitelist(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_gem_context *ctx; enum intel_engine_id id; - struct drm_file *file; - int err = 0; /* Can the user write to the whitelisted registers? 
*/ if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - file = mock_file(gt->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(gt->i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - for_each_engine(engine, gt, id) { + struct intel_context *ce; + int err; + if (engine->whitelist.count == 0) continue; - err = check_dirty_whitelist(ctx, engine); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = check_dirty_whitelist(ce); + intel_context_put(ce); if (err) - goto out_file; + return err; } -out_file: - mock_file_free(gt->i915, file); - return err; + return 0; } static int live_reset_whitelist(void *arg) @@ -830,12 +809,15 @@ err_req: static int scrub_whitelisted_registers(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *batch; int i, err = 0; u32 *cs; - batch = create_batch(ctx); + vm = i915_gem_context_get_vm_rcu(ctx); + batch = create_batch(vm); + i915_vm_put(vm); if (IS_ERR(batch)) return PTR_ERR(batch); diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 2a77c051f36a..aeb1d1f616e8 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context) mutex_init(&timeline->mutex); - INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h index 689efc66c908..d2bcc3df6183 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h @@ -7,6 +7,8 @@ #ifndef 
__MOCK_TIMELINE__ #define __MOCK_TIMELINE__ +#include <linux/types.h> + struct intel_timeline; void mock_timeline_init(struct intel_timeline *timeline, u64 context); diff --git a/drivers/gpu/drm/i915/gt/uc/Makefile b/drivers/gpu/drm/i915/gt/uc/Makefile deleted file mode 100644 index bec94d434cb6..000000000000 --- a/drivers/gpu/drm/i915/gt/uc/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# For building individual subdir files on the command line -subdir-ccflags-y += -I$(srctree)/$(src)/../.. - -# Extra header tests -header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 3ee4a4e7689d..5d00a3b2d914 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -32,18 +32,17 @@ * just the HuC, but more are expected to land in the future). */ -static void gen8_guc_raise_irq(struct intel_guc *guc) +void intel_guc_notify(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - intel_uncore_write(gt->uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); -} - -static void gen11_guc_raise_irq(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - intel_uncore_write(gt->uncore, GEN11_GUC_HOST_INTERRUPT, 0); + /* + * On Gen11+, the value written to the register is passed as a payload + * to the FW. However, the FW currently treats all values the same way + * (H2G interrupt), so we can just write the value that the HW expects + * on older gens. 
+ */ + intel_uncore_write(gt->uncore, guc->notify_reg, GUC_SEND_TRIGGER); } static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) @@ -177,15 +176,13 @@ void intel_guc_init_early(struct intel_guc *guc) mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; if (INTEL_GEN(i915) >= 11) { - guc->notify = gen11_guc_raise_irq; + guc->notify_reg = GEN11_GUC_HOST_INTERRUPT; guc->interrupts.reset = gen11_reset_guc_interrupts; guc->interrupts.enable = gen11_enable_guc_interrupts; guc->interrupts.disable = gen11_disable_guc_interrupts; } else { - guc->notify = gen8_guc_raise_irq; + guc->notify_reg = GUC_SEND_INTERRUPT; guc->interrupts.reset = gen9_reset_guc_interrupts; guc->interrupts.enable = gen9_enable_guc_interrupts; guc->interrupts.disable = gen9_disable_guc_interrupts; @@ -401,18 +398,8 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_log_destroy(&guc->log); intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); -} -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size) -{ - WARN(1, "Unexpected send: action=%#x\n", *action); - return -ENODEV; -} - -void intel_guc_to_host_event_handler_nop(struct intel_guc *guc) -{ - WARN(1, "Unexpected event: no suitable handler\n"); + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED); } /* @@ -704,3 +691,37 @@ err: i915_gem_object_put(obj); return vma; } + +/** + * intel_guc_allocate_and_map_vma() - Allocate and map VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) + * @out_vma: return variable for the allocated vma pointer + * @out_vaddr: return variable for the obj mapping + * + * This wrapper calls intel_guc_allocate_vma() and then maps the allocated + * object with I915_MAP_WB. + * + * Return: 0 if successful, a negative errno code otherwise. 
+ */ +int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, + struct i915_vma **out_vma, void **out_vaddr) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, size); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + *out_vma = vma; + *out_vaddr = vaddr; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e6400204a2bd..910d49590068 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -20,8 +20,8 @@ struct __guc_ads_blob; /* * Top level structure of GuC. It handles firmware loading and manages client - * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy - * ExecList submission. + * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList + * submission. */ struct intel_guc { struct intel_uc_fw fw; @@ -46,13 +46,13 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; - struct ida stage_ids; - struct intel_guc_client *execbuf_client; + struct i915_vma *workqueue; + void *workqueue_vaddr; + spinlock_t wq_lock; - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - /* Cyclic counter mod pagesize */ - u32 db_cacheline; + struct i915_vma *proc_desc; + void *proc_desc_vaddr; /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; @@ -64,44 +64,33 @@ struct intel_guc { enum forcewake_domains fw_domains; } send_regs; + /* register used to send interrupts to the GuC FW */ + i915_reg_t notify_reg; + /* Store msg (e.g. 
log flush) that we see while CTBs are disabled */ u32 mmio_msg; /* To serialize the intel_guc_send actions */ struct mutex send_mutex; - - /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len, - u32 *response_buf, u32 response_buf_size); - - /* GuC's FW specific event handler function */ - void (*handler)(struct intel_guc *guc); - - /* GuC's FW specific notify function */ - void (*notify)(struct intel_guc *guc); }; static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return guc->send(guc, action, len, NULL, 0); + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); } static inline int intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { - return guc->send(guc, action, len, response_buf, response_buf_size); -} - -static inline void intel_guc_notify(struct intel_guc *guc) -{ - guc->notify(guc); + return intel_guc_ct_send(&guc->ct, action, len, + response_buf, response_buf_size); } static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) { - guc->handler(guc); + intel_guc_ct_event_handler(&guc->ct); } /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ @@ -136,12 +125,9 @@ void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_write_params(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); void intel_guc_fini(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size); +void intel_guc_notify(struct intel_guc *guc); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size); -void intel_guc_to_host_event_handler(struct intel_guc *guc); -void intel_guc_to_host_event_handler_nop(struct intel_guc *guc); int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, const u32 *payload, u32 len); int 
intel_guc_sample_forcewake(struct intel_guc *guc); @@ -149,6 +135,8 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); +int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, + struct i915_vma **out_vma, void **out_vaddr); static inline bool intel_guc_is_supported(struct intel_guc *guc) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index ca6674b8e00c..101728006ae9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -93,7 +93,8 @@ static void __guc_ads_init(struct intel_guc *guc) */ blob->ads.golden_context_lrca[engine_class] = 0; blob->ads.eng_state_size[engine_class] = - intel_engine_context_size(dev_priv, engine_class) - + intel_engine_context_size(guc_to_gt(guc), + engine_class) - skipped_size; } @@ -135,32 +136,19 @@ static void __guc_ads_init(struct intel_guc *guc) int intel_guc_ads_create(struct intel_guc *guc) { const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob)); - struct i915_vma *vma; - void *blob; int ret; GEM_BUG_ON(guc->ads_vma); - vma = intel_guc_allocate_vma(guc, size); - if (IS_ERR(vma)) - return PTR_ERR(vma); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, + (void **)&guc->ads_blob); - blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(blob)) { - ret = PTR_ERR(blob); - goto err_vma; - } - - guc->ads_vma = vma; - guc->ads_blob = blob; + if (ret) + return ret; __guc_ads_init(guc); return 0; - -err_vma: - i915_vma_unpin_and_release(&guc->ads_vma, 0); - return ret; } void intel_guc_ads_destroy(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index b49115517510..c6f971a049f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -37,13 +37,10 @@ static void ct_incoming_request_worker_func(struct work_struct *w); */ void intel_guc_ct_init_early(struct intel_guc_ct *ct) { - /* we're using static channel owners */ - ct->host_channel.owner = CTB_OWNER_HOST; - - spin_lock_init(&ct->lock); - INIT_LIST_HEAD(&ct->pending_requests); - INIT_LIST_HEAD(&ct->incoming_requests); - INIT_WORK(&ct->worker, ct_incoming_request_worker_func); + spin_lock_init(&ct->requests.lock); + INIT_LIST_HEAD(&ct->requests.pending); + INIT_LIST_HEAD(&ct->requests.incoming); + INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); } static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) @@ -64,14 +61,13 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type) } static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, - u32 cmds_addr, u32 size, u32 owner) + u32 cmds_addr, u32 size) { - CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", - desc, cmds_addr, size, owner); + CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size); memset(desc, 0, sizeof(*desc)); desc->addr = cmds_addr; desc->size = size; - desc->owner = owner; + desc->owner = CTB_OWNER_HOST; } static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) @@ -104,12 +100,11 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, } static int guc_action_deregister_ct_buffer(struct intel_guc *guc, - u32 owner, u32 type) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, - owner, + CTB_OWNER_HOST, type }; int err; @@ -117,20 +112,27 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, /* Can't use generic send(), CT deregistration must go over MMIO */ err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (err) - DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n", - guc_ct_buffer_type_to_str(type), owner, err); + DRM_ERROR("CT: deregister %s buffer failed; err=%d\n", + 
guc_ct_buffer_type_to_str(type), err); return err; } -static int ctch_init(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_init - Init buffer-based communication + * @ct: pointer to CT struct + * + * Allocate memory required for buffer-based communication. + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_init(struct intel_guc_ct *ct) { - struct i915_vma *vma; + struct intel_guc *guc = ct_to_guc(ct); void *blob; int err; int i; - GEM_BUG_ON(ctch->vma); + GEM_BUG_ON(ct->vma); /* We allocate 1 page to hold both descriptors and both buffers. * ___________..................... @@ -154,71 +156,65 @@ static int ctch_init(struct intel_guc *guc, * other code will need updating as well. */ - /* allocate vma */ - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_out; + err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob); + if (err) { + DRM_ERROR("CT: channel allocation failed; err=%d\n", err); + return err; } - ctch->vma = vma; - /* map first page */ - blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(blob)) { - err = PTR_ERR(blob); - goto err_vma; - } CT_DEBUG_DRIVER("CT: vma base=%#x\n", - intel_guc_ggtt_offset(guc, ctch->vma)); + intel_guc_ggtt_offset(guc, ct->vma)); /* store pointers to desc and cmds */ - for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { - GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i; - ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; + for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i; + ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; } return 0; - -err_vma: - i915_vma_unpin_and_release(&ctch->vma, 0); -err_out: - CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", - ctch->owner, err); - return err; } -static void ctch_fini(struct 
intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_fini - Fini buffer-based communication + * @ct: pointer to CT struct + * + * Deallocate memory required for buffer-based communication. + */ +void intel_guc_ct_fini(struct intel_guc_ct *ct) { - GEM_BUG_ON(ctch->enabled); + GEM_BUG_ON(ct->enabled); - i915_vma_unpin_and_release(&ctch->vma, I915_VMA_RELEASE_MAP); + i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP); } -static int ctch_enable(struct intel_guc *guc, - struct intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_enable - Enable buffer based command transport. + * @ct: pointer to CT struct + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_guc_ct_enable(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); u32 base; int err; int i; - GEM_BUG_ON(!ctch->vma); - - GEM_BUG_ON(ctch->enabled); + GEM_BUG_ON(ct->enabled); /* vma should be already allocated and map'ed */ - base = intel_guc_ggtt_offset(guc, ctch->vma); + GEM_BUG_ON(!ct->vma); + base = intel_guc_ggtt_offset(guc, ct->vma); /* (re)initialize descriptors * cmds buffers are in the second half of the blob page */ - for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - guc_ct_buffer_desc_init(ctch->ctbs[i].desc, + guc_ct_buffer_desc_init(ct->ctbs[i].desc, base + PAGE_SIZE/4 * i + PAGE_SIZE/2, - PAGE_SIZE/4, - ctch->owner); + PAGE_SIZE/4); } /* register buffers, starting wirh RECV buffer @@ -236,38 +232,42 @@ static int ctch_enable(struct intel_guc *guc, if (unlikely(err)) goto err_deregister; - ctch->enabled = true; + ct->enabled = true; return 0; err_deregister: guc_action_deregister_ct_buffer(guc, - ctch->owner, INTEL_GUC_CT_BUFFER_TYPE_RECV); err_out: - DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err); + DRM_ERROR("CT: can't open channel; err=%d\n", err); return err; } -static void ctch_disable(struct intel_guc *guc, - struct 
intel_guc_ct_channel *ctch) +/** + * intel_guc_ct_disable - Disable buffer based command transport. + * @ct: pointer to CT struct + */ +void intel_guc_ct_disable(struct intel_guc_ct *ct) { - GEM_BUG_ON(!ctch->enabled); + struct intel_guc *guc = ct_to_guc(ct); - ctch->enabled = false; + GEM_BUG_ON(!ct->enabled); - guc_action_deregister_ct_buffer(guc, - ctch->owner, - INTEL_GUC_CT_BUFFER_TYPE_SEND); - guc_action_deregister_ct_buffer(guc, - ctch->owner, - INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct->enabled = false; + + if (intel_guc_is_running(guc)) { + guc_action_deregister_ct_buffer(guc, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + guc_action_deregister_ct_buffer(guc, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + } } -static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) +static u32 ct_get_next_fence(struct intel_guc_ct *ct) { /* For now it's trivial */ - return ++ctch->next_fence; + return ++ct->requests.next_fence; } /** @@ -440,35 +440,34 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) return err; } -static int ctch_send(struct intel_guc_ct *ct, - struct intel_guc_ct_channel *ctch, - const u32 *action, - u32 len, - u32 *response_buf, - u32 response_buf_size, - u32 *status) +static int ct_send(struct intel_guc_ct *ct, + const u32 *action, + u32 len, + u32 *response_buf, + u32 response_buf_size, + u32 *status) { - struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND]; struct guc_ct_buffer_desc *desc = ctb->desc; struct ct_request request; unsigned long flags; u32 fence; int err; - GEM_BUG_ON(!ctch->enabled); + GEM_BUG_ON(!ct->enabled); GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(!response_buf && response_buf_size); - fence = ctch_get_next_fence(ctch); + fence = ct_get_next_fence(ct); request.fence = fence; request.status = 0; request.response_len = response_buf_size; request.response_buf = response_buf; - spin_lock_irqsave(&ct->lock, flags); - 
list_add_tail(&request.link, &ct->pending_requests); - spin_unlock_irqrestore(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); + list_add_tail(&request.link, &ct->requests.pending); + spin_unlock_irqrestore(&ct->requests.lock, flags); err = ctb_write(ctb, action, len, fence, !!response_buf); if (unlikely(err)) @@ -501,9 +500,9 @@ static int ctch_send(struct intel_guc_ct *ct, } unlink: - spin_lock_irqsave(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); list_del(&request.link); - spin_unlock_irqrestore(&ct->lock, flags); + spin_unlock_irqrestore(&ct->requests.lock, flags); return err; } @@ -511,18 +510,21 @@ unlink: /* * Command Transport (CT) buffer based GuC send function. */ -int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, +int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { - struct intel_guc_ct *ct = &guc->ct; - struct intel_guc_ct_channel *ctch = &ct->host_channel; + struct intel_guc *guc = ct_to_guc(ct); u32 status = ~0; /* undefined */ int ret; + if (unlikely(!ct->enabled)) { + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; + } + mutex_lock(&guc->send_mutex); - ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size, - &status); + ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", action[0], ret, status); @@ -653,8 +655,8 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status); - spin_lock(&ct->lock); - list_for_each_entry(req, &ct->pending_requests, link) { + spin_lock(&ct->requests.lock); + list_for_each_entry(req, &ct->requests.pending, link) { if (unlikely(fence != req->fence)) { CT_DEBUG_DRIVER("CT: request %u awaits response\n", req->fence); @@ -672,7 +674,7 @@ static int ct_handle_response(struct 
intel_guc_ct *ct, const u32 *msg) found = true; break; } - spin_unlock(&ct->lock); + spin_unlock(&ct->requests.lock); if (!found) DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg); @@ -710,13 +712,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) u32 *payload; bool done; - spin_lock_irqsave(&ct->lock, flags); - request = list_first_entry_or_null(&ct->incoming_requests, + spin_lock_irqsave(&ct->requests.lock, flags); + request = list_first_entry_or_null(&ct->requests.incoming, struct ct_incoming_request, link); if (request) list_del(&request->link); - done = !!list_empty(&ct->incoming_requests); - spin_unlock_irqrestore(&ct->lock, flags); + done = !!list_empty(&ct->requests.incoming); + spin_unlock_irqrestore(&ct->requests.lock, flags); if (!request) return true; @@ -734,12 +736,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) static void ct_incoming_request_worker_func(struct work_struct *w) { - struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker); + struct intel_guc_ct *ct = + container_of(w, struct intel_guc_ct, requests.worker); bool done; done = ct_process_incoming_requests(ct); if (!done) - queue_work(system_unbound_wq, &ct->worker); + queue_work(system_unbound_wq, &ct->requests.worker); } /** @@ -777,23 +780,28 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) } memcpy(request->msg, msg, 4 * msglen); - spin_lock_irqsave(&ct->lock, flags); - list_add_tail(&request->link, &ct->incoming_requests); - spin_unlock_irqrestore(&ct->lock, flags); + spin_lock_irqsave(&ct->requests.lock, flags); + list_add_tail(&request->link, &ct->requests.incoming); + spin_unlock_irqrestore(&ct->requests.lock, flags); - queue_work(system_unbound_wq, &ct->worker); + queue_work(system_unbound_wq, &ct->requests.worker); return 0; } -static void ct_process_host_channel(struct intel_guc_ct *ct) +/* + * When we're communicating with the GuC over CT, GuC uses events + * to notify us about new 
messages being posted on the RECV buffer. + */ +void intel_guc_ct_event_handler(struct intel_guc_ct *ct) { - struct intel_guc_ct_channel *ctch = &ct->host_channel; - struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV]; + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV]; u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ int err = 0; - if (!ctch->enabled) + if (unlikely(!ct->enabled)) { + WARN(1, "Unexpected GuC event received while CT disabled!\n"); return; + } do { err = ctb_read(ctb, msg); @@ -812,86 +820,3 @@ static void ct_process_host_channel(struct intel_guc_ct *ct) } } -/* - * When we're communicating with the GuC over CT, GuC uses events - * to notify us about new messages being posted on the RECV buffer. - */ -void intel_guc_to_host_event_handler_ct(struct intel_guc *guc) -{ - struct intel_guc_ct *ct = &guc->ct; - - ct_process_host_channel(ct); -} - -/** - * intel_guc_ct_init - Init CT communication - * @ct: pointer to CT struct - * - * Allocate memory required for communication via - * the CT channel. - * - * Return: 0 on success, a negative errno code on failure. - */ -int intel_guc_ct_init(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - int err; - - err = ctch_init(guc, ctch); - if (unlikely(err)) { - DRM_ERROR("CT: can't open channel %d; err=%d\n", - ctch->owner, err); - return err; - } - - GEM_BUG_ON(!ctch->vma); - return 0; -} - -/** - * intel_guc_ct_fini - Fini CT communication - * @ct: pointer to CT struct - * - * Deallocate memory required for communication via - * the CT channel. - */ -void intel_guc_ct_fini(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - ctch_fini(guc, ctch); -} - -/** - * intel_guc_ct_enable - Enable buffer based command transport. - * @ct: pointer to CT struct - * - * Return: 0 on success, a negative errno code on failure. 
- */ -int intel_guc_ct_enable(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - if (ctch->enabled) - return 0; - - return ctch_enable(guc, ctch); -} - -/** - * intel_guc_ct_disable - Disable buffer based command transport. - * @ct: pointer to CT struct - */ -void intel_guc_ct_disable(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_guc_ct_channel *ctch = &ct->host_channel; - - if (!ctch->enabled) - return; - - ctch_disable(guc, ctch); -} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 7c24d83f5c24..3e7fe237cfa5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -35,44 +35,28 @@ struct intel_guc_ct_buffer { u32 *cmds; }; -/** Represents pair of command transport buffers. - * - * Buffers go in pairs to allow bi-directional communication. - * To simplify the code we place both of them in the same vma. - * Buffers from the same pair must share unique owner id. - * - * @vma: pointer to the vma with pair of CT buffers - * @ctbs: buffers for sending(0) and receiving(1) commands - * @owner: unique identifier - * @next_fence: fence to be used with next send command - */ -struct intel_guc_ct_channel { - struct i915_vma *vma; - struct intel_guc_ct_buffer ctbs[2]; - u32 owner; - u32 next_fence; - bool enabled; -}; -/** Holds all command transport channels. +/** Top-level structure for Command Transport related data * - * @host_channel: main channel used by the host + * Includes a pair of CT buffers for bi-directional communication and tracking + * for the H2G and G2H requests sent and received through the buffers. 
*/ struct intel_guc_ct { - struct intel_guc_ct_channel host_channel; - /* other channels are tbd */ + struct i915_vma *vma; + bool enabled; - /** @lock: protects pending requests list */ - spinlock_t lock; + /* buffers for sending(0) and receiving(1) commands */ + struct intel_guc_ct_buffer ctbs[2]; - /** @pending_requests: list of requests waiting for response */ - struct list_head pending_requests; + struct { + u32 next_fence; /* fence to be used with next request to send */ - /** @incoming_requests: list of incoming requests */ - struct list_head incoming_requests; + spinlock_t lock; /* protects pending requests list */ + struct list_head pending; /* requests waiting for response */ - /** @worker: worker for handling incoming requests */ - struct work_struct worker; + struct list_head incoming; /* incoming requests */ + struct work_struct worker; /* handler for incoming requests */ + } requests; }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); @@ -81,13 +65,13 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct); int intel_guc_ct_enable(struct intel_guc_ct *ct); void intel_guc_ct_disable(struct intel_guc_ct *ct); -static inline void intel_guc_ct_stop(struct intel_guc_ct *ct) +static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct) { - ct->host_channel.enabled = false; + return ct->enabled; } -int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, +int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size); -void intel_guc_to_host_event_handler_ct(struct intel_guc *guc); +void intel_guc_ct_event_handler(struct intel_guc_ct *ct); #endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 5528224448f6..3a1c47d600ea 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -149,7 +149,7 @@ int intel_guc_fw_upload(struct intel_guc *guc) * Current uCode 
expects the code to be loaded at 8k; locations below * this are used for the stack. */ - ret = intel_uc_fw_upload(&guc->fw, gt, 0x2000, UOS_MOVE); + ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index a26a85d50209..a6b733c146c9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -31,7 +31,6 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_DB_SIZE (PAGE_SIZE) #define GUC_WQ_SIZE (PAGE_SIZE * 2) /* Work queue item header definitions */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 2498c55e0ea5..9e42324fdecd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -18,15 +18,6 @@ #include "i915_drv.h" #include "i915_trace.h" -enum { - GUC_PREEMPT_NONE = 0, - GUC_PREEMPT_INPROGRESS, - GUC_PREEMPT_FINISHED, -}; -#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 -#define GUC_PREEMPT_BREADCRUMB_BYTES \ - (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS) - /** * DOC: GuC-based command submission * @@ -36,25 +27,14 @@ enum { * code) matches the old submission model and will be updated as part of the * upgrade to the new flow. * - * GuC client: - * A intel_guc_client refers to a submission path through GuC. Currently, there - * is only one client, which is charged with all submissions to the GuC. This - * struct is the owner of a doorbell, a process descriptor and a workqueue (all - * of them inside a single gem object that contains all required pages for these - * elements). - * * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. 
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a - * guc_stage_desc (via the client's stage_id), so effectively only one - * gets used. This stage descriptor lets the GuC know about the doorbell, - * workqueue and process descriptor. Theoretically, it also lets the GuC - * know about our HW contexts (context ID, etc...), but we actually - * employ a kind of submission where the GuC uses the LRCA sent via the work - * item instead (the single guc_stage_desc associated to execbuf client - * contains information about the default kernel context only, but this is - * essentially unused). This is called a "proxy" submission. + * descriptors, and shares them with the GuC. Currently, we only use one + * descriptor. This stage descriptor lets the GuC know about the workqueue and + * process descriptor. Theoretically, it also lets the GuC know about our HW + * contexts (context ID, etc...), but we actually employ a kind of submission + * where the GuC uses the LRCA sent via the work item instead. This is called + * a "proxy" submission. * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes @@ -63,11 +43,6 @@ enum { * Firmware writes a success/fail code back to the action register after * processes the request. The kernel driver polls waiting for this update and * then proceeds. - * See intel_guc_send() - * - * Doorbells: - * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) - * mapped into process space. 
* * Work Items: * There are several types of work items that the host may place into a @@ -84,213 +59,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static inline bool is_high_priority(struct intel_guc_client *client) +static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) { - return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || - client->priority == GUC_CLIENT_PRIORITY_HIGH); -} - -static int reserve_doorbell(struct intel_guc_client *client) -{ - unsigned long offset; - unsigned long end; - u16 id; - - GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID); + struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; - /* - * The bitmap tracks which doorbell registers are currently in use. - * It is split into two halves; the first half is used for normal - * priority contexts, the second half for high-priority ones. - */ - offset = 0; - end = GUC_NUM_DOORBELLS / 2; - if (is_high_priority(client)) { - offset = end; - end += offset; - } - - id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset); - if (id == end) - return -ENOSPC; - - __set_bit(id, client->guc->doorbell_bitmap); - client->doorbell_id = id; - DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", - client->stage_id, yesno(is_high_priority(client)), - id); - return 0; + return &base[id]; } -static bool has_doorbell(struct intel_guc_client *client) +static int guc_workqueue_create(struct intel_guc *guc) { - if (client->doorbell_id == GUC_DOORBELL_INVALID) - return false; - - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); + return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue, + &guc->workqueue_vaddr); } -static void unreserve_doorbell(struct intel_guc_client *client) +static void guc_workqueue_destroy(struct intel_guc *guc) { - GEM_BUG_ON(!has_doorbell(client)); - - __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); - 
client->doorbell_id = GUC_DOORBELL_INVALID; + i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP); } /* - * Tell the GuC to allocate or deallocate a specific doorbell - */ - -static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id) -{ - u32 action[] = { - INTEL_GUC_ACTION_ALLOCATE_DOORBELL, - stage_id - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - -static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) -{ - u32 action[] = { - INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, - stage_id - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - -static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) -{ - struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; - - return &base[client->stage_id]; -} - -/* - * Initialise, update, or clear doorbell data shared with the GuC - * - * These functions modify shared data and so need access to the mapped - * client object which contains the page being used for the doorbell + * Initialise the process descriptor shared with the GuC firmware. 
*/ - -static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id) -{ - struct guc_stage_desc *desc; - - /* Update the GuC's idea of the doorbell ID */ - desc = __get_stage_desc(client); - desc->db_id = new_id; -} - -static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) -{ - return client->vaddr + client->doorbell_offset; -} - -static bool __doorbell_valid(struct intel_guc *guc, u16 db_id) -{ - struct intel_uncore *uncore = guc_to_gt(guc)->uncore; - - GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); - return intel_uncore_read(uncore, GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID; -} - -static void __init_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *doorbell; - - doorbell = __get_doorbell(client); - doorbell->db_status = GUC_DOORBELL_ENABLED; - doorbell->cookie = 0; -} - -static void __fini_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *doorbell; - u16 db_id = client->doorbell_id; - - doorbell = __get_doorbell(client); - doorbell->db_status = GUC_DOORBELL_DISABLED; - - /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit - * to go to zero after updating db_status before we call the GuC to - * release the doorbell - */ - if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10)) - WARN_ONCE(true, "Doorbell never became invalid after disable\n"); -} - -static int create_doorbell(struct intel_guc_client *client) +static int guc_proc_desc_create(struct intel_guc *guc) { - int ret; - - if (WARN_ON(!has_doorbell(client))) - return -ENODEV; /* internal setup error, should never happen */ - - __update_doorbell_desc(client, client->doorbell_id); - __init_doorbell(client); - - ret = __guc_allocate_doorbell(client->guc, client->stage_id); - if (ret) { - __fini_doorbell(client); - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n", - client->stage_id, ret); - return ret; - } + const u32 size = 
PAGE_ALIGN(sizeof(struct guc_process_desc)); - return 0; + return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc, + &guc->proc_desc_vaddr); } -static int destroy_doorbell(struct intel_guc_client *client) +static void guc_proc_desc_destroy(struct intel_guc *guc) { - int ret; - - GEM_BUG_ON(!has_doorbell(client)); - - __fini_doorbell(client); - ret = __guc_deallocate_doorbell(client->guc, client->stage_id); - if (ret) - DRM_ERROR("Couldn't destroy client %u doorbell: %d\n", - client->stage_id, ret); - - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - - return ret; + i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP); } -static unsigned long __select_cacheline(struct intel_guc *guc) -{ - unsigned long offset; - - /* Doorbell uses a single cache line within a page */ - offset = offset_in_page(guc->db_cacheline); - - /* Moving to next cache line to reduce contention */ - guc->db_cacheline += cache_line_size(); - - DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", - offset, guc->db_cacheline, cache_line_size()); - return offset; -} - -static inline struct guc_process_desc * -__get_process_desc(struct intel_guc_client *client) -{ - return client->vaddr + client->proc_desc_offset; -} - -/* - * Initialise the process descriptor shared with the GuC firmware. 
- */ -static void guc_proc_desc_init(struct intel_guc_client *client) +static void guc_proc_desc_init(struct intel_guc *guc) { struct guc_process_desc *desc; - desc = memset(__get_process_desc(client), 0, sizeof(*desc)); + desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc)); /* * XXX: pDoorbell and WQVBaseAddress are pointers in process address @@ -301,47 +108,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client) desc->wq_base_addr = 0; desc->db_base_addr = 0; - desc->stage_id = client->stage_id; desc->wq_size_bytes = GUC_WQ_SIZE; desc->wq_status = WQ_STATUS_ACTIVE; - desc->priority = client->priority; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; } -static void guc_proc_desc_fini(struct intel_guc_client *client) +static void guc_proc_desc_fini(struct intel_guc *guc) { - struct guc_process_desc *desc; - - desc = __get_process_desc(client); - memset(desc, 0, sizeof(*desc)); + memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc)); } static int guc_stage_desc_pool_create(struct intel_guc *guc) { - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, - PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS)); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } - - guc->stage_desc_pool = vma; - guc->stage_desc_pool_vaddr = vaddr; - ida_init(&guc->stage_ids); + u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS); - return 0; + return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, + &guc->stage_desc_pool_vaddr); } static void guc_stage_desc_pool_destroy(struct intel_guc *guc) { - ida_destroy(&guc->stage_ids); i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); } @@ -349,63 +136,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) * Initialise/clear the stage descriptor shared 
with the GuC firmware. * * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures relating to this client (doorbell, process descriptor, - * write queue, etc). + * data structures related to work submission (process descriptor, write queue, + * etc). */ -static void guc_stage_desc_init(struct intel_guc_client *client) +static void guc_stage_desc_init(struct intel_guc *guc) { - struct intel_guc *guc = client->guc; struct guc_stage_desc *desc; - u32 gfx_addr; - desc = __get_stage_desc(client); + /* we only use 1 stage desc, so hardcode it to 0 */ + desc = __get_stage_desc(guc, 0); memset(desc, 0, sizeof(*desc)); desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; - if (is_high_priority(client)) - desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; - desc->stage_id = client->stage_id; - desc->priority = client->priority; - desc->db_id = client->doorbell_id; - /* - * The doorbell, process descriptor, and workqueue are all parts - * of the client object, which the GuC will reference via the GGTT - */ - gfx_addr = intel_guc_ggtt_offset(guc, client->vma); - desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + - client->doorbell_offset; - desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); - desc->db_trigger_uk = gfx_addr + client->doorbell_offset; - desc->process_desc = gfx_addr + client->proc_desc_offset; - desc->wq_addr = gfx_addr + GUC_DB_SIZE; - desc->wq_size = GUC_WQ_SIZE; + desc->stage_id = 0; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; - desc->desc_private = ptr_to_u64(client); + desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc); + desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue); + desc->wq_size = GUC_WQ_SIZE; } -static void guc_stage_desc_fini(struct intel_guc_client *client) +static void guc_stage_desc_fini(struct intel_guc *guc) { struct guc_stage_desc *desc; - desc = __get_stage_desc(client); + desc = __get_stage_desc(guc, 0); memset(desc, 0, 
sizeof(*desc)); } /* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct intel_guc_client *client, +static void guc_wq_item_append(struct intel_guc *guc, u32 target_engine, u32 context_desc, u32 ring_tail, u32 fence_id) { /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size / sizeof(u32) - 1; - struct guc_process_desc *desc = __get_process_desc(client); + struct guc_process_desc *desc = guc->proc_desc_vaddr; struct guc_wq_item *wqi; u32 wq_off; - lockdep_assert_held(&client->wq_lock); + lockdep_assert_held(&guc->wq_lock); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither @@ -425,58 +198,30 @@ static void guc_wq_item_append(struct intel_guc_client *client, GUC_WQ_SIZE) < wqi_size); GEM_BUG_ON(wq_off & (wqi_size - 1)); - /* WQ starts from the page after doorbell / process_desc */ - wqi = client->vaddr + wq_off + GUC_DB_SIZE; - - if (I915_SELFTEST_ONLY(client->use_nop_wqi)) { - wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT); - } else { - /* Now fill in the 4-word work queue item */ - wqi->header = WQ_TYPE_INORDER | - (wqi_len << WQ_LEN_SHIFT) | - (target_engine << WQ_TARGET_SHIFT) | - WQ_NO_WCFLUSH_WAIT; - wqi->context_desc = context_desc; - wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; - GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); - wqi->fence_id = fence_id; - } + wqi = guc->workqueue_vaddr + wq_off; + + /* Now fill in the 4-word work queue item */ + wqi->header = WQ_TYPE_INORDER | + (wqi_len << WQ_LEN_SHIFT) | + (target_engine << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + wqi->context_desc = context_desc; + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); + wqi->fence_id = fence_id; /* Make the update visible to GuC */ WRITE_ONCE(desc->tail, (wq_off + 
wqi_size) & (GUC_WQ_SIZE - 1)); } -static void guc_ring_doorbell(struct intel_guc_client *client) -{ - struct guc_doorbell_info *db; - u32 cookie; - - lockdep_assert_held(&client->wq_lock); - - /* pointer of current doorbell cacheline */ - db = __get_doorbell(client); - - /* - * We're not expecting the doorbell cookie to change behind our back, - * we also need to treat 0 as a reserved value. - */ - cookie = READ_ONCE(db->cookie); - WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie); - - /* XXX: doorbell was lost and need to acquire it again */ - GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); -} - static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); + u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - guc_wq_item_append(client, engine->guc_id, ctx_desc, + guc_wq_item_append(guc, engine->guc_id, ctx_desc, ring_tail, rq->fence.seqno); - guc_ring_doorbell(client); } /* @@ -488,10 +233,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) */ static void flush_ggtt_writes(struct i915_vma *vma) { - struct drm_i915_private *i915 = vma->vm->i915; - if (i915_vma_is_map_and_fenceable(vma)) - intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS); + intel_uncore_posting_read_fw(vma->vm->gt->uncore, + GUC_STATUS); } static void guc_submit(struct intel_engine_cs *engine, @@ -499,9 +243,8 @@ static void guc_submit(struct intel_engine_cs *engine, struct i915_request **end) { struct intel_guc *guc = &engine->gt->uc.guc; - struct intel_guc_client *client = guc->execbuf_client; - spin_lock(&client->wq_lock); + spin_lock(&guc->wq_lock); do { struct i915_request *rq = *out++; @@ -510,7 +253,7 @@ static void guc_submit(struct intel_engine_cs *engine, guc_add_request(guc, rq); } 
while (out != end); - spin_unlock(&client->wq_lock); + spin_unlock(&guc->wq_lock); } static inline int rq_prio(const struct i915_request *rq) @@ -529,7 +272,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx) * required if we generalise the inflight tracking. */ - intel_gt_pm_get(rq->engine->gt); + __intel_gt_pm_get(rq->engine->gt); return i915_request_get(rq); } @@ -537,7 +280,7 @@ static void schedule_out(struct i915_request *rq) { trace_i915_request_out(rq); - intel_gt_pm_put(rq->engine->gt); + intel_gt_pm_put_async(rq->engine->gt); i915_request_put(rq); } @@ -572,7 +315,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - if (last && rq->hw_context != last->hw_context) { + if (last && rq->context != last->context) { if (port == last_port) goto done; @@ -631,7 +374,7 @@ static void guc_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * Prevent request submission to the hardware until we have @@ -658,7 +401,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) memset(execlists->inflight, 0, sizeof(execlists->inflight)); } -static void guc_reset(struct intel_engine_cs *engine, bool stalled) +static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq; @@ -677,20 +420,20 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled) stalled = false; __i915_request_reset(rq, stalled); - intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); + intel_lr_context_reset(engine, rq->context, rq->head, stalled); out_unlock: spin_unlock_irqrestore(&engine->active.lock, flags); } -static void guc_cancel_requests(struct intel_engine_cs *engine) +static void guc_reset_cancel(struct intel_engine_cs 
*engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + ENGINE_TRACE(engine, "\n"); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -751,8 +494,8 @@ static void guc_reset_finish(struct intel_engine_cs *engine) /* And kick in case we missed a new request submission. */ tasklet_hi_schedule(&execlists->tasklet); - GEM_TRACE("%s: depth->%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); + ENGINE_TRACE(engine, "depth->%d\n", + atomic_read(&execlists->tasklet.count)); } /* @@ -761,213 +504,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine) * path of guc_submit() above. */ -/* Check that a doorbell register is in the expected state */ -static bool doorbell_ok(struct intel_guc *guc, u16 db_id) -{ - bool valid; - - GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS); - - valid = __doorbell_valid(guc, db_id); - - if (test_bit(db_id, guc->doorbell_bitmap) == valid) - return true; - - DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n", - db_id, yesno(valid)); - - return false; -} - -static bool guc_verify_doorbells(struct intel_guc *guc) -{ - bool doorbells_ok = true; - u16 db_id; - - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) - if (!doorbell_ok(guc, db_id)) - doorbells_ok = false; - - return doorbells_ok; -} - -/** - * guc_client_alloc() - Allocate an intel_guc_client - * @guc: the intel_guc structure - * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW - * The kernel client to replace ExecList submission is created with - * NORMAL priority. Priority of a client for scheduler can be HIGH, - * while a preemption context can use CRITICAL. - * - * Return: An intel_guc_client object if success, else NULL. 
- */ -static struct intel_guc_client * -guc_client_alloc(struct intel_guc *guc, u32 priority) -{ - struct intel_guc_client *client; - struct i915_vma *vma; - void *vaddr; - int ret; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return ERR_PTR(-ENOMEM); - - client->guc = guc; - client->priority = priority; - client->doorbell_id = GUC_DOORBELL_INVALID; - spin_lock_init(&client->wq_lock); - - ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, - GFP_KERNEL); - if (ret < 0) - goto err_client; - - client->stage_id = ret; - - /* The first page is doorbell/proc_desc. Two followed pages are wq. */ - vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_id; - } - - /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ - client->vma = vma; - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_vma; - } - client->vaddr = vaddr; - - ret = reserve_doorbell(client); - if (ret) - goto err_vaddr; - - client->doorbell_offset = __select_cacheline(guc); - - /* - * Since the doorbell only requires a single cacheline, we can save - * space by putting the application process descriptor in the same - * page. Use the half of the page that doesn't include the doorbell. 
- */ - if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) - client->proc_desc_offset = 0; - else - client->proc_desc_offset = (GUC_DB_SIZE / 2); - - DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n", - priority, client, client->stage_id); - DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", - client->doorbell_id, client->doorbell_offset); - - return client; - -err_vaddr: - i915_gem_object_unpin_map(client->vma->obj); -err_vma: - i915_vma_unpin_and_release(&client->vma, 0); -err_id: - ida_simple_remove(&guc->stage_ids, client->stage_id); -err_client: - kfree(client); - return ERR_PTR(ret); -} - -static void guc_client_free(struct intel_guc_client *client) -{ - unreserve_doorbell(client); - i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP); - ida_simple_remove(&client->guc->stage_ids, client->stage_id); - kfree(client); -} - -static inline bool ctx_save_restore_disabled(struct intel_context *ce) -{ - u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1]; - -#define SR_DISABLED \ - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \ - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) - - return (sr & SR_DISABLED) == SR_DISABLED; - -#undef SR_DISABLED -} - -static int guc_clients_create(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - GEM_BUG_ON(guc->execbuf_client); - - client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for submission!\n"); - return PTR_ERR(client); - } - guc->execbuf_client = client; - - return 0; -} - -static void guc_clients_destroy(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - client = fetch_and_zero(&guc->execbuf_client); - if (client) - guc_client_free(client); -} - -static int __guc_client_enable(struct intel_guc_client *client) -{ - int ret; - - guc_proc_desc_init(client); - guc_stage_desc_init(client); - - ret = create_doorbell(client); - if (ret) - goto fail; - - return 0; - -fail: - 
guc_stage_desc_fini(client); - guc_proc_desc_fini(client); - return ret; -} - -static void __guc_client_disable(struct intel_guc_client *client) -{ - /* - * By the time we're here, GuC may have already been reset. if that is - * the case, instead of trying (in vain) to communicate with it, let's - * just cleanup the doorbell HW and our internal state. - */ - if (intel_guc_is_running(client->guc)) - destroy_doorbell(client); - else - __fini_doorbell(client); - - guc_stage_desc_fini(client); - guc_proc_desc_fini(client); -} - -static int guc_clients_enable(struct intel_guc *guc) -{ - return __guc_client_enable(guc->execbuf_client); -} - -static void guc_clients_disable(struct intel_guc *guc) -{ - if (guc->execbuf_client) - __guc_client_disable(guc->execbuf_client); -} - /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -988,13 +524,20 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - WARN_ON(!guc_verify_doorbells(guc)); - ret = guc_clients_create(guc); + ret = guc_workqueue_create(guc); if (ret) goto err_pool; + ret = guc_proc_desc_create(guc); + if (ret) + goto err_workqueue; + + spin_lock_init(&guc->wq_lock); + return 0; +err_workqueue: + guc_workqueue_destroy(guc); err_pool: guc_stage_desc_pool_destroy(guc); return ret; @@ -1002,83 +545,37 @@ err_pool: void intel_guc_submission_fini(struct intel_guc *guc) { - guc_clients_destroy(guc); - WARN_ON(!guc_verify_doorbells(guc)); - - if (guc->stage_desc_pool) + if (guc->stage_desc_pool) { + guc_proc_desc_destroy(guc); + guc_workqueue_destroy(guc); guc_stage_desc_pool_destroy(guc); + } } static void guc_interrupts_capture(struct intel_gt *gt) { - struct intel_rps *rps = >->rps; struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; + u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; + u32 dmask = irqs << 16 | irqs; - /* tell all command streamers to forward interrupts (but 
not vblank) - * to GuC - */ - irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt, id) - ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); - - /* route USER_INTERRUPT to Host, all others are sent to GuC. */ - irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - /* These three registers have the same bit definitions */ - intel_uncore_write(uncore, GUC_BCS_RCS_IER, ~irqs); - intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, ~irqs); - intel_uncore_write(uncore, GUC_WD_VECS_IER, ~irqs); + GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); - /* - * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all - * (unmasked) PM interrupts to the GuC. All other bits of this - * register *disable* generation of a specific interrupt. - * - * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when - * writing to the PM interrupt mask register, i.e. interrupts - * that must not be disabled. - * - * If the GuC is handling these interrupts, then we must not let - * the PM code disable ANY interrupt that the GuC is expecting. - * So for each ENABLED (0) bit in this register, we must SET the - * bit in pm_intrmsk_mbz so that it's left enabled for the GuC. - * GuC needs ARAT expired interrupt unmasked hence it is set in - * pm_intrmsk_mbz. - * - * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will - * result in the register bit being left SET! 
- */ - rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; - rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + /* Don't handle the ctx switch interrupt in GuC submission mode */ + intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0); + intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0); } static void guc_interrupts_release(struct intel_gt *gt) { - struct intel_rps *rps = >->rps; struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; + u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; + u32 dmask = irqs << 16 | irqs; - /* - * tell all command streamers NOT to forward interrupts or vblank - * to GuC. - */ - irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); - irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt, id) - ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); - - /* route all GT interrupts to the host */ - intel_uncore_write(uncore, GUC_BCS_RCS_IER, 0); - intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, 0); - intel_uncore_write(uncore, GUC_WD_VECS_IER, 0); - - rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; + GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); + + /* Handle ctx switch interrupts again */ + intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask); + intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask); } static void guc_set_default_submission(struct intel_engine_cs *engine) @@ -1102,11 +599,10 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) engine->park = engine->unpark = NULL; engine->reset.prepare = guc_reset_prepare; - engine->reset.reset = guc_reset; + engine->reset.rewind = guc_reset_rewind; + engine->reset.cancel = guc_reset_cancel; engine->reset.finish = guc_reset_finish; - engine->cancel_requests = guc_cancel_requests; - engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; @@ 
-1119,16 +615,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) GEM_BUG_ON(engine->irq_enable || engine->irq_disable); } -int intel_guc_submission_enable(struct intel_guc *guc) +void intel_guc_submission_enable(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; enum intel_engine_id id; - int err; - - err = i915_inject_probe_error(gt->i915, -ENXIO); - if (err) - return err; /* * We're using GuC work items for submitting work through GuC. Since @@ -1143,11 +634,8 @@ int intel_guc_submission_enable(struct intel_guc *guc) sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); - GEM_BUG_ON(!guc->execbuf_client); - - err = guc_clients_enable(guc); - if (err) - return err; + guc_proc_desc_init(guc); + guc_stage_desc_init(guc); /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(gt); @@ -1156,8 +644,6 @@ int intel_guc_submission_enable(struct intel_guc *guc) engine->set_default_submission = guc_set_default_submission; engine->set_default_submission(engine); } - - return 0; } void intel_guc_submission_disable(struct intel_guc *guc) @@ -1166,8 +652,12 @@ void intel_guc_submission_disable(struct intel_guc *guc) GEM_BUG_ON(gt->awake); /* GT should be parked first */ + /* Note: By the time we're here, GuC may have already been reset */ + guc_interrupts_release(gt); - guc_clients_disable(guc); + + guc_stage_desc_fini(guc); + guc_proc_desc_fini(guc); } static bool __guc_submission_support(struct intel_guc *guc) @@ -1186,6 +676,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc) guc->submission_supported = __guc_submission_support(guc); } -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftest_guc.c" -#endif +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == guc_set_default_submission; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 54d716828352..e402a2932592 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -6,62 +6,18 @@ #ifndef _INTEL_GUC_SUBMISSION_H_ #define _INTEL_GUC_SUBMISSION_H_ -#include <linux/spinlock.h> +#include <linux/types.h> -#include "gt/intel_engine_types.h" - -#include "i915_gem.h" -#include "i915_selftest.h" - -struct drm_i915_private; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. 
- */ -struct intel_guc_client { - struct i915_vma *vma; - void *vaddr; - struct intel_guc *guc; - - /* bitmap of (host) engine ids */ - u32 priority; - u32 stage_id; - u32 proc_desc_offset; - - u16 doorbell_id; - unsigned long doorbell_offset; - - /* Protects GuC client's WQ access */ - spinlock_t wq_lock; - - /* For testing purposes, use nop WQ items instead of real ones */ - I915_SELFTEST_DECLARE(bool use_nop_wqi); -}; +struct intel_guc; +struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); int intel_guc_submission_init(struct intel_guc *guc); -int intel_guc_submission_enable(struct intel_guc *guc); +void intel_guc_submission_enable(struct intel_guc *guc); void intel_guc_submission_disable(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index d654340d4d03..eee193bf2cc4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -39,5 +39,5 @@ void intel_huc_fw_init_early(struct intel_huc *huc) int intel_huc_fw_upload(struct intel_huc *huc) { /* HW doesn't look at destination address for HuC, so set it to 0 */ - return intel_uc_fw_upload(&huc->fw, huc_to_gt(huc), 0, HUC_UKERNEL); + return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 629b19377a29..64934a876a50 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -12,6 +12,9 @@ #include "i915_drv.h" +static const struct intel_uc_ops uc_ops_off; +static const struct intel_uc_ops uc_ops_on; + /* Reset GuC providing us with fresh state for both GuC and HuC. 
*/ static int __intel_uc_reset_hw(struct intel_uc *uc) @@ -89,6 +92,11 @@ void intel_uc_init_early(struct intel_uc *uc) intel_huc_init_early(&uc->huc); __confirm_options(uc); + + if (intel_uc_uses_guc(uc)) + uc->ops = &uc_ops_on; + else + uc->ops = &uc_ops_off; } void intel_uc_driver_late_release(struct intel_uc *uc) @@ -123,6 +131,11 @@ static void __uc_free_load_err_log(struct intel_uc *uc) i915_gem_object_put(log); } +static inline bool guc_communication_enabled(struct intel_guc *guc) +{ + return intel_guc_ct_enabled(&guc->ct); +} + /* * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 * register using the same bits used in the CT message payload. Since our @@ -158,7 +171,7 @@ static void guc_handle_mmio_msg(struct intel_guc *guc) struct drm_i915_private *i915 = guc_to_gt(guc)->i915; /* we need communication to be enabled to reply to GuC */ - GEM_BUG_ON(guc->handler == intel_guc_to_host_event_handler_nop); + GEM_BUG_ON(!guc_communication_enabled(guc)); if (!guc->mmio_msg) return; @@ -185,11 +198,6 @@ static void guc_disable_interrupts(struct intel_guc *guc) guc->interrupts.disable(guc); } -static inline bool guc_communication_enabled(struct intel_guc *guc) -{ - return guc->send != intel_guc_send_nop; -} - static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; @@ -205,9 +213,6 @@ static int guc_enable_communication(struct intel_guc *guc) if (ret) return ret; - guc->send = intel_guc_send_ct; - guc->handler = intel_guc_to_host_event_handler_ct; - /* check for mmio messages received before/during the CT enable */ guc_get_mmio_msg(guc); guc_handle_mmio_msg(guc); @@ -216,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc) /* check for CT messages received before we enabled interrupts */ spin_lock_irq(&i915->irq_lock); - intel_guc_to_host_event_handler_ct(guc); + intel_guc_ct_event_handler(&guc->ct); spin_unlock_irq(&i915->irq_lock); DRM_INFO("GuC communication 
enabled\n"); @@ -224,7 +229,7 @@ static int guc_enable_communication(struct intel_guc *guc) return 0; } -static void __guc_stop_communication(struct intel_guc *guc) +static void guc_disable_communication(struct intel_guc *guc) { /* * Events generated during or after CT disable are logged by guc in @@ -235,23 +240,6 @@ static void __guc_stop_communication(struct intel_guc *guc) guc_disable_interrupts(guc); - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; -} - -static void guc_stop_communication(struct intel_guc *guc) -{ - intel_guc_ct_stop(&guc->ct); - - __guc_stop_communication(guc); - - DRM_INFO("GuC communication stopped\n"); -} - -static void guc_disable_communication(struct intel_guc *guc) -{ - __guc_stop_communication(guc); - intel_guc_ct_disable(&guc->ct); /* @@ -265,41 +253,33 @@ static void guc_disable_communication(struct intel_guc *guc) DRM_INFO("GuC communication disabled\n"); } -void intel_uc_fetch_firmwares(struct intel_uc *uc) +static void __uc_fetch_firmwares(struct intel_uc *uc) { - struct drm_i915_private *i915 = uc_to_gt(uc)->i915; int err; - if (!intel_uc_uses_guc(uc)) - return; + GEM_BUG_ON(!intel_uc_uses_guc(uc)); - err = intel_uc_fw_fetch(&uc->guc.fw, i915); + err = intel_uc_fw_fetch(&uc->guc.fw); if (err) return; if (intel_uc_uses_huc(uc)) - intel_uc_fw_fetch(&uc->huc.fw, i915); + intel_uc_fw_fetch(&uc->huc.fw); } -void intel_uc_cleanup_firmwares(struct intel_uc *uc) +static void __uc_cleanup_firmwares(struct intel_uc *uc) { - if (!intel_uc_uses_guc(uc)) - return; - - if (intel_uc_uses_huc(uc)) - intel_uc_fw_cleanup_fetch(&uc->huc.fw); - + intel_uc_fw_cleanup_fetch(&uc->huc.fw); intel_uc_fw_cleanup_fetch(&uc->guc.fw); } -void intel_uc_init(struct intel_uc *uc) +static void __uc_init(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; int ret; - if (!intel_uc_uses_guc(uc)) - return; + GEM_BUG_ON(!intel_uc_uses_guc(uc)); /* XXX: GuC submission is unavailable for now 
*/ GEM_BUG_ON(intel_uc_supports_guc_submission(uc)); @@ -314,17 +294,10 @@ void intel_uc_init(struct intel_uc *uc) intel_huc_init(huc); } -void intel_uc_fini(struct intel_uc *uc) +static void __uc_fini(struct intel_uc *uc) { - struct intel_guc *guc = &uc->guc; - - if (!intel_uc_uses_guc(uc)) - return; - - if (intel_uc_uses_huc(uc)) - intel_huc_fini(&uc->huc); - - intel_guc_fini(guc); + intel_huc_fini(&uc->huc); + intel_guc_fini(&uc->guc); __uc_free_load_err_log(uc); } @@ -342,14 +315,6 @@ static int __uc_sanitize(struct intel_uc *uc) return __intel_uc_reset_hw(uc); } -void intel_uc_sanitize(struct intel_uc *uc) -{ - if (!intel_uc_supports_guc(uc)) - return; - - __uc_sanitize(uc); -} - /* Initialize and verify the uC regs related to uC positioning in WOPCM */ static int uc_init_wopcm(struct intel_uc *uc) { @@ -413,13 +378,8 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc) (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID); } -int intel_uc_init_hw(struct intel_uc *uc) +static int __uc_check_hw(struct intel_uc *uc) { - struct drm_i915_private *i915 = uc_to_gt(uc)->i915; - struct intel_guc *guc = &uc->guc; - struct intel_huc *huc = &uc->huc; - int ret, attempts; - if (!intel_uc_supports_guc(uc)) return 0; @@ -428,11 +388,24 @@ int intel_uc_init_hw(struct intel_uc *uc) * before on this system after reboot, otherwise we risk GPU hangs. * To check if GuC was loaded before we look at WOPCM registers. 
*/ - if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc)) - return 0; + if (uc_is_wopcm_locked(uc)) + return -EIO; + + return 0; +} + +static int __uc_init_hw(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + struct intel_guc *guc = &uc->guc; + struct intel_huc *huc = &uc->huc; + int ret, attempts; + + GEM_BUG_ON(!intel_uc_supports_guc(uc)); + GEM_BUG_ON(!intel_uc_uses_guc(uc)); if (!intel_uc_fw_is_available(&guc->fw)) { - ret = uc_is_wopcm_locked(uc) || + ret = __uc_check_hw(uc) || intel_uc_fw_is_overridden(&guc->fw) || intel_uc_supports_guc_submission(uc) ? intel_uc_fw_status_to_error(guc->fw.status) : 0; @@ -486,11 +459,8 @@ int intel_uc_init_hw(struct intel_uc *uc) if (ret) goto err_communication; - if (intel_uc_supports_guc_submission(uc)) { - ret = intel_guc_submission_enable(guc); - if (ret) - goto err_communication; - } + if (intel_uc_supports_guc_submission(uc)) + intel_guc_submission_enable(guc); dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, @@ -531,7 +501,7 @@ err_out: return -EIO; } -void intel_uc_fini_hw(struct intel_uc *uc) +static void __uc_fini_hw(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; @@ -560,7 +530,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc) if (!intel_guc_is_running(guc)) return; - guc_stop_communication(guc); + guc_disable_communication(guc); __uc_sanitize(uc); } @@ -631,3 +601,20 @@ int intel_uc_runtime_resume(struct intel_uc *uc) */ return __uc_resume(uc, true); } + +static const struct intel_uc_ops uc_ops_off = { + .init_hw = __uc_check_hw, +}; + +static const struct intel_uc_ops uc_ops_on = { + .sanitize = __uc_sanitize, + + .init_fw = __uc_fetch_firmwares, + .fini_fw = __uc_cleanup_firmwares, + + .init = __uc_init, + .fini = __uc_fini, + + .init_hw = __uc_init_hw, + .fini_hw = __uc_fini_hw, +}; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 
527995c21196..49c913524686 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -10,7 +10,20 @@ #include "intel_huc.h" #include "i915_params.h" +struct intel_uc; + +struct intel_uc_ops { + int (*sanitize)(struct intel_uc *uc); + void (*init_fw)(struct intel_uc *uc); + void (*fini_fw)(struct intel_uc *uc); + void (*init)(struct intel_uc *uc); + void (*fini)(struct intel_uc *uc); + int (*init_hw)(struct intel_uc *uc); + void (*fini_hw)(struct intel_uc *uc); +}; + struct intel_uc { + struct intel_uc_ops const *ops; struct intel_guc guc; struct intel_huc huc; @@ -21,13 +34,6 @@ struct intel_uc { void intel_uc_init_early(struct intel_uc *uc); void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); -void intel_uc_fetch_firmwares(struct intel_uc *uc); -void intel_uc_cleanup_firmwares(struct intel_uc *uc); -void intel_uc_sanitize(struct intel_uc *uc); -void intel_uc_init(struct intel_uc *uc); -int intel_uc_init_hw(struct intel_uc *uc); -void intel_uc_fini_hw(struct intel_uc *uc); -void intel_uc_fini(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); void intel_uc_runtime_suspend(struct intel_uc *uc); @@ -64,4 +70,20 @@ static inline bool intel_uc_uses_huc(struct intel_uc *uc) return intel_huc_is_enabled(&uc->huc); } +#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ +static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ +{ \ + if (uc->ops->_OPS) \ + return uc->ops->_OPS(uc); \ + return _RET; \ +} +intel_uc_ops_function(sanitize, sanitize, int, 0); +intel_uc_ops_function(fetch_firmwares, init_fw, void, ); +intel_uc_ops_function(cleanup_firmwares, fini_fw, void, ); +intel_uc_ops_function(init, init, void, ); +intel_uc_ops_function(fini, fini, void, ); +intel_uc_ops_function(init_hw, init_hw, int, 0); +intel_uc_ops_function(fini_hw, fini_hw, void, ); +#undef intel_uc_ops_function + #endif diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 66a30ab7044a..8ee0a0c7f447 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -11,7 +11,6 @@ #include "intel_uc_fw_abi.h" #include "i915_drv.h" -#ifdef CONFIG_DRM_I915_DEBUG_GUC static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) { GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); @@ -22,6 +21,7 @@ static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) return container_of(uc_fw, struct intel_gt, uc.huc.fw); } +#ifdef CONFIG_DRM_I915_DEBUG_GUC void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, enum intel_uc_fw_status status) { @@ -219,10 +219,9 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); } -static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, - struct drm_i915_private *i915, - int e) +static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) { + struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; bool user = e == -EINVAL; if (i915_inject_probe_error(i915, e)) { @@ -260,14 +259,14 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware - * @i915: device private * * Fetch uC firmware into GEM obj. * * Return: 0 on success, a negative errno code on failure. 
*/ -int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) { + struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; @@ -282,8 +281,8 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) if (err) return err; - __force_fw_fetch_failures(uc_fw, i915, -EINVAL); - __force_fw_fetch_failures(uc_fw, i915, -ESTALE); + __force_fw_fetch_failures(uc_fw, -EINVAL); + __force_fw_fetch_failures(uc_fw, -ESTALE); err = request_firmware(&fw, uc_fw->path, dev); if (err) @@ -390,8 +389,9 @@ fail: return err; } -static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt) +static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) { + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; struct drm_mm_node *node = &ggtt->uc_fw; GEM_BUG_ON(!drm_mm_node_allocated(node)); @@ -401,13 +401,12 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt) return lower_32_bits(node->start); } -static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw, - struct intel_gt *gt) +static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; - struct i915_ggtt *ggtt = gt->ggtt; + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; struct i915_vma dummy = { - .node.start = uc_fw_ggtt_offset(uc_fw, ggtt), + .node.start = uc_fw_ggtt_offset(uc_fw), .node.size = obj->base.size, .pages = obj->mm.pages, .vm = &ggtt->vm, @@ -422,19 +421,18 @@ static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw, ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); } -static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw, - struct intel_gt *gt) +static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; - struct i915_ggtt *ggtt = gt->ggtt; - u64 start = 
uc_fw_ggtt_offset(uc_fw, ggtt); + struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; + u64 start = uc_fw_ggtt_offset(uc_fw); ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); } -static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, - u32 wopcm_offset, u32 dma_flags) +static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) { + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); struct intel_uncore *uncore = gt->uncore; u64 offset; int ret; @@ -446,13 +444,13 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = uc_fw_ggtt_offset(uc_fw, gt->ggtt); + offset = uc_fw_ggtt_offset(uc_fw); GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000); intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset)); /* Set the DMA destination */ - intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, wopcm_offset); + intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, dst_offset); intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); /* @@ -484,17 +482,16 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, /** * intel_uc_fw_upload - load uC firmware using custom loader * @uc_fw: uC firmware - * @gt: the intel_gt structure - * @wopcm_offset: destination offset in wopcm + * @dst_offset: destination offset * @dma_flags: flags for flags for dma ctrl * * Loads uC firmware and updates internal flags. * * Return: 0 on success, non-zero on failure. 
*/ -int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, - u32 wopcm_offset, u32 dma_flags) +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) { + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); int err; /* make sure the status was cleared the last time we reset the uc */ @@ -508,9 +505,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, return -ENOEXEC; /* Call custom loader */ - intel_uc_fw_ggtt_bind(uc_fw, gt); - err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags); - intel_uc_fw_ggtt_unbind(uc_fw, gt); + uc_fw_bind_ggtt(uc_fw); + err = uc_fw_xfer(uc_fw, dst_offset, dma_flags); + uc_fw_unbind_ggtt(uc_fw); if (err) goto fail; @@ -547,10 +544,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) { - if (!intel_uc_fw_is_available(uc_fw)) - return; - - i915_gem_object_unpin_pages(uc_fw->obj); + intel_uc_fw_cleanup_fetch(uc_fw); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 7a0a5989afc9..1f30543d0d2d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -229,10 +229,9 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type, bool supported, enum intel_platform platform, u8 rev); -int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915); +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); -int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, - u32 wopcm_offset, u32 dma_flags); +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags); int intel_uc_fw_init(struct intel_uc_fw *uc_fw); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len); diff --git 
a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c deleted file mode 100644 index d8a80388bd31..000000000000 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ /dev/null @@ -1,299 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2017 Intel Corporation - */ - -#include "i915_selftest.h" -#include "gem/i915_gem_pm.h" - -/* max doorbell number + negative test for each client type */ -#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) - -static struct intel_guc_client *clients[ATTEMPTS]; - -static bool available_dbs(struct intel_guc *guc, u32 priority) -{ - unsigned long offset; - unsigned long end; - u16 id; - - /* first half is used for normal priority, second half for high */ - offset = 0; - end = GUC_NUM_DOORBELLS / 2; - if (priority <= GUC_CLIENT_PRIORITY_HIGH) { - offset = end; - end += offset; - } - - id = find_next_zero_bit(guc->doorbell_bitmap, end, offset); - if (id < end) - return true; - - return false; -} - -static int check_all_doorbells(struct intel_guc *guc) -{ - u16 db_id; - - pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS); - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) { - if (!doorbell_ok(guc, db_id)) { - pr_err("doorbell %d, not ok\n", db_id); - return -EIO; - } - } - - return 0; -} - -static int ring_doorbell_nop(struct intel_guc_client *client) -{ - struct guc_process_desc *desc = __get_process_desc(client); - int err; - - client->use_nop_wqi = true; - - spin_lock_irq(&client->wq_lock); - - guc_wq_item_append(client, 0, 0, 0, 0); - guc_ring_doorbell(client); - - spin_unlock_irq(&client->wq_lock); - - client->use_nop_wqi = false; - - /* if there are no issues GuC will update the WQ head and keep the - * WQ in active status - */ - err = wait_for(READ_ONCE(desc->head) == READ_ONCE(desc->tail), 10); - if (err) { - pr_err("doorbell %u ring failed!\n", client->doorbell_id); - return -EIO; - } - - if (desc->wq_status != WQ_STATUS_ACTIVE) { - pr_err("doorbell %u ring put 
WQ in bad state (%u)!\n", - client->doorbell_id, desc->wq_status); - return -EIO; - } - - return 0; -} - -/* - * Basic client sanity check, handy to validate create_clients. - */ -static int validate_client(struct intel_guc_client *client, int client_priority) -{ - if (client->priority != client_priority || - client->doorbell_id == GUC_DOORBELL_INVALID) - return -EINVAL; - else - return 0; -} - -static bool client_doorbell_in_sync(struct intel_guc_client *client) -{ - return !client || doorbell_ok(client->guc, client->doorbell_id); -} - -/* - * Check that we're able to synchronize guc_clients with their doorbells - * - * We're creating clients and reserving doorbells once, at module load. During - * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to - * GuC being reset. In other words - GuC clients are still around, but the - * status of their doorbells may be incorrect. This is the reason behind - * validating that the doorbells status expected by the driver matches what the - * GuC/HW have. - */ -static int igt_guc_clients(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_guc *guc = >->uc.guc; - intel_wakeref_t wakeref; - int err = 0; - - GEM_BUG_ON(!HAS_GT_UC(gt->i915)); - wakeref = intel_runtime_pm_get(gt->uncore->rpm); - - err = check_all_doorbells(guc); - if (err) - goto unlock; - - /* - * Get rid of clients created during driver load because the test will - * recreate them. 
- */ - guc_clients_disable(guc); - guc_clients_destroy(guc); - if (guc->execbuf_client) { - pr_err("guc_clients_destroy lied!\n"); - err = -EINVAL; - goto unlock; - } - - err = guc_clients_create(guc); - if (err) { - pr_err("Failed to create clients\n"); - goto unlock; - } - GEM_BUG_ON(!guc->execbuf_client); - - err = validate_client(guc->execbuf_client, - GUC_CLIENT_PRIORITY_KMD_NORMAL); - if (err) { - pr_err("execbug client validation failed\n"); - goto out; - } - - /* the client should now have reserved a doorbell */ - if (!has_doorbell(guc->execbuf_client)) { - pr_err("guc_clients_create didn't reserve doorbells\n"); - err = -EINVAL; - goto out; - } - - /* Now enable the clients */ - guc_clients_enable(guc); - - /* each client should now have received a doorbell */ - if (!client_doorbell_in_sync(guc->execbuf_client)) { - pr_err("failed to initialize the doorbells\n"); - err = -EINVAL; - goto out; - } - - /* - * Basic test - an attempt to reallocate a valid doorbell to the - * client it is currently assigned should not cause a failure. - */ - err = create_doorbell(guc->execbuf_client); - -out: - /* - * Leave clean state for other test, plus the driver always destroy the - * clients during unload. - */ - guc_clients_disable(guc); - guc_clients_destroy(guc); - guc_clients_create(guc); - guc_clients_enable(guc); -unlock: - intel_runtime_pm_put(gt->uncore->rpm, wakeref); - return err; -} - -/* - * Create as many clients as number of doorbells. Note that there's already - * client(s)/doorbell(s) created during driver load, but this test creates - * its own and do not interact with the existing ones. 
- */ -static int igt_guc_doorbells(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_guc *guc = >->uc.guc; - intel_wakeref_t wakeref; - int i, err = 0; - u16 db_id; - - GEM_BUG_ON(!HAS_GT_UC(gt->i915)); - wakeref = intel_runtime_pm_get(gt->uncore->rpm); - - err = check_all_doorbells(guc); - if (err) - goto unlock; - - for (i = 0; i < ATTEMPTS; i++) { - clients[i] = guc_client_alloc(guc, i % GUC_CLIENT_PRIORITY_NUM); - - if (!clients[i]) { - pr_err("[%d] No guc client\n", i); - err = -EINVAL; - goto out; - } - - if (IS_ERR(clients[i])) { - if (PTR_ERR(clients[i]) != -ENOSPC) { - pr_err("[%d] unexpected error\n", i); - err = PTR_ERR(clients[i]); - goto out; - } - - if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) { - pr_err("[%d] non-db related alloc fail\n", i); - err = -EINVAL; - goto out; - } - - /* expected, ran out of dbs for this client type */ - continue; - } - - /* - * The check below is only valid because we keep a doorbell - * assigned during the whole life of the client. 
- */ - if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) { - pr_err("[%d] more clients than doorbells (%d >= %d)\n", - i, clients[i]->stage_id, GUC_NUM_DOORBELLS); - err = -EINVAL; - goto out; - } - - err = validate_client(clients[i], i % GUC_CLIENT_PRIORITY_NUM); - if (err) { - pr_err("[%d] client_alloc sanity check failed!\n", i); - err = -EINVAL; - goto out; - } - - db_id = clients[i]->doorbell_id; - - err = __guc_client_enable(clients[i]); - if (err) { - pr_err("[%d] Failed to create a doorbell\n", i); - goto out; - } - - /* doorbell id shouldn't change, we are holding the mutex */ - if (db_id != clients[i]->doorbell_id) { - pr_err("[%d] doorbell id changed (%d != %d)\n", - i, db_id, clients[i]->doorbell_id); - err = -EINVAL; - goto out; - } - - err = check_all_doorbells(guc); - if (err) - goto out; - - err = ring_doorbell_nop(clients[i]); - if (err) - goto out; - } - -out: - for (i = 0; i < ATTEMPTS; i++) - if (!IS_ERR_OR_NULL(clients[i])) { - __guc_client_disable(clients[i]); - guc_client_free(clients[i]); - } -unlock: - intel_runtime_pm_put(gt->uncore->rpm, wakeref); - return err; -} - -int intel_guc_live_selftest(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_guc_clients), - SUBTEST(igt_guc_doorbells), - }; - - if (!USES_GUC_SUBMISSION(i915)) - return 0; - - return intel_gt_live_subtests(tests, &i915->gt); -} |