diff options
Diffstat (limited to 'drivers/gpu/drm/i915')
164 files changed, 3599 insertions, 3440 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 69f57ca9c68d..b63d374dff23 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -20,13 +20,13 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI - select IO_MAPPING select SYNC_FILE select IOSF_MBI select CRC32 select SND_HDA_I915 if SND_HDA_CORE select CEC_CORE if CEC_NOTIFIER select VMAP_PFN + select DRM_TTM help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, @@ -102,7 +102,6 @@ config DRM_I915_GVT bool "Enable Intel GVT-g graphics virtualization host support" depends on DRM_I915 depends on 64BIT - depends on VFIO_MDEV=y || VFIO_MDEV=DRM_I915 default n help Choose this option if you want to enable Intel GVT-g graphics diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6947495bf34b..4f22cac1c49b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -50,6 +50,7 @@ i915-y += i915_drv.o \ intel_memory_region.o \ intel_pch.o \ intel_pm.o \ + intel_region_ttm.o \ intel_runtime_pm.o \ intel_sideband.o \ intel_step.o \ @@ -160,7 +161,6 @@ gem-y += \ i915-y += \ $(gem-y) \ i915_active.o \ - i915_buddy.o \ i915_cmd_parser.o \ i915_gem_evict.o \ i915_gem_gtt.o \ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ccedbafe5157..2589065fb630 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11143,7 +11143,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb; - fence = dma_resv_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_unlocked(obj->base.resv); if (fence) { add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); @@ -11770,11 +11770,20 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_framebuffer *fb; struct 
drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; + struct drm_i915_private *i915; obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); if (!obj) return ERR_PTR(-ENOENT); + /* object is backed with LMEM for discrete */ + i915 = to_i915(obj->base.dev); + if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) { + /* object is "remote", not in local memory */ + i915_gem_object_put(obj); + return ERR_PTR(-EREMOTE); + } + fb = intel_framebuffer_create(obj, &mode_cmd); i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 8bb4b8507181..70fe90beaf6c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -963,8 +963,8 @@ intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_base_id) intel_dp_create_fake_mst_encoders(dig_port); ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, &i915->drm, &intel_dp->aux, 16, 3, - (u8)dig_port->max_lanes, - drm_dp_link_rate_to_bw_code(max_source_rate), + dig_port->max_lanes, + max_source_rate, conn_base_id); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 7b52d11ae053..df05d285f0bd 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -41,6 +41,8 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> +#include "gem/i915_gem_lmem.h" + #include "i915_drv.h" #include "intel_display_types.h" #include "intel_fbdev.h" @@ -137,14 +139,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper, size = mode_cmd.pitches[0] * mode_cmd.height; size = PAGE_ALIGN(size); - /* If the FB is too big, just don't use it since fbdev is not very - * important and we should probably use that space with FBC or other - * features. 
*/ obj = ERR_PTR(-ENODEV); - if (size * 2 < dev_priv->stolen_usable_size) - obj = i915_gem_object_create_stolen(dev_priv, size); - if (IS_ERR(obj)) - obj = i915_gem_object_create_shmem(dev_priv, size); + if (HAS_LMEM(dev_priv)) { + obj = i915_gem_object_create_lmem(dev_priv, size, + I915_BO_ALLOC_CONTIGUOUS); + } else { + /* + * If the FB is too big, just don't use it since fbdev is not very + * important and we should probably use that space with FBC or other + * features. + */ + if (size * 2 < dev_priv->stolen_usable_size) + obj = i915_gem_object_create_stolen(dev_priv, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_shmem(dev_priv, size); + } + if (IS_ERR(obj)) { drm_err(&dev_priv->drm, "failed to allocate framebuffer\n"); return PTR_ERR(obj); @@ -178,6 +188,7 @@ static int intelfb_create(struct drm_fb_helper *helper, unsigned long flags = 0; bool prealloc = false; void __iomem *vaddr; + struct drm_i915_gem_object *obj; int ret; if (intel_fb && @@ -232,13 +243,27 @@ static int intelfb_create(struct drm_fb_helper *helper, info->fbops = &intelfb_ops; /* setup aperture base/size for vesafb takeover */ - info->apertures->ranges[0].base = ggtt->gmadr.start; - info->apertures->ranges[0].size = ggtt->mappable_end; + obj = intel_fb_obj(&intel_fb->base); + if (i915_gem_object_is_lmem(obj)) { + struct intel_memory_region *mem = obj->mm.region; + + info->apertures->ranges[0].base = mem->io_start; + info->apertures->ranges[0].size = mem->total; + + /* Use fbdev's framebuffer from lmem for discrete */ + info->fix.smem_start = + (unsigned long)(mem->io_start + + i915_gem_object_get_dma_address(obj, 0)); + info->fix.smem_len = obj->base.size; + } else { + info->apertures->ranges[0].base = ggtt->gmadr.start; + info->apertures->ranges[0].size = ggtt->mappable_end; - /* Our framebuffer is the entirety of fbdev's system memory */ - info->fix.smem_start = - (unsigned long)(ggtt->gmadr.start + vma->node.start); - info->fix.smem_len = vma->node.size; + /* Our framebuffer is 
the entirety of fbdev's system memory */ + info->fix.smem_start = + (unsigned long)(ggtt->gmadr.start + vma->node.start); + info->fix.smem_len = vma->node.size; + } vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 8161d49e78ba..8e75debcce1a 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -211,7 +211,6 @@ static int frontbuffer_active(struct i915_active *ref) return 0; } -__i915_active_call static void frontbuffer_retire(struct i915_active *ref) { struct intel_frontbuffer *front = @@ -266,7 +265,8 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj) atomic_set(&front->bits, 0); i915_active_init(&front->write, frontbuffer_active, - i915_active_may_sleep(frontbuffer_retire)); + frontbuffer_retire, + I915_ACTIVE_RETIRE_SLEEPS); spin_lock(&i915->fb_tracking.lock); if (rcu_access_pointer(obj->frontbuffer)) { diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 46cba12be888..7e3f5c6ca484 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -384,8 +384,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) i830_overlay_clock_gating(dev_priv, true); } -__i915_active_call static void -intel_overlay_last_flip_retire(struct i915_active *active) +static void intel_overlay_last_flip_retire(struct i915_active *active) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -1402,7 +1401,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) overlay->saturation = 146; i915_active_init(&overlay->last_flip, - NULL, intel_overlay_last_flip_retire); + NULL, intel_overlay_last_flip_retire, 0); ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv)); if (ret) diff --git a/drivers/gpu/drm/i915/dma_resv_utils.c 
b/drivers/gpu/drm/i915/dma_resv_utils.c index 9e508e7d4629..7df91b7e4ca8 100644 --- a/drivers/gpu/drm/i915/dma_resv_utils.c +++ b/drivers/gpu/drm/i915/dma_resv_utils.c @@ -10,7 +10,7 @@ void dma_resv_prune(struct dma_resv *resv) { if (dma_resv_trylock(resv)) { - if (dma_resv_test_signaled_rcu(resv, true)) + if (dma_resv_test_signaled(resv, true)) dma_resv_add_excl_fence(resv, NULL); dma_resv_unlock(resv); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 25235ef630c1..6234e17259c1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -105,7 +105,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * Alternatively, we can trade that extra information on read/write * activity with * args->busy = - * !dma_resv_test_signaled_rcu(obj->resv, true); + * !dma_resv_test_signaled(obj->resv, true); * to report the overall busyness. This is what the wait-ioctl does. * */ @@ -113,11 +113,10 @@ retry: seq = raw_read_seqcount(&obj->base.resv->seq); /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = - busy_check_writer(rcu_dereference(obj->base.resv->fence_excl)); + args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv)); /* Translate shared fences to READ set of engines */ - list = rcu_dereference(obj->base.resv->fence); + list = dma_resv_shared_list(obj->base.resv); if (list) { unsigned int shared_count = list->shared_count, i; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd8ee52e17a4..7720b8c22c81 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1046,7 +1046,6 @@ struct context_barrier_task { void *data; }; -__i915_active_call static void cb_retire(struct i915_active *base) { struct context_barrier_task *cb = container_of(base, typeof(*cb), base); @@ -1080,7 +1079,7 @@ static int 
context_barrier_task(struct i915_gem_context *ctx, if (!cb) return -ENOMEM; - i915_active_init(&cb->base, NULL, cb_retire); + i915_active_init(&cb->base, NULL, cb_retire, 0); err = i915_active_acquire(&cb->base); if (err) { kfree(cb); @@ -1191,7 +1190,7 @@ static void set_ppgtt_barrier(void *data) { struct i915_address_space *old = data; - if (INTEL_GEN(old->i915) < 8) + if (GRAPHICS_VER(old->i915) < 8) gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); i915_vm_close(old); @@ -1437,7 +1436,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, context->max_eus_per_subslice = user->max_eus_per_subslice; /* Part specific restrictions. */ - if (IS_GEN(i915, 11)) { + if (GRAPHICS_VER(i915) == 11) { unsigned int hw_s = hweight8(device->slice_mask); unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); unsigned int req_s = hweight8(context->slice_mask); @@ -1504,7 +1503,7 @@ static int set_sseu(struct i915_gem_context *ctx, if (args->size < sizeof(user_sseu)) return -EINVAL; - if (!IS_GEN(i915, 11)) + if (GRAPHICS_VER(i915) != 11) return -ENODEV; if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 45d60e3d98e3..548ddf39d853 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -4,44 +4,102 @@ */ #include "gem/i915_gem_ioctls.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "i915_drv.h" +#include "i915_trace.h" +#include "i915_user_extensions.h" + +static u32 object_max_page_size(struct drm_i915_gem_object *obj) +{ + u32 max_page_size = 0; + int i; + + for (i = 0; i < obj->mm.n_placements; i++) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); + max_page_size = max_t(u32, max_page_size, mr->min_page_size); + } + + GEM_BUG_ON(!max_page_size); + return max_page_size; +} + +static void object_set_placements(struct 
drm_i915_gem_object *obj, + struct intel_memory_region **placements, + unsigned int n_placements) +{ + GEM_BUG_ON(!n_placements); + + /* + * For the common case of one memory region, skip storing an + * allocated array and just point at the region directly. + */ + if (n_placements == 1) { + struct intel_memory_region *mr = placements[0]; + struct drm_i915_private *i915 = mr->i915; + + obj->mm.placements = &i915->mm.regions[mr->id]; + obj->mm.n_placements = 1; + } else { + obj->mm.placements = placements; + obj->mm.n_placements = n_placements; + } +} + +static int i915_gem_publish(struct drm_i915_gem_object *obj, + struct drm_file *file, + u64 *size_p, + u32 *handle_p) +{ + u64 size = obj->base.size; + int ret; + + ret = drm_gem_handle_create(file, &obj->base, handle_p); + /* drop reference from allocate - handle holds it now */ + i915_gem_object_put(obj); + if (ret) + return ret; + + *size_p = size; + return 0; +} static int -i915_gem_create(struct drm_file *file, - struct intel_memory_region *mr, - u64 *size_p, - u32 *handle_p) +i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) { - struct drm_i915_gem_object *obj; - u32 handle; - u64 size; + struct intel_memory_region *mr = obj->mm.placements[0]; + unsigned int flags; int ret; - GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); - size = round_up(*size_p, mr->min_page_size); + size = round_up(size, object_max_page_size(obj)); if (size == 0) return -EINVAL; /* For most of the ABI (e.g. mmap) we think in system pages */ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); - /* Allocate the new object */ - obj = i915_gem_object_create_region(mr, size, 0); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (i915_gem_object_size_2big(size)) + return -E2BIG; - GEM_BUG_ON(size != obj->base.size); + /* + * For now resort to CPU based clearing for device local-memory, in the + * near future this will use the blitter engine for accelerated, GPU + * based clearing. 
+ */ + flags = 0; + if (mr->type == INTEL_MEMORY_LOCAL) + flags = I915_BO_ALLOC_CPU_CLEAR; - ret = drm_gem_handle_create(file, &obj->base, &handle); - /* drop reference from allocate - handle holds it now */ - i915_gem_object_put(obj); + ret = mr->ops->init_object(mr, obj, size, flags); if (ret) return ret; - *handle_p = handle; - *size_p = size; + GEM_BUG_ON(size != obj->base.size); + + trace_i915_gem_object_create(obj); return 0; } @@ -50,9 +108,12 @@ i915_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { + struct drm_i915_gem_object *obj; + struct intel_memory_region *mr; enum intel_memory_type mem_type; int cpp = DIV_ROUND_UP(args->bpp, 8); u32 format; + int ret; switch (cpp) { case 1: @@ -85,10 +146,22 @@ i915_gem_dumb_create(struct drm_file *file, if (HAS_LMEM(to_i915(dev))) mem_type = INTEL_MEMORY_LOCAL; - return i915_gem_create(file, - intel_memory_region_by_type(to_i915(dev), - mem_type), - &args->size, &args->handle); + obj = i915_gem_object_alloc(); + if (!obj) + return -ENOMEM; + + mr = intel_memory_region_by_type(to_i915(dev), mem_type); + object_set_placements(obj, &mr, 1); + + ret = i915_gem_setup(obj, args->size); + if (ret) + goto object_free; + + return i915_gem_publish(obj, file, &args->size, &args->handle); + +object_free: + i915_gem_object_free(obj); + return ret; } /** @@ -103,11 +176,229 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, { struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_create *args = data; + struct drm_i915_gem_object *obj; + struct intel_memory_region *mr; + int ret; i915_gem_flush_free_objects(i915); - return i915_gem_create(file, - intel_memory_region_by_type(i915, - INTEL_MEMORY_SYSTEM), - &args->size, &args->handle); + obj = i915_gem_object_alloc(); + if (!obj) + return -ENOMEM; + + mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); + object_set_placements(obj, &mr, 1); + + ret = i915_gem_setup(obj, args->size); + if (ret) + goto 
object_free; + + return i915_gem_publish(obj, file, &args->size, &args->handle); + +object_free: + i915_gem_object_free(obj); + return ret; +} + +struct create_ext { + struct drm_i915_private *i915; + struct drm_i915_gem_object *vanilla_object; +}; + +static void repr_placements(char *buf, size_t size, + struct intel_memory_region **placements, + int n_placements) +{ + int i; + + buf[0] = '\0'; + + for (i = 0; i < n_placements; i++) { + struct intel_memory_region *mr = placements[i]; + int r; + + r = snprintf(buf, size, "\n %s -> { class: %d, inst: %d }", + mr->name, mr->type, mr->instance); + if (r >= size) + return; + + buf += r; + size -= r; + } +} + +static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, + struct create_ext *ext_data) +{ + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_memory_class_instance __user *uregions = + u64_to_user_ptr(args->regions); + struct drm_i915_gem_object *obj = ext_data->vanilla_object; + struct intel_memory_region **placements; + u32 mask; + int i, ret = 0; + + if (args->pad) { + drm_dbg(&i915->drm, "pad should be zero\n"); + ret = -EINVAL; + } + + if (!args->num_regions) { + drm_dbg(&i915->drm, "num_regions is zero\n"); + ret = -EINVAL; + } + + if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) { + drm_dbg(&i915->drm, "num_regions is too large\n"); + ret = -EINVAL; + } + + if (ret) + return ret; + + placements = kmalloc_array(args->num_regions, + sizeof(struct intel_memory_region *), + GFP_KERNEL); + if (!placements) + return -ENOMEM; + + mask = 0; + for (i = 0; i < args->num_regions; i++) { + struct drm_i915_gem_memory_class_instance region; + struct intel_memory_region *mr; + + if (copy_from_user(®ion, uregions, sizeof(region))) { + ret = -EFAULT; + goto out_free; + } + + mr = intel_memory_region_lookup(i915, + region.memory_class, + region.memory_instance); + if (!mr || mr->private) { + drm_dbg(&i915->drm, "Device is missing region { class: %d, inst: %d } at index = %d\n", 
+ region.memory_class, region.memory_instance, i); + ret = -EINVAL; + goto out_dump; + } + + if (mask & BIT(mr->id)) { + drm_dbg(&i915->drm, "Found duplicate placement %s -> { class: %d, inst: %d } at index = %d\n", + mr->name, region.memory_class, + region.memory_instance, i); + ret = -EINVAL; + goto out_dump; + } + + placements[i] = mr; + mask |= BIT(mr->id); + + ++uregions; + } + + if (obj->mm.placements) { + ret = -EINVAL; + goto out_dump; + } + + object_set_placements(obj, placements, args->num_regions); + if (args->num_regions == 1) + kfree(placements); + + return 0; + +out_dump: + if (1) { + char buf[256]; + + if (obj->mm.placements) { + repr_placements(buf, + sizeof(buf), + obj->mm.placements, + obj->mm.n_placements); + drm_dbg(&i915->drm, + "Placements were already set in previous EXT. Existing placements: %s\n", + buf); + } + + repr_placements(buf, sizeof(buf), placements, i); + drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf); + } + +out_free: + kfree(placements); + return ret; +} + +static int ext_set_placements(struct i915_user_extension __user *base, + void *data) +{ + struct drm_i915_gem_create_ext_memory_regions ext; + + if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) + return -ENODEV; + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + return set_placements(&ext, data); +} + +static const i915_user_extension_fn create_extensions[] = { + [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, +}; + +/** + * Creates a new mm object and returns a handle to it. 
+ * @dev: drm device pointer + * @data: ioctl data blob + * @file: drm file pointer + */ +int +i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_create_ext *args = data; + struct create_ext ext_data = { .i915 = i915 }; + struct intel_memory_region **placements_ext; + struct drm_i915_gem_object *obj; + int ret; + + if (args->flags) + return -EINVAL; + + i915_gem_flush_free_objects(i915); + + obj = i915_gem_object_alloc(); + if (!obj) + return -ENOMEM; + + ext_data.vanilla_object = obj; + ret = i915_user_extensions(u64_to_user_ptr(args->extensions), + create_extensions, + ARRAY_SIZE(create_extensions), + &ext_data); + placements_ext = obj->mm.placements; + if (ret) + goto object_free; + + if (!placements_ext) { + struct intel_memory_region *mr = + intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); + + object_set_placements(obj, &mr, 1); + } + + ret = i915_gem_setup(obj, args->size); + if (ret) + goto object_free; + + return i915_gem_publish(obj, file, &args->size, &args->handle); + +object_free: + if (obj->mm.n_placements > 1) + kfree(placements_ext); + i915_gem_object_free(obj); + return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index ccede73c6465..616c3a2f1baf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -209,7 +209,7 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - sg_page_sizes = i915_sg_page_sizes(pages->sgl); + sg_page_sizes = i915_sg_dma_sizes(pages->sgl); __i915_gem_object_set_pages(obj, pages, sg_page_sizes); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 297143511f99..a8abc9af5ff4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -500,7 +500,7 @@ eb_validate_vma(struct i915_execbuffer *eb, * also covers all platforms with local memory. */ if (entry->relocation_count && - INTEL_GEN(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) + GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) return -EINVAL; if (unlikely(entry->flags & eb->invalid_flags)) @@ -1439,7 +1439,7 @@ err_pool: static bool reloc_can_use_engine(const struct intel_engine_cs *engine) { - return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6); + return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6; } static u32 *reloc_gpu(struct i915_execbuffer *eb, @@ -1481,7 +1481,7 @@ static inline bool use_reloc_gpu(struct i915_vma *vma) if (DBG_FORCE_RELOC) return false; - return !dma_resv_test_signaled_rcu(vma->resv, true); + return !dma_resv_test_signaled(vma->resv, true); } static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) @@ -1671,7 +1671,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, * batchbuffers. */ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && - IS_GEN(eb->i915, 6)) { + GRAPHICS_VER(eb->i915) == 6) { err = i915_vma_bind(target->vma, target->vma->obj->cache_level, PIN_GLOBAL, NULL); @@ -2332,7 +2332,7 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) u32 *cs; int i; - if (!IS_GEN(rq->engine->i915, 7) || rq->engine->id != RCS0) { + if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) { drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -3375,7 +3375,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { - if (INTEL_GEN(i915) >= 11) + if (GRAPHICS_VER(i915) >= 11) return -ENODEV; /* Return -EPERM to trigger fallback code on old binaries. 
*/ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index 7fd22f3efbef..28d6526e32ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -14,6 +14,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index ce1c83c13d05..3b4aa28a076d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -4,22 +4,95 @@ */ #include "intel_memory_region.h" +#include "intel_region_ttm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_lmem.h" #include "i915_drv.h" +static void lmem_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + obj->mm.dirty = false; + sg_free_table(pages); + kfree(pages); +} + +static int lmem_get_pages(struct drm_i915_gem_object *obj) +{ + unsigned int flags; + struct sg_table *pages; + + flags = I915_ALLOC_MIN_PAGE_SIZE; + if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) + flags |= I915_ALLOC_CONTIGUOUS; + + obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, + obj->base.size, + flags); + if (IS_ERR(obj->mm.st_mm_node)) + return PTR_ERR(obj->mm.st_mm_node); + + /* Range manager is always contigous */ + if (obj->mm.region->is_range_manager) + obj->flags |= I915_BO_ALLOC_CONTIGUOUS; + pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); + if (IS_ERR(pages)) { + intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + 
return PTR_ERR(pages); + } + + __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); + + if (obj->flags & I915_BO_ALLOC_CPU_CLEAR) { + void __iomem *vaddr = + i915_gem_object_lmem_io_map(obj, 0, obj->base.size); + + if (!vaddr) { + struct sg_table *pages = + __i915_gem_object_unset_pages(obj); + + if (!IS_ERR_OR_NULL(pages)) + lmem_put_pages(obj, pages); + } + + memset_io(vaddr, 0, obj->base.size); + io_mapping_unmap(vaddr); + } + + return 0; +} + const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { .name = "i915_gem_object_lmem", .flags = I915_GEM_OBJECT_HAS_IOMEM, - .get_pages = i915_gem_object_get_pages_buddy, - .put_pages = i915_gem_object_put_pages_buddy, + .get_pages = lmem_get_pages, + .put_pages = lmem_put_pages, .release = i915_gem_object_release_memory_region, }; +void __iomem * +i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, + unsigned long n, + unsigned long size) +{ + resource_size_t offset; + + GEM_BUG_ON(!i915_gem_object_is_contiguous(obj)); + + offset = i915_gem_object_get_dma_address(obj, n); + offset -= obj->mm.region->region.start; + + return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); +} + bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) { - return obj->ops == &i915_gem_lmem_obj_ops; + struct intel_memory_region *mr = obj->mm.region; + + return mr && (mr->type == INTEL_MEMORY_LOCAL || + mr->type == INTEL_MEMORY_STOLEN_LOCAL); } struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 036d53c01de9..fac6bc5a5ebb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -14,6 +14,11 @@ struct intel_memory_region; extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; +void __iomem * +i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, + unsigned long n, + unsigned long size); + bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); 
struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index f6fe5cb01438..215326764606 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -56,10 +56,18 @@ int i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_mmap *args = data; struct drm_i915_gem_object *obj; unsigned long addr; + /* + * mmap ioctl is disallowed for all discrete platforms, + * and for all platforms with GRAPHICS_VER > 12. + */ + if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12) + return -EOPNOTSUPP; + if (args->flags & ~(I915_MMAP_WC)) return -EINVAL; @@ -367,10 +375,11 @@ retry: goto err_unpin; /* Finally, remap it using the new GTT offset */ - ret = io_mapping_map_user(&ggtt->iomap, area, area->vm_start + - (vma->ggtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start)); + ret = remap_io_mapping(area, + area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), + (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, + min_t(u64, vma->size, area->vm_end - area->vm_start), + &ggtt->iomap); if (ret) goto err_fence; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index ea74cbca95be..5706d471692d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -62,6 +62,13 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops, struct lock_class_key *key, unsigned flags) { + /* + * A gem object is embedded both in a struct ttm_buffer_object :/ and + * in a drm_i915_gem_object. Make sure they are aliased. 
+ */ + BUILD_BUG_ON(offsetof(typeof(*obj), base) != + offsetof(typeof(*obj), __do_not_access.base)); + spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); @@ -249,6 +256,12 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (obj->ops->release) obj->ops->release(obj); + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); + + if (obj->shares_resv_from) + i915_vm_resv_put(obj->shares_resv_from); + /* But keep the pointer alive for RCU-protected lookups */ call_rcu(&obj->rcu, __i915_gem_free_object_rcu); cond_resched(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 2ebd79537aea..7c0eb425cb3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -500,7 +500,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) struct dma_fence *fence; rcu_read_lock(); - fence = dma_resv_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_unlocked(obj->base.resv); rcu_read_unlock(); if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index df8e8c18c6c9..3e28c68fda3e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -72,7 +72,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - if (INTEL_GEN(i915) >= 8) { + if (GRAPHICS_VER(i915) >= 8) { *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cmd++ = 0; @@ -232,7 +232,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) { u32 height = size >> PAGE_SHIFT; - if (!IS_GEN(i915, 11)) + if (GRAPHICS_VER(i915) != 11) return false; return height % 4 == 3 && height <= 8; @@ -297,7 +297,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct 
intel_context *ce, size = min_t(u64, rem, block_size); GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - if (INTEL_GEN(i915) >= 9 && + if (GRAPHICS_VER(i915) >= 9 && !wa_1209644611_applies(i915, size)) { *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; @@ -309,7 +309,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, *cmd++ = PAGE_SIZE; *cmd++ = lower_32_bits(src_offset); *cmd++ = upper_32_bits(src_offset); - } else if (INTEL_GEN(i915) >= 8) { + } else if (GRAPHICS_VER(i915) >= 8) { *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; *cmd++ = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 8e485cb3343c..d047ea126029 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -10,6 +10,7 @@ #include <linux/mmu_notifier.h> #include <drm/drm_gem.h> +#include <drm/ttm/ttm_bo_api.h> #include <uapi/drm/i915_drm.h> #include "i915_active.h" @@ -99,7 +100,16 @@ struct i915_gem_object_page_iter { }; struct drm_i915_gem_object { - struct drm_gem_object base; + /* + * We might have reason to revisit the below since it wastes + * a lot of space for non-ttm gem objects. + * In any case, always use the accessors for the ttm_buffer_object + * when accessing it. + */ + union { + struct drm_gem_object base; + struct ttm_buffer_object __do_not_access; + }; const struct drm_i915_gem_object_ops *ops; @@ -149,6 +159,10 @@ struct drm_i915_gem_object { * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called. */ struct list_head obj_link; + /** + * @shares_resv_from: The object shares the resv from this vm. 
+ */ + struct i915_address_space *shares_resv_from; union { struct rcu_head rcu; @@ -172,11 +186,13 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) #define I915_BO_ALLOC_STRUCT_PAGE BIT(2) +#define I915_BO_ALLOC_CPU_CLEAR BIT(3) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ - I915_BO_ALLOC_STRUCT_PAGE) -#define I915_BO_READONLY BIT(3) -#define I915_TILING_QUIRK_BIT 4 /* unknown swizzling; do not release! */ + I915_BO_ALLOC_STRUCT_PAGE | \ + I915_BO_ALLOC_CPU_CLEAR) +#define I915_BO_READONLY BIT(4) +#define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */ /* * Is the object to be mapped as read-only to the GPU @@ -220,13 +236,21 @@ struct drm_i915_gem_object { atomic_t shrink_pin; /** + * Priority list of potential placements for this object. + */ + struct intel_memory_region **placements; + int n_placements; + + /** * Memory region for this object. */ struct intel_memory_region *region; + /** - * List of memory region blocks allocated for this object. + * Memory manager node allocated for this object. */ - struct list_head blocks; + void *st_mm_node; + /** * Element within memory_region->objects or region->purgeable * if the object is marked as DONTNEED. 
Access is protected by diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index aed8a37ccdc9..6444e097016d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -63,6 +63,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); i915_gem_object_set_tiling_quirk(obj); + GEM_BUG_ON(!list_empty(&obj->mm.link)); + atomic_inc(&obj->mm.shrink_pin); shrinkable = false; } @@ -473,7 +475,8 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, might_sleep(); GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + if (!i915_gem_object_has_pinned_pages(obj)) + assert_object_held(obj); /* As we iterate forward through the sg, we record each entry in a * radixtree for quick repeated (backwards) lookups. If we have seen diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 81dc2bf59bc3..be72ad0634ba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -8,7 +8,6 @@ #include <linux/shmem_fs.h> #include <linux/swap.h> -#include <drm/drm.h> /* for drm_legacy.h! 
*/ #include <drm/drm_cache.h> #include "gt/intel_gt.h" @@ -208,7 +207,7 @@ static int i915_gem_object_shmem_to_phys(struct drm_i915_gem_object *obj) err_xfer: if (!IS_ERR_OR_NULL(pages)) { - unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl); + unsigned int sg_page_sizes = i915_sg_dma_sizes(pages->sgl); __i915_gem_object_set_pages(obj, pages, sg_page_sizes); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 6a84fb6dde24..f25e6646c5b7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -8,107 +8,9 @@ #include "i915_drv.h" #include "i915_trace.h" -void -i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks); - - obj->mm.dirty = false; - sg_free_table(pages); - kfree(pages); -} - -int -i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) -{ - const u64 max_segment = i915_sg_segment_size(); - struct intel_memory_region *mem = obj->mm.region; - struct list_head *blocks = &obj->mm.blocks; - resource_size_t size = obj->base.size; - resource_size_t prev_end; - struct i915_buddy_block *block; - unsigned int flags; - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - int ret; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - - if (sg_alloc_table(st, size >> PAGE_SHIFT, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - - flags = I915_ALLOC_MIN_PAGE_SIZE; - if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; - - ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks); - if (ret) - goto err_free_sg; - - GEM_BUG_ON(list_empty(blocks)); - - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - prev_end = (resource_size_t)-1; - - list_for_each_entry(block, blocks, link) { - u64 block_size, offset; - - block_size = min_t(u64, size, - 
i915_buddy_block_size(&mem->mm, block)); - offset = i915_buddy_block_offset(block); - - while (block_size) { - u64 len; - - if (offset != prev_end || sg->length >= max_segment) { - if (st->nents) { - sg_page_sizes |= sg->length; - sg = __sg_next(sg); - } - - sg_dma_address(sg) = mem->region.start + offset; - sg_dma_len(sg) = 0; - sg->length = 0; - st->nents++; - } - - len = min(block_size, max_segment - sg->length); - sg->length += len; - sg_dma_len(sg) += len; - - offset += len; - block_size -= len; - - prev_end = offset; - } - } - - sg_page_sizes |= sg->length; - sg_mark_end(sg); - i915_sg_trim(st); - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err_free_sg: - sg_free_table(st); - kfree(st); - return ret; -} - void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, struct intel_memory_region *mem) { - INIT_LIST_HEAD(&obj->mm.blocks); obj->mm.region = intel_memory_region_get(mem); if (obj->base.size <= mem->min_page_size) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index ebddc86d78f7..84fcb3297400 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -12,10 +12,6 @@ struct intel_memory_region; struct drm_i915_gem_object; struct sg_table; -int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj); -void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, - struct sg_table *pages); - void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, struct intel_memory_region *mem); void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index a9bfa66c8da1..5d16c4462fda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -628,11 +628,13 @@ static const struct intel_memory_region_ops shmem_region_ops = { 
.init_object = shmem_object_init, }; -struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, + u16 type, u16 instance) { return intel_memory_region_create(i915, 0, totalram_pages() << PAGE_SHIFT, PAGE_SIZE, 0, + type, instance, &shmem_region_ops); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 4f9c8d3021ab..f4fb68e8955a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -38,15 +38,17 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) } static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, - unsigned long shrink) + unsigned long shrink, bool trylock_vm) { unsigned long flags; flags = 0; if (shrink & I915_SHRINK_ACTIVE) - flags = I915_GEM_OBJECT_UNBIND_ACTIVE; + flags |= I915_GEM_OBJECT_UNBIND_ACTIVE; if (!(shrink & I915_SHRINK_BOUND)) - flags = I915_GEM_OBJECT_UNBIND_TEST; + flags |= I915_GEM_OBJECT_UNBIND_TEST; + if (trylock_vm) + flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK; if (i915_gem_object_unbind(obj, flags) == 0) return true; @@ -117,6 +119,9 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, unsigned long scanned = 0; int err; + /* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */ + bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915); + trace_i915_gem_shrink(i915, target, shrink); /* @@ -204,7 +209,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); err = 0; - if (unsafe_drop_pages(obj, shrink)) { + if (unsafe_drop_pages(obj, shrink, trylock_vm)) { /* May arrive from get_pages on another bo */ if (!ww) { if (!i915_gem_object_trylock(obj)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b0597de206de..b0c3a7dc60d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -10,6 +10,7 @@ #include <drm/drm_mm.h> #include <drm/i915_drm.h> +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_gem_stolen.h" @@ -37,7 +38,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, return -ENODEV; /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(i915) >= 8 && start < 4096) + if (GRAPHICS_VER(i915) >= 8 && start < 4096) start = 4096; mutex_lock(&i915->mm.stolen_lock); @@ -83,14 +84,14 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, */ /* Make sure we don't clobber the GTT if it's within stolen memory */ - if (INTEL_GEN(i915) <= 4 && + if (GRAPHICS_VER(i915) <= 4 && !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { struct resource stolen[2] = {*dsm, *dsm}; struct resource ggtt_res; resource_size_t ggtt_start; ggtt_start = intel_uncore_read(uncore, PGTBL_CTL); - if (IS_GEN(i915, 4)) + if (GRAPHICS_VER(i915) == 4) ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) | (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; else @@ -122,6 +123,14 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, } /* + * With stolen lmem, we don't need to check if the address range + * overlaps with the non-stolen system memory range, since lmem is local + * to the gpu. + */ + if (HAS_LMEM(i915)) + return 0; + + /* * Verify that nothing else uses this physical address. Stolen * memory should be reserved by the BIOS and hidden from the * kernel. So if the region is already marked as busy, something @@ -147,7 +156,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, * GEN3 firmware likes to smash pci bridges into the stolen * range. Apparently this works. 
*/ - if (!r && !IS_GEN(i915, 3)) { + if (!r && GRAPHICS_VER(i915) != 3) { drm_err(&i915->drm, "conflict detected with stolen region: %pR\n", dsm); @@ -188,7 +197,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *i915, * Whether ILK really reuses the ELK register for this is unclear. * Let's see if we catch anyone with this supposedly enabled on ILK. */ - drm_WARN(&i915->drm, IS_GEN(i915, 5), + drm_WARN(&i915->drm, GRAPHICS_VER(i915) == 5, "ILK stolen reserved found? 0x%08x\n", reg_val); @@ -374,8 +383,9 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, } } -static int i915_gem_init_stolen(struct drm_i915_private *i915) +static int i915_gem_init_stolen(struct intel_memory_region *mem) { + struct drm_i915_private *i915 = mem->i915; struct intel_uncore *uncore = &i915->uncore; resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; @@ -389,17 +399,17 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) return 0; } - if (intel_vtd_active() && INTEL_GEN(i915) < 8) { + if (intel_vtd_active() && GRAPHICS_VER(i915) < 8) { drm_notice(&i915->drm, "%s, disabling use of stolen memory\n", "DMAR active"); return 0; } - if (resource_size(&intel_graphics_stolen_res) == 0) + if (resource_size(&mem->region) == 0) return 0; - i915->dsm = intel_graphics_stolen_res; + i915->dsm = mem->region; if (i915_adjust_stolen(i915, &i915->dsm)) return 0; @@ -411,7 +421,7 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) reserved_base = stolen_top; reserved_size = 0; - switch (INTEL_GEN(i915)) { + switch (GRAPHICS_VER(i915)) { case 2: case 3: break; @@ -446,7 +456,7 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) &reserved_base, &reserved_size); break; default: - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(GRAPHICS_VER(i915)); fallthrough; case 11: case 12: @@ -627,10 +637,17 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, { static struct 
lock_class_key lock_class; unsigned int cache_level; + unsigned int flags; int err; + /* + * Stolen objects are always physically contiguous since we just + * allocate one big block underneath using the drm_mm range allocator. + */ + flags = I915_BO_ALLOC_CONTIGUOUS; + drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class, 0); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class, flags); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; @@ -640,9 +657,11 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, if (WARN_ON(!i915_gem_object_trylock(obj))) return -EBUSY; + i915_gem_object_init_memory_region(obj, mem); + err = i915_gem_object_pin_pages(obj); - if (!err) - i915_gem_object_init_memory_region(obj, mem); + if (err) + i915_gem_object_release_memory_region(obj); i915_gem_object_unlock(obj); return err; @@ -667,7 +686,8 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, if (!stolen) return -ENOMEM; - ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096); + ret = i915_gem_stolen_insert_node(i915, stolen, size, + mem->min_page_size); if (ret) goto err_free; @@ -688,39 +708,128 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN_SMEM], - size, I915_BO_ALLOC_CONTIGUOUS); + return i915_gem_object_create_region(i915->mm.stolen_region, size, 0); } -static int init_stolen(struct intel_memory_region *mem) +static int init_stolen_smem(struct intel_memory_region *mem) { - intel_memory_region_set_name(mem, "stolen"); - /* * Initialise stolen early so that we may reserve preallocated * objects for the BIOS to KMS transition. 
*/ - return i915_gem_init_stolen(mem->i915); + return i915_gem_init_stolen(mem); +} + +static void release_stolen_smem(struct intel_memory_region *mem) +{ + i915_gem_cleanup_stolen(mem->i915); +} + +static const struct intel_memory_region_ops i915_region_stolen_smem_ops = { + .init = init_stolen_smem, + .release = release_stolen_smem, + .init_object = _i915_gem_object_stolen_init, +}; + +static int init_stolen_lmem(struct intel_memory_region *mem) +{ + int err; + + if (GEM_WARN_ON(resource_size(&mem->region) == 0)) + return -ENODEV; + + if (!io_mapping_init_wc(&mem->iomap, + mem->io_start, + resource_size(&mem->region))) + return -EIO; + + /* + * TODO: For stolen lmem we mostly just care about populating the dsm + * related bits and setting up the drm_mm allocator for the range. + * Perhaps split up i915_gem_init_stolen() for this. + */ + err = i915_gem_init_stolen(mem); + if (err) + goto err_fini; + + return 0; + +err_fini: + io_mapping_fini(&mem->iomap); + return err; } -static void release_stolen(struct intel_memory_region *mem) +static void release_stolen_lmem(struct intel_memory_region *mem) { + io_mapping_fini(&mem->iomap); i915_gem_cleanup_stolen(mem->i915); } -static const struct intel_memory_region_ops i915_region_stolen_ops = { - .init = init_stolen, - .release = release_stolen, +static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = { + .init = init_stolen_lmem, + .release = release_stolen_lmem, .init_object = _i915_gem_object_stolen_init, }; -struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) +struct intel_memory_region * +i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, + u16 instance) +{ + struct intel_uncore *uncore = &i915->uncore; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct intel_memory_region *mem; + resource_size_t io_start; + resource_size_t lmem_size; + u64 lmem_base; + + lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE); + if (GEM_WARN_ON(lmem_base >= 
pci_resource_len(pdev, 2))) + return ERR_PTR(-ENODEV); + + lmem_size = pci_resource_len(pdev, 2) - lmem_base; + io_start = pci_resource_start(pdev, 2) + lmem_base; + + mem = intel_memory_region_create(i915, lmem_base, lmem_size, + I915_GTT_PAGE_SIZE_4K, io_start, + type, instance, + &i915_region_stolen_lmem_ops); + if (IS_ERR(mem)) + return mem; + + /* + * TODO: consider creating common helper to just print all the + * interesting stuff from intel_memory_region, which we can use for all + * our probed regions. + */ + + drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", + &mem->io_start); + + intel_memory_region_set_name(mem, "stolen-local"); + + mem->private = true; + + return mem; +} + +struct intel_memory_region* +i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, + u16 instance) { - return intel_memory_region_create(i915, - intel_graphics_stolen_res.start, - resource_size(&intel_graphics_stolen_res), - PAGE_SIZE, 0, - &i915_region_stolen_ops); + struct intel_memory_region *mem; + + mem = intel_memory_region_create(i915, + intel_graphics_stolen_res.start, + resource_size(&intel_graphics_stolen_res), + PAGE_SIZE, 0, type, instance, + &i915_region_stolen_smem_ops); + if (IS_ERR(mem)) + return mem; + + intel_memory_region_set_name(mem, "stolen-system"); + + mem->private = true; + return mem; } struct drm_i915_gem_object * @@ -728,7 +837,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, resource_size_t stolen_offset, resource_size_t size) { - struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN_SMEM]; + struct intel_memory_region *mem = i915->mm.stolen_region; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; @@ -742,8 +851,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, /* KISS and expect everything to be page-aligned */ if (GEM_WARN_ON(size == 0) || - GEM_WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || - 
GEM_WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) + GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) || + GEM_WARN_ON(!IS_ALIGNED(stolen_offset, mem->min_page_size))) return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index b03489706796..ccdf7befc571 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -21,7 +21,13 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, u64 end); void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, struct drm_mm_node *node); -struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915); +struct intel_memory_region * +i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, + u16 instance); +struct intel_memory_region * +i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, + u16 instance); + struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, resource_size_t size); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index 9e8945013090..ef4d0f7dc118 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -62,14 +62,14 @@ u32 i915_gem_fence_size(struct drm_i915_private *i915, GEM_BUG_ON(!stride); - if (INTEL_GEN(i915) >= 4) { + if (GRAPHICS_VER(i915) >= 4) { stride *= i915_gem_tile_height(tiling); GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE)); return roundup(size, stride); } /* Previous chips need a power-of-two fence region when tiling */ - if (IS_GEN(i915, 3)) + if (GRAPHICS_VER(i915) == 3) ggtt_size = 1024*1024; else ggtt_size = 512*1024; @@ -102,7 +102,7 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size, if (tiling == I915_TILING_NONE) return I915_GTT_MIN_ALIGNMENT; - if (INTEL_GEN(i915) >= 4) 
+ if (GRAPHICS_VER(i915) >= 4) return I965_FENCE_PAGE; /* @@ -130,10 +130,10 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, /* check maximum stride & object size */ /* i965+ stores the end address of the gtt mapping in the fence * reg, so dont bother to check the size */ - if (INTEL_GEN(i915) >= 7) { + if (GRAPHICS_VER(i915) >= 7) { if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL) return false; - } else if (INTEL_GEN(i915) >= 4) { + } else if (GRAPHICS_VER(i915) >= 4) { if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) return false; } else { @@ -144,7 +144,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, return false; } - if (IS_GEN(i915, 2) || + if (GRAPHICS_VER(i915) == 2 || (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915))) tile_width = 128; else diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index a657b99ec760..7487bab11f0b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -85,8 +85,8 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, return true; /* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout_rcu(obj->base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(obj->base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); if (r <= 0) drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); @@ -173,7 +173,7 @@ alloc_table: goto err; } - sg_page_sizes = i915_sg_page_sizes(st->sgl); + sg_page_sizes = i915_sg_dma_sizes(st->sgl); __i915_gem_object_set_pages(obj, st, sg_page_sizes); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 4b9856d5ba14..1e97520c62b2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -45,7 +45,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int count, i; int ret; - ret = dma_resv_get_fences_rcu(resv, &excl, &count, 
&shared); + ret = dma_resv_get_fences(resv, &excl, &count, &shared); if (ret) return ret; @@ -73,7 +73,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, */ prune_fences = count && timeout >= 0; } else { - excl = dma_resv_get_excl_rcu(resv); + excl = dma_resv_get_excl_unlocked(resv); } if (excl && timeout >= 0) @@ -158,8 +158,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int count, i; int ret; - ret = dma_resv_get_fences_rcu(obj->base.resv, - &excl, &count, &shared); + ret = dma_resv_get_fences(obj->base.resv, &excl, &count, + &shared); if (ret) return ret; @@ -170,7 +170,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, kfree(shared); } else { - excl = dma_resv_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_unlocked(obj->base.resv); } if (excl) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index d36873885cc1..176e6b22f87f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -152,8 +152,8 @@ static int prepare_blit(const struct tiled_blits *t, struct blit_buffer *src, struct drm_i915_gem_object *batch) { - const int gen = INTEL_GEN(to_i915(batch->base.dev)); - bool use_64b_reloc = gen >= 8; + const int ver = GRAPHICS_VER(to_i915(batch->base.dev)); + bool use_64b_reloc = ver >= 8; u32 src_pitch, dst_pitch; u32 cmd, *cs; @@ -171,7 +171,7 @@ static int prepare_blit(const struct tiled_blits *t, *cs++ = cmd; cmd = MI_FLUSH_DW; - if (gen >= 8) + if (ver >= 8) cmd++; *cs++ = cmd; *cs++ = 0; @@ -179,7 +179,7 @@ static int prepare_blit(const struct tiled_blits *t, *cs++ = 0; cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2); - if (gen >= 8) + if (ver >= 8) cmd += 2; src_pitch = t->width * 4; @@ -666,7 +666,7 @@ static int igt_client_tiled_blits(void *arg) int inst = 0; /* Test requires explicit BLT tiling controls */ - if (INTEL_GEN(i915) 
< 4) + if (GRAPHICS_VER(i915) < 4) return 0; if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */ diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index e937b6629019..13b088cc787e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -221,12 +221,12 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) goto out_rq; } - if (INTEL_GEN(ctx->engine->i915) >= 8) { + if (GRAPHICS_VER(ctx->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; - } else if (INTEL_GEN(ctx->engine->i915) >= 4) { + } else if (GRAPHICS_VER(ctx->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = i915_ggtt_offset(vma) + offset; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 5fef592390cb..dbcfa28a9d91 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -897,7 +897,7 @@ static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *v { u32 *cmd; - GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8); + GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); if (IS_ERR(cmd)) @@ -932,7 +932,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - if (INTEL_GEN(i915) < 8) + if (GRAPHICS_VER(i915) < 8) return -EINVAL; vma = i915_vma_instance(obj, ce->vm, NULL); @@ -1100,7 +1100,7 @@ __read_slice_count(struct intel_context *ce, return ret; } - if (INTEL_GEN(ce->engine->i915) >= 11) { + if (GRAPHICS_VER(ce->engine->i915) >= 11) { s_mask = GEN11_RPCS_S_CNT_MASK; s_shift = GEN11_RPCS_S_CNT_SHIFT; } else 
{ @@ -1229,7 +1229,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, int inst = 0; int ret = 0; - if (INTEL_GEN(i915) < 9) + if (GRAPHICS_VER(i915) < 9) return 0; if (flags & TEST_RESET) @@ -1518,7 +1518,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, } *cmd++ = MI_STORE_DWORD_IMM_GEN4; - if (INTEL_GEN(i915) >= 8) { + if (GRAPHICS_VER(i915) >= 8) { *cmd++ = lower_32_bits(offset); *cmd++ = upper_32_bits(offset); } else { @@ -1608,7 +1608,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (IS_ERR(obj)) return PTR_ERR(obj); - if (INTEL_GEN(i915) >= 8) { + if (GRAPHICS_VER(i915) >= 8) { const u32 GPR0 = engine->mmio_base + 0x600; vm = i915_gem_context_get_vm_rcu(ctx); @@ -1740,7 +1740,6 @@ out: static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) { struct i915_address_space *vm; - struct page *page; u32 *vaddr; int err = 0; @@ -1748,24 +1747,18 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) if (!vm) return -ENODEV; - page = __px_page(vm->scratch[0]); - if (!page) { + if (!vm->scratch[0]) { pr_err("No scratch page!\n"); return -EINVAL; } - vaddr = kmap(page); - if (!vaddr) { - pr_err("No (mappable) scratch page!\n"); - return -EINVAL; - } + vaddr = __px_vaddr(vm->scratch[0]); memcpy(out, vaddr, sizeof(*out)); if (memchr_inv(vaddr, *out, PAGE_SIZE)) { pr_err("Inconsistent initial state of scratch page!\n"); err = -EINVAL; } - kunmap(page); return err; } @@ -1783,7 +1776,7 @@ static int igt_vm_isolation(void *arg) u32 expected; int err; - if (INTEL_GEN(i915) < 7) + if (GRAPHICS_VER(i915) < 7) return 0; /* @@ -1837,7 +1830,7 @@ static int igt_vm_isolation(void *arg) continue; /* Not all engines have their own GPR! 
*/ - if (INTEL_GEN(i915) < 8 && engine->class != RENDER_CLASS) + if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) continue; while (!__igt_timeout(end_time, NULL)) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5cf6df49c333..3a30955285d6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -273,7 +273,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, static unsigned int setup_tile_size(struct tile *tile, struct drm_i915_private *i915) { - if (INTEL_GEN(i915) <= 2) { + if (GRAPHICS_VER(i915) <= 2) { tile->height = 16; tile->width = 128; tile->size = 11; @@ -288,9 +288,9 @@ setup_tile_size(struct tile *tile, struct drm_i915_private *i915) tile->size = 12; } - if (INTEL_GEN(i915) < 4) + if (GRAPHICS_VER(i915) < 4) return 8192 / tile->width; - else if (INTEL_GEN(i915) < 7) + else if (GRAPHICS_VER(i915) < 7) return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width; else return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width; @@ -386,7 +386,7 @@ static int igt_partial_tiling(void *arg) if (err) goto out_unlock; - if (pitch > 2 && INTEL_GEN(i915) >= 4) { + if (pitch > 2 && GRAPHICS_VER(i915) >= 4) { tile.stride = tile.width * (pitch - 1); err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) @@ -395,7 +395,7 @@ static int igt_partial_tiling(void *arg) goto out_unlock; } - if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { + if (pitch < max_pitch && GRAPHICS_VER(i915) >= 4) { tile.stride = tile.width * (pitch + 1); err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) @@ -405,7 +405,7 @@ static int igt_partial_tiling(void *arg) } } - if (INTEL_GEN(i915) >= 4) { + if (GRAPHICS_VER(i915) >= 4) { for_each_prime_number(pitch, max_pitch) { tile.stride = tile.width * pitch; err = check_partial_mappings(obj, &tile, end); @@ -501,7 +501,7 @@ static int igt_smoke_tiling(void *arg) 
tile.stride = i915_prandom_u32_max_state(max_pitch, &prng); tile.stride = (1 + tile.stride) * tile.width; - if (INTEL_GEN(i915) < 4) + if (GRAPHICS_VER(i915) < 4) tile.stride = rounddown_pow_of_two(tile.stride); } @@ -842,6 +842,24 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) return true; } +static void object_set_placements(struct drm_i915_gem_object *obj, + struct intel_memory_region **placements, + unsigned int n_placements) +{ + GEM_BUG_ON(!n_placements); + + if (n_placements == 1) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr = placements[0]; + + obj->mm.placements = &i915->mm.regions[mr->id]; + obj->mm.n_placements = 1; + } else { + obj->mm.placements = placements; + obj->mm.n_placements = n_placements; + } +} + #define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) static int __igt_mmap(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, @@ -950,6 +968,8 @@ static int igt_mmap(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + object_set_placements(obj, &mr, 1); + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); @@ -1068,6 +1088,8 @@ static int igt_mmap_access(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + object_set_placements(obj, &mr, 1); + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB); @@ -1211,6 +1233,8 @@ static int igt_mmap_gpu(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + object_set_placements(obj, &mr, 1); + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); @@ -1354,6 +1378,8 @@ static int igt_mmap_revoke(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + object_set_placements(obj, &mr, 1); + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); diff 
--git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 0b092c62bb34..b35c1219c852 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -44,7 +44,7 @@ igt_emit_store_dw(struct i915_vma *vma, u32 val) { struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(vma->vm->i915); + const int ver = GRAPHICS_VER(vma->vm->i915); unsigned long n, size; u32 *cmd; int err; @@ -65,14 +65,14 @@ igt_emit_store_dw(struct i915_vma *vma, offset += vma->node.start; for (n = 0; n < count; n++) { - if (gen >= 8) { + if (ver >= 8) { *cmd++ = MI_STORE_DWORD_IMM_GEN4; *cmd++ = lower_32_bits(offset); *cmd++ = upper_32_bits(offset); *cmd++ = val; - } else if (gen >= 4) { + } else if (ver >= 4) { *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); + (ver < 6 ? MI_USE_GGTT : 0); *cmd++ = 0; *cmd++ = offset; *cmd++ = val; @@ -146,7 +146,7 @@ int igt_gpu_fill_dw(struct intel_context *ce, goto skip_request; flags = 0; - if (INTEL_GEN(ce->vm->i915) <= 5) + if (GRAPHICS_VER(ce->vm->i915) <= 5) flags |= I915_DISPATCH_SECURE; err = rq->engine->emit_bb_start(rq, diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index d4f4452ce5ed..4270b5a34a83 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -85,14 +85,14 @@ static int gen6_drpc(struct seq_file *m) gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS); rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); - if (INTEL_GEN(i915) >= 9) { + if (GRAPHICS_VER(i915) >= 9) { gen9_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); gen9_powergate_status = intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS); } - if (INTEL_GEN(i915) <= 7) + if (GRAPHICS_VER(i915) <= 7) sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL); @@ -100,7 +100,7 @@ static int gen6_drpc(struct seq_file 
*m) yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); - if (INTEL_GEN(i915) >= 9) { + if (GRAPHICS_VER(i915) >= 9) { seq_printf(m, "Render Well Gating Enabled: %s\n", yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); seq_printf(m, "Media Well Gating Enabled: %s\n", @@ -134,7 +134,7 @@ static int gen6_drpc(struct seq_file *m) seq_printf(m, "Core Power Down: %s\n", yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); - if (INTEL_GEN(i915) >= 9) { + if (GRAPHICS_VER(i915) >= 9) { seq_printf(m, "Render Power Well: %s\n", (gen9_powergate_status & GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); @@ -150,7 +150,7 @@ static int gen6_drpc(struct seq_file *m) print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); - if (INTEL_GEN(i915) <= 7) { + if (GRAPHICS_VER(i915) <= 7) { seq_printf(m, "RC6 voltage: %dmV\n", GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); seq_printf(m, "RC6+ voltage: %dmV\n", @@ -230,7 +230,7 @@ static int drpc_show(struct seq_file *m, void *unused) with_intel_runtime_pm(gt->uncore->rpm, wakeref) { if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_drpc(m); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) err = gen6_drpc(m); else err = ilk_drpc(m); @@ -250,7 +250,7 @@ static int frequency_show(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(uncore->rpm); - if (IS_GEN(i915, 5)) { + if (GRAPHICS_VER(i915) == 5) { u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); @@ -296,7 +296,7 @@ static int frequency_show(struct seq_file *m, void *unused) seq_printf(m, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); - } else if (INTEL_GEN(i915) >= 6) { + } else if (GRAPHICS_VER(i915) >= 6) { u32 rp_state_limits; u32 gt_perf_status; u32 rp_state_cap; @@ -321,7 +321,7 @@ static int 
frequency_show(struct seq_file *m, void *unused) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); - if (INTEL_GEN(i915) >= 9) { + if (GRAPHICS_VER(i915) >= 9) { reqf >>= 23; } else { reqf &= ~GEN6_TURBO_DISABLE; @@ -354,7 +354,7 @@ static int frequency_show(struct seq_file *m, void *unused) intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); /* @@ -363,7 +363,7 @@ static int frequency_show(struct seq_file *m, void *unused) */ pm_isr = 0; pm_iir = 0; - } else if (INTEL_GEN(i915) >= 8) { + } else if (GRAPHICS_VER(i915) >= 8) { pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); @@ -386,14 +386,14 @@ static int frequency_show(struct seq_file *m, void *unused) seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_mask); - if (INTEL_GEN(i915) <= 10) + if (GRAPHICS_VER(i915) <= 10) seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", pm_isr, pm_iir); seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", - (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); + (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); seq_printf(m, "Render p-state VID: %d\n", gt_perf_status & 0xff); seq_printf(m, "Render p-state limit: %d\n", @@ -437,20 +437,20 @@ static int frequency_show(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 10 ? 
GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(i915) || - INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -488,7 +488,7 @@ static int llc_show(struct seq_file *m, void *data) { struct intel_gt *gt = m->private; struct drm_i915_private *i915 = gt->i915; - const bool edram = INTEL_GEN(i915) > 8; + const bool edram = GRAPHICS_VER(i915) > 8; struct intel_rps *rps = >->rps; unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; @@ -500,7 +500,7 @@ static int llc_show(struct seq_file *m, void *data) min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq /= GEN9_FREQ_SCALER; max_gpu_freq /= GEN9_FREQ_SCALER; @@ -518,7 +518,7 @@ static int llc_show(struct seq_file *m, void *data) intel_gpu_freq(rps, (gpu_freq * (IS_GEN9_BC(i915) || - INTEL_GEN(i915) >= 10 ? + GRAPHICS_VER(i915) >= 10 ? 
GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); @@ -580,7 +580,7 @@ static int rps_boost_show(struct seq_file *m, void *data) seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts)); - if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) { + if (GRAPHICS_VER(i915) >= 6 && intel_rps_is_active(rps)) { struct intel_uncore *uncore = gt->uncore; u32 rpup, rpupei; u32 rpdown, rpdownei; diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 9646200d2792..61383830505e 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -74,7 +74,7 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode) cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - if (IS_G4X(rq->engine->i915) || IS_GEN(rq->engine->i915, 5)) + if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5) cmd |= MI_INVALIDATE_ISP; } diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index e08dff376339..1aee5e6b1b23 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -96,9 +96,8 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, * entries back to scratch. 
*/ - vaddr = kmap_atomic_px(pt); + vaddr = px_vaddr(pt); memset32(vaddr + pte, scratch_pte, count); - kunmap_atomic(vaddr); pte = 0; } @@ -120,7 +119,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, GEM_BUG_ON(!pd->entry[act_pt]); - vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); + vaddr = px_vaddr(i915_pt_entry(pd, act_pt)); do { GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE); vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); @@ -136,12 +135,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } if (++act_pte == GEN6_PTES) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); + vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt)); act_pte = 0; } } while (1); - kunmap_atomic(vaddr); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } @@ -235,7 +232,7 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) goto err_scratch0; } - ret = pin_pt_dma(vm, vm->scratch[1]); + ret = map_pt_dma(vm, vm->scratch[1]); if (ret) goto err_scratch1; @@ -346,7 +343,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); - i915_active_init(&vma->active, NULL, NULL); + i915_active_init(&vma->active, NULL, NULL, 0); kref_init(&vma->ref); mutex_init(&vma->pages_mutex); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 732c2ed1d933..94e0a5669f90 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -38,7 +38,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL * pipe control. 
*/ - if (IS_GEN(rq->engine->i915, 9)) + if (GRAPHICS_VER(rq->engine->i915) == 9) vf_flush_wa = true; /* WaForGAMHang:kbl */ diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 74bf6fc8461f..21c8b7350b7a 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -242,11 +242,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, atomic_read(&pt->used)); GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); - vaddr = kmap_atomic_px(pt); + vaddr = px_vaddr(pt); memset64(vaddr + gen8_pd_index(start, 0), vm->scratch[0]->encode, count); - kunmap_atomic(vaddr); atomic_sub(count, &pt->used); start += count; @@ -375,7 +374,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); - vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); do { GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE); vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; @@ -402,12 +401,10 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, } clflush_cache_range(vaddr, PAGE_SIZE); - kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); } } while (1); clflush_cache_range(vaddr, PAGE_SIZE); - kunmap_atomic(vaddr); return idx; } @@ -442,7 +439,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, encode |= GEN8_PDE_PS_2M; page_size = I915_GTT_PAGE_SIZE_2M; - vaddr = kmap_atomic_px(pd); + vaddr = px_vaddr(pd); } else { struct i915_page_table *pt = i915_pt_entry(pd, __gen8_pte_index(start, 1)); @@ -457,7 +454,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) maybe_64K = __gen8_pte_index(start, 1); - vaddr = kmap_atomic_px(pt); + vaddr = px_vaddr(pt); } do { @@ -491,7 +488,6 @@ static void 
gen8_ppgtt_insert_huge(struct i915_vma *vma, } while (rem >= page_size && index < I915_PDES); clflush_cache_range(vaddr, PAGE_SIZE); - kunmap_atomic(vaddr); /* * Is it safe to mark the 2M block as 64K? -- Either we have @@ -505,9 +501,8 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, !iter->sg && IS_ALIGNED(vma->node.start + vma->node.size, I915_GTT_PAGE_SIZE_2M)))) { - vaddr = kmap_atomic_px(pd); + vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; - kunmap_atomic(vaddr); page_size = I915_GTT_PAGE_SIZE_64K; /* @@ -523,12 +518,11 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, u16 i; encode = vma->vm->scratch[0]->encode; - vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); + vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K)); for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15); - kunmap_atomic(vaddr); } } @@ -602,7 +596,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) if (IS_ERR(obj)) goto free_scratch; - ret = pin_pt_dma(vm, obj); + ret = map_pt_dma(vm, obj); if (ret) { i915_gem_object_put(obj); goto free_scratch; @@ -639,7 +633,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) if (IS_ERR(pde)) return PTR_ERR(pde); - err = pin_pt_dma(vm, pde->pt.base); + err = map_pt_dma(vm, pde->pt.base); if (err) { free_pd(vm, pde); return err; @@ -674,7 +668,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) goto err_pd; } - err = pin_pt_dma(vm, pd->pt.base); + err = map_pt_dma(vm, pd->pt.base); if (err) goto err_pd; @@ -715,9 +709,12 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) * * Gen12 has inherited the same read-only fault issue from gen11. 
*/ - ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12); + ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); - ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + if (HAS_LMEM(gt->i915)) + ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; + else + ppgtt->vm.alloc_pt_dma = alloc_pt_dma; err = gen8_init_scratch(&ppgtt->vm); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 17cf2640b082..4033184f13b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -326,7 +326,6 @@ void intel_context_unpin(struct intel_context *ce) intel_context_put(ce); } -__i915_active_call static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); @@ -385,7 +384,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) mutex_init(&ce->pin_mutex); i915_active_init(&ce->active, - __intel_context_active, __intel_context_retire); + __intel_context_active, __intel_context_retire, 0); } void intel_context_fini(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index 8dfd8f656aaa..e86d8255feec 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -76,7 +76,7 @@ intel_context_reconfigure_sseu(struct intel_context *ce, { int ret; - GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8); + GEM_BUG_ON(GRAPHICS_VER(ce->engine->i915) < 8); ret = intel_context_lock_pinned(ce); if (ret) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 47ee8578e511..8d9184920c51 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -13,8 +13,9 @@ #include "i915_reg.h" #include "i915_request.h" #include "i915_selftest.h" -#include "gt/intel_timeline.h" #include "intel_engine_types.h" +#include 
"intel_gt_types.h" +#include "intel_timeline.h" #include "intel_workarounds.h" struct drm_printer; @@ -262,6 +263,11 @@ void intel_engine_init_active(struct intel_engine_cs *engine, #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 +static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine) +{ + return engine->gt->submission_method >= INTEL_SUBMISSION_GUC; +} + static inline bool intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 6dbdbde00f14..9ceddfbb1687 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -240,10 +240,10 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask) * Though they added more rings on g4x/ilk, they did not add * per-engine HWSTAM until gen6. */ - if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS) + if (GRAPHICS_VER(engine->i915) < 6 && engine->class != RENDER_CLASS) return; - if (INTEL_GEN(engine->i915) >= 3) + if (GRAPHICS_VER(engine->i915) >= 3) ENGINE_WRITE(engine, RING_HWSTAM, mask); else ENGINE_WRITE16(engine, RING_HWSTAM, mask); @@ -255,11 +255,17 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) intel_engine_set_hwsp_writemask(engine, ~0u); } +static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) +{ + GEM_DEBUG_WARN_ON(iir); +} + static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; + u8 guc_class; BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); @@ -288,9 +294,12 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; - engine->mmio_base 
= __engine_mmio_base(i915, info->mmio_bases); engine->hw_id = info->hw_id; - engine->guc_id = MAKE_GUC_ID(info->class, info->instance); + guc_class = engine_class_to_guc_class(info->class); + engine->guc_id = MAKE_GUC_ID(guc_class, info->instance); + engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); + + engine->irq_handler = nop_irq_handler; engine->class = info->class; engine->instance = info->instance; @@ -308,7 +317,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) CONFIG_DRM_I915_TIMESLICE_DURATION; /* Override to uninterruptible for OpenCL workloads. */ - if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS) + if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) engine->props.preempt_timeout_ms = 0; engine->defaults = engine->props; /* never to change again */ @@ -345,8 +354,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) * HEVC support is present on first engine instance * before Gen11 and on all instances afterwards. */ - if (INTEL_GEN(i915) >= 11 || - (INTEL_GEN(i915) >= 9 && engine->instance == 0)) + if (GRAPHICS_VER(i915) >= 11 || + (GRAPHICS_VER(i915) >= 9 && engine->instance == 0)) engine->uabi_capabilities |= I915_VIDEO_CLASS_CAPABILITY_HEVC; @@ -354,14 +363,14 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) * SFC block is present only on even logical engine * instances. 
*/ - if ((INTEL_GEN(i915) >= 11 && + if ((GRAPHICS_VER(i915) >= 11 && (engine->gt->info.vdbox_sfc_access & BIT(engine->instance))) || - (INTEL_GEN(i915) >= 9 && engine->instance == 0)) + (GRAPHICS_VER(i915) >= 9 && engine->instance == 0)) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; } else if (engine->class == VIDEO_ENHANCEMENT_CLASS) { - if (INTEL_GEN(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; } @@ -459,7 +468,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) info->engine_mask = INTEL_INFO(i915)->platform_engine_mask; - if (INTEL_GEN(i915) < 11) + if (GRAPHICS_VER(i915) < 11) return info->engine_mask; media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); @@ -485,7 +494,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) * hooked up to an SFC (Scaler & Format Converter) unit. * In TGL each VDBOX has access to an SFC. */ - if (INTEL_GEN(i915) >= 12 || logical_vdbox++ % 2 == 0) + if (GRAPHICS_VER(i915) >= 12 || logical_vdbox++ % 2 == 0) gt->info.vdbox_sfc_access |= BIT(i); } drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", @@ -722,7 +731,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_whitelist(engine); intel_engine_init_ctx_wa(engine); - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; return 0; @@ -898,7 +907,7 @@ static int engine_init_common(struct intel_engine_cs *engine) return 0; err_context: - intel_context_put(ce); + destroy_pinned_context(ce); return ret; } @@ -909,12 +918,16 @@ int intel_engines_init(struct intel_gt *gt) enum intel_engine_id id; int err; - if (intel_uc_uses_guc_submission(>->uc)) + if (intel_uc_uses_guc_submission(>->uc)) { + gt->submission_method = INTEL_SUBMISSION_GUC; setup = intel_guc_submission_setup; - else if (HAS_EXECLISTS(gt->i915)) + } else if 
(HAS_EXECLISTS(gt->i915)) { + gt->submission_method = INTEL_SUBMISSION_ELSP; setup = intel_execlists_submission_setup; - else + } else { + gt->submission_method = INTEL_SUBMISSION_RING; setup = intel_ring_submission_setup; + } for_each_engine(engine, gt, id) { err = engine_setup_common(engine); @@ -986,9 +999,9 @@ u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) u64 acthd; - if (INTEL_GEN(i915) >= 8) + if (GRAPHICS_VER(i915) >= 8) acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW); - else if (INTEL_GEN(i915) >= 4) + else if (GRAPHICS_VER(i915) >= 4) acthd = ENGINE_READ(engine, RING_ACTHD); else acthd = ENGINE_READ(engine, ACTHD); @@ -1000,7 +1013,7 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) { u64 bbaddr; - if (INTEL_GEN(engine->i915) >= 8) + if (GRAPHICS_VER(engine->i915) >= 8) bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW); else bbaddr = ENGINE_READ(engine, RING_BBADDR); @@ -1047,7 +1060,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) { int err = 0; - if (INTEL_GEN(engine->i915) < 3) + if (GRAPHICS_VER(engine->i915) < 3) return -ENODEV; ENGINE_TRACE(engine, "\n"); @@ -1097,7 +1110,7 @@ read_subslice_reg(const struct intel_engine_cs *engine, u32 mcr_mask, mcr_ss, mcr, old_mcr, val; enum forcewake_domains fw_domains; - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); } else { @@ -1146,7 +1159,7 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, memset(instdone, 0, sizeof(*instdone)); - switch (INTEL_GEN(i915)) { + switch (GRAPHICS_VER(i915)) { default: instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); @@ -1156,7 +1169,7 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); - if (INTEL_GEN(i915) >= 12) { + if 
(GRAPHICS_VER(i915) >= 12) { instdone->slice_common_extra[0] = intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA); instdone->slice_common_extra[1] = @@ -1219,7 +1232,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) idle = false; /* No bit for gen2, so assume the CS parser is idle */ - if (INTEL_GEN(engine->i915) > 2 && + if (GRAPHICS_VER(engine->i915) > 2 && !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE)) idle = false; @@ -1316,7 +1329,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) bool intel_engine_can_store_dword(struct intel_engine_cs *engine) { - switch (INTEL_GEN(engine->i915)) { + switch (GRAPHICS_VER(engine->i915)) { case 2: return false; /* uses physical not virtual addresses */ case 3: @@ -1421,7 +1434,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, struct intel_engine_execlists * const execlists = &engine->execlists; u64 addr; - if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) + if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); if (HAS_EXECLISTS(dev_priv)) { drm_printf(m, "\tEL_STAT_HI: 0x%08x\n", @@ -1438,13 +1451,13 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tRING_CTL: 0x%08x%s\n", ENGINE_READ(engine, RING_CTL), ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); - if (INTEL_GEN(engine->i915) > 2) { + if (GRAPHICS_VER(engine->i915) > 2) { drm_printf(m, "\tRING_MODE: 0x%08x%s\n", ENGINE_READ(engine, RING_MI_MODE), ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? 
" [idle]" : ""); } - if (INTEL_GEN(dev_priv) >= 6) { + if (GRAPHICS_VER(dev_priv) >= 6) { drm_printf(m, "\tRING_IMR: 0x%08x\n", ENGINE_READ(engine, RING_IMR)); drm_printf(m, "\tRING_ESR: 0x%08x\n", @@ -1461,15 +1474,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, addr = intel_engine_get_last_batch_head(engine); drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", upper_32_bits(addr), lower_32_bits(addr)); - if (INTEL_GEN(dev_priv) >= 8) + if (GRAPHICS_VER(dev_priv) >= 8) addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW); - else if (INTEL_GEN(dev_priv) >= 4) + else if (GRAPHICS_VER(dev_priv) >= 4) addr = ENGINE_READ(engine, RING_DMA_FADD); else addr = ENGINE_READ(engine, DMA_FADD_I8XX); drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", upper_32_bits(addr), lower_32_bits(addr)); - if (INTEL_GEN(dev_priv) >= 4) { + if (GRAPHICS_VER(dev_priv) >= 4) { drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, RING_IPEIR)); drm_printf(m, "\tIPEHR: 0x%08x\n", @@ -1479,7 +1492,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); } - if (intel_engine_in_guc_submission_mode(engine)) { + if (intel_engine_uses_guc(engine)) { /* nothing to print yet */ } else if (HAS_EXECLISTS(dev_priv)) { struct i915_request * const *port, *rq; @@ -1548,7 +1561,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } rcu_read_unlock(); execlists_active_unlock_bh(execlists); - } else if (INTEL_GEN(dev_priv) > 6) { + } else if (GRAPHICS_VER(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 7c9af86fdb1e..47f4397095e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -23,7 +23,7 @@ static void dbg_poison_ce(struct 
intel_context *ce) if (ce->state) { struct drm_i915_gem_object *obj = ce->state->obj; - int type = i915_coherent_map_type(ce->engine->i915); + int type = i915_coherent_map_type(ce->engine->i915, obj, true); void *map; if (!i915_gem_object_trylock(obj)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 883bafc44902..e113f93b3274 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -402,6 +402,7 @@ struct intel_engine_cs { u32 irq_enable_mask; /* bitmask to enable ring interrupt */ void (*irq_enable)(struct intel_engine_cs *engine); void (*irq_disable)(struct intel_engine_cs *engine); + void (*irq_handler)(struct intel_engine_cs *engine, u16 iir); void (*sanitize)(struct intel_engine_cs *engine); int (*resume)(struct intel_engine_cs *engine); @@ -481,10 +482,9 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_HAS_TIMESLICES BIT(4) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) -#define I915_ENGINE_IS_VIRTUAL BIT(6) -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) +#define I915_ENGINE_IS_VIRTUAL BIT(5) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) unsigned int flags; /* @@ -594,12 +594,6 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine) } static inline bool -intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; -} - -static inline bool intel_engine_is_virtual(const struct intel_engine_cs *engine) { return engine->flags & I915_ENGINE_IS_VIRTUAL; @@ -612,10 +606,10 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) } #define instdone_has_slice(dev_priv___, sseu___, slice___) \ - ((IS_GEN(dev_priv___, 7) ? 
1 : ((sseu___)->slice_mask)) & BIT(slice___)) + ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) #define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ - (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \ + (GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \ intel_sseu_has_subslice(sseu__, 0, subslice__)) #define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index de124870af44..fc77592d88a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -118,6 +118,7 @@ #include "intel_engine_stats.h" #include "intel_execlists_submission.h" #include "intel_gt.h" +#include "intel_gt_irq.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_lrc.h" @@ -1768,7 +1769,6 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) */ GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && !reset_in_progress(execlists)); - GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. 
@@ -1847,7 +1847,7 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", head, upper_32_bits(csb), lower_32_bits(csb)); - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) promote = gen12_csb_parse(csb); else promote = gen8_csb_parse(csb); @@ -2385,6 +2385,45 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) rcu_read_unlock(); } +static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) +{ + bool tasklet = false; + + if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { + u32 eir; + + /* Upper 16b are the enabling mask, rsvd for internal errors */ + eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0); + ENGINE_TRACE(engine, "CS error: %x\n", eir); + + /* Disable the error interrupt until after the reset */ + if (likely(eir)) { + ENGINE_WRITE(engine, RING_EMR, ~0u); + ENGINE_WRITE(engine, RING_EIR, eir); + WRITE_ONCE(engine->execlists.error_interrupt, eir); + tasklet = true; + } + } + + if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { + WRITE_ONCE(engine->execlists.yield, + ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); + ENGINE_TRACE(engine, "semaphore yield: %08x\n", + engine->execlists.yield); + if (del_timer(&engine->execlists.timer)) + tasklet = true; + } + + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) + tasklet = true; + + if (iir & GT_RENDER_USER_INTERRUPT) + intel_engine_signal_breadcrumbs(engine); + + if (tasklet) + tasklet_hi_schedule(&engine->execlists.tasklet); +} + static void __execlists_kick(struct intel_engine_execlists *execlists) { /* Kick the tasklet for some interrupt coalescing and reset handling */ @@ -2733,7 +2772,7 @@ static void enable_execlists(struct intel_engine_cs *engine) intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ - if (INTEL_GEN(engine->i915) >= 11) + if (GRAPHICS_VER(engine->i915) >= 11) mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); else mode = 
_MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); @@ -3064,7 +3103,7 @@ static void execlists_park(struct intel_engine_cs *engine) static bool can_preempt(struct intel_engine_cs *engine) { - if (INTEL_GEN(engine->i915) > 8) + if (GRAPHICS_VER(engine->i915) > 8) return true; /* GPGPU on bdw requires extra w/a; not implemented */ @@ -3076,29 +3115,6 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = execlists_submit_request; engine->schedule = i915_schedule; engine->execlists.tasklet.callback = execlists_submission_tasklet; - - engine->reset.prepare = execlists_reset_prepare; - engine->reset.rewind = execlists_reset_rewind; - engine->reset.cancel = execlists_reset_cancel; - engine->reset.finish = execlists_reset_finish; - - engine->park = execlists_park; - engine->unpark = NULL; - - engine->flags |= I915_ENGINE_SUPPORTS_STATS; - if (!intel_vgpu_active(engine->i915)) { - engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (can_preempt(engine)) { - engine->flags |= I915_ENGINE_HAS_PREEMPTION; - if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - engine->flags |= I915_ENGINE_HAS_TIMESLICES; - } - } - - if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen8_emit_bb_start_noarb; } static void execlists_shutdown(struct intel_engine_cs *engine) @@ -3129,16 +3145,24 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &execlists_context_ops; engine->request_alloc = execlists_request_alloc; + engine->reset.prepare = execlists_reset_prepare; + engine->reset.rewind = execlists_reset_rewind; + engine->reset.cancel = execlists_reset_cancel; + engine->reset.finish = execlists_reset_finish; + + engine->park = execlists_park; + engine->unpark = NULL; + engine->emit_flush = gen8_emit_flush_xcs; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; - if (INTEL_GEN(engine->i915) >= 
12) { + if (GRAPHICS_VER(engine->i915) >= 12) { engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; engine->emit_flush = gen12_emit_flush_xcs; } engine->set_default_submission = execlists_set_default_submission; - if (INTEL_GEN(engine->i915) < 11) { + if (GRAPHICS_VER(engine->i915) < 11) { engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; } else { @@ -3149,13 +3173,29 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) * until a more refined solution exists. */ } + intel_engine_set_irq_handler(engine, execlists_irq_handler); + + engine->flags |= I915_ENGINE_SUPPORTS_STATS; + if (!intel_vgpu_active(engine->i915)) { + engine->flags |= I915_ENGINE_HAS_SEMAPHORES; + if (can_preempt(engine)) { + engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } + } + + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen8_emit_bb_start_noarb; } static void logical_ring_default_irqs(struct intel_engine_cs *engine) { unsigned int shift = 0; - if (INTEL_GEN(engine->i915) < 11) { + if (GRAPHICS_VER(engine->i915) < 11) { const u8 irq_shifts[] = { [RCS0] = GEN8_RCS_IRQ_SHIFT, [BCS0] = GEN8_BCS_IRQ_SHIFT, @@ -3175,7 +3215,7 @@ static void logical_ring_default_irqs(struct intel_engine_cs *engine) static void rcs_submission_override(struct intel_engine_cs *engine) { - switch (INTEL_GEN(engine->i915)) { + switch (GRAPHICS_VER(engine->i915)) { case 12: engine->emit_flush = gen12_emit_flush_rcs; engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; @@ -3226,13 +3266,13 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) execlists->csb_write = &engine->status_page.addr[intel_hws_csb_write_index(i915)]; - if (INTEL_GEN(i915) < 11) + if (GRAPHICS_VER(i915) < 11) execlists->csb_size = GEN8_CSB_ENTRIES; else execlists->csb_size = 
GEN11_CSB_ENTRIES; engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); - if (INTEL_GEN(engine->i915) >= 11) { + if (GRAPHICS_VER(engine->i915) >= 11) { execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); } @@ -3884,13 +3924,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, spin_unlock_irqrestore(&engine->active.lock, flags); } -bool -intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) -{ - return engine->set_default_submission == - execlists_set_default_submission; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_execlists.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index fd61dae820e9..4ca9b475e252 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -43,7 +43,4 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, const struct intel_engine_cs *sibling); -bool -intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); - #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 38742bf33fa3..20e46b843324 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -107,10 +107,10 @@ static bool needs_idle_maps(struct drm_i915_private *i915) if (!intel_vtd_active()) return false; - if (IS_GEN(i915, 5) && IS_MOBILE(i915)) + if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) return true; - if (IS_GEN(i915, 12)) + if (GRAPHICS_VER(i915) == 12) return true; /* XXX DMAR fault reason 7 */ return false; @@ -176,7 +176,7 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) gen8_ggtt_invalidate(ggtt); - if (INTEL_GEN(i915) >= 12) + if (GRAPHICS_VER(i915) >= 12) 
intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, GEN12_GUC_TLB_INV_CR_INVALIDATE); else @@ -658,7 +658,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) goto err_ppgtt; i915_gem_object_lock(ppgtt->vm.scratch[0], NULL); - err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); + err = i915_vm_map_pt_stash(&ppgtt->vm, &stash); i915_gem_object_unlock(ppgtt->vm.scratch[0]); if (err) goto err_stash; @@ -746,7 +746,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) mutex_unlock(&ggtt->vm.mutex); i915_address_space_fini(&ggtt->vm); - dma_resv_fini(&ggtt->vm.resv); arch_phys_wc_del(ggtt->mtrr); @@ -768,6 +767,19 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) ggtt_cleanup_hw(ggtt); } +/** + * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after + * all free objects have been drained. + * @i915: i915 device + */ +void i915_ggtt_driver_late_release(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1); + dma_resv_fini(&ggtt->vm._resv); +} + static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) { snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; @@ -820,7 +832,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. */ - if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) + if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 10) ggtt->gsm = ioremap(phys_addr, size); else ggtt->gsm = ioremap_wc(phys_addr, size); @@ -829,6 +841,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } + kref_init(&ggtt->vm.resv_ref); ret = setup_scratch_page(&ggtt->vm); if (ret) { drm_err(&i915->drm, "Scratch setup failed\n"); @@ -907,9 +920,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_entries = gen8_ggtt_insert_entries; - /* Serialize GTT updates with aperture access on BXT if VT-d is on. 
*/ - if (intel_ggtt_update_needs_vtd_wa(i915) || - IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { + /* + * Serialize GTT updates with aperture access on BXT if VT-d is on, + * and always on CHV. + */ + if (intel_vm_no_concurrent_access_wa(i915)) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; ggtt->vm.bind_async_flags = @@ -1063,7 +1078,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = hsw_pte_encode; else if (IS_VALLEYVIEW(i915)) ggtt->vm.pte_encode = byt_pte_encode; - else if (INTEL_GEN(i915) >= 7) + else if (GRAPHICS_VER(i915) >= 7) ggtt->vm.pte_encode = ivb_pte_encode; else ggtt->vm.pte_encode = snb_pte_encode; @@ -1133,16 +1148,16 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) ggtt->vm.gt = gt; ggtt->vm.i915 = i915; ggtt->vm.dma = i915->drm.dev; - dma_resv_init(&ggtt->vm.resv); + dma_resv_init(&ggtt->vm._resv); - if (INTEL_GEN(i915) <= 5) + if (GRAPHICS_VER(i915) <= 5) ret = i915_gmch_probe(ggtt); - else if (INTEL_GEN(i915) < 8) + else if (GRAPHICS_VER(i915) < 8) ret = gen6_gmch_probe(ggtt); else ret = gen8_gmch_probe(ggtt); if (ret) { - dma_resv_fini(&ggtt->vm.resv); + dma_resv_fini(&ggtt->vm._resv); return ret; } @@ -1194,7 +1209,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) int i915_ggtt_enable_hw(struct drm_i915_private *i915) { - if (INTEL_GEN(i915) < 6 && !intel_enable_gtt()) + if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt()) return -EIO; return 0; @@ -1259,7 +1274,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) if (flush) wbinvd_on_all_cpus(); - if (INTEL_GEN(ggtt->vm.i915) >= 8) + if (GRAPHICS_VER(ggtt->vm.i915) >= 8) setup_private_pat(ggtt->vm.gt->uncore); intel_ggtt_restore_fences(ggtt); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 8a322594210c..cac7f3f44642 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ 
b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -56,7 +56,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence) int fence_pitch_shift; u64 val; - if (INTEL_GEN(fence_to_i915(fence)) >= 6) { + if (GRAPHICS_VER(fence_to_i915(fence)) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; @@ -173,9 +173,9 @@ static void fence_write(struct i915_fence_reg *fence) * and explicitly managed for internal users. */ - if (IS_GEN(i915, 2)) + if (GRAPHICS_VER(i915) == 2) i830_write_fence_reg(fence); - else if (IS_GEN(i915, 3)) + else if (GRAPHICS_VER(i915) == 3) i915_write_fence_reg(fence); else i965_write_fence_reg(fence); @@ -188,7 +188,7 @@ static void fence_write(struct i915_fence_reg *fence) static bool gpu_uses_fence_registers(struct i915_fence_reg *fence) { - return INTEL_GEN(fence_to_i915(fence)) < 4; + return GRAPHICS_VER(fence_to_i915(fence)) < 4; } static int fence_update(struct i915_fence_reg *fence, @@ -569,7 +569,7 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) { + if (GRAPHICS_VER(i915) >= 8 || IS_VALLEYVIEW(i915)) { /* * On BDW+, swizzling is not used. 
We leave the CPU memory * controller in charge of optimizing memory accesses without @@ -579,7 +579,7 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) */ swizzle_x = I915_BIT_6_SWIZZLE_NONE; swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } else if (INTEL_GEN(i915) >= 6) { + } else if (GRAPHICS_VER(i915) >= 6) { if (i915->preserve_bios_swizzle) { if (intel_uncore_read(uncore, DISP_ARB_CTL) & DISP_TILE_SURFACE_SWIZZLING) { @@ -611,14 +611,14 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } } - } else if (IS_GEN(i915, 5)) { + } else if (GRAPHICS_VER(i915) == 5) { /* * On Ironlake whatever DRAM config, GPU always do * same swizzling setup. */ swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; - } else if (IS_GEN(i915, 2)) { + } else if (GRAPHICS_VER(i915) == 2) { /* * As far as we know, the 865 doesn't have these bit 6 * swizzling issues. @@ -653,8 +653,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) * banks of memory are paired and unswizzled on the * uneven portion, so leave that as unknown. 
*/ - if (intel_uncore_read16(uncore, C0DRB3) == - intel_uncore_read16(uncore, C1DRB3)) { + if (intel_uncore_read16(uncore, C0DRB3_BW) == + intel_uncore_read16(uncore, C1DRB3_BW)) { swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } @@ -697,7 +697,7 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) } /* check for L-shaped memory aka modified enhanced addressing */ - if (IS_GEN(i915, 4) && + if (GRAPHICS_VER(i915) == 4 && !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; @@ -844,10 +844,10 @@ void intel_ggtt_init_fences(struct i915_ggtt *ggtt) if (!i915_ggtt_has_aperture(ggtt)) num_fences = 0; - else if (INTEL_GEN(i915) >= 7 && + else if (GRAPHICS_VER(i915) >= 7 && !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) num_fences = 32; - else if (INTEL_GEN(i915) >= 4 || + else if (GRAPHICS_VER(i915) >= 4 || IS_I945G(i915) || IS_I945GM(i915) || IS_G33(i915) || IS_PINEVIEW(i915)) num_fences = 16; @@ -867,7 +867,7 @@ void intel_ggtt_init_fences(struct i915_ggtt *ggtt) for (i = 0; i < num_fences; i++) { struct i915_fence_reg *fence = &ggtt->fence_regs[i]; - i915_active_init(&fence->active, NULL, NULL); + i915_active_init(&fence->active, NULL, NULL, 0); fence->ggtt = ggtt; fence->id = i; list_add_tail(&fence->link, &ggtt->fence_list); @@ -895,29 +895,29 @@ void intel_gt_init_swizzling(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; - if (INTEL_GEN(i915) < 5 || + if (GRAPHICS_VER(i915) < 5 || i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) return; intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); - if (IS_GEN(i915, 5)) + if (GRAPHICS_VER(i915) == 5) return; intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL); - if (IS_GEN(i915, 6)) + if (GRAPHICS_VER(i915) == 6) intel_uncore_write(uncore, ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); - else if (IS_GEN(i915, 
7)) + else if (GRAPHICS_VER(i915) == 7) intel_uncore_write(uncore, ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); - else if (IS_GEN(i915, 8)) + else if (GRAPHICS_VER(i915) == 8) intel_uncore_write(uncore, GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); else - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(GRAPHICS_VER(i915)); } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 14e2ffb6c0e5..2694dbb9967e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT*/ /* - * Copyright � 2003-2018 Intel Corporation + * Copyright © 2003-2018 Intel Corporation */ #ifndef _INTEL_GPU_COMMANDS_H_ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8d77dcbad059..2161bf01ef8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -68,8 +68,6 @@ int intel_gt_probe_lmem(struct intel_gt *gt) id = INTEL_REGION_LMEM; mem->id = id; - mem->type = INTEL_MEMORY_LOCAL; - mem->instance = 0; intel_memory_region_set_name(mem, "local%u", mem->instance); @@ -115,10 +113,10 @@ static void init_unused_rings(struct intel_gt *gt) init_unused_ring(gt, SRB1_BASE); init_unused_ring(gt, SRB2_BASE); init_unused_ring(gt, SRB3_BASE); - } else if (IS_GEN(i915, 2)) { + } else if (GRAPHICS_VER(i915) == 2) { init_unused_ring(gt, SRB0_BASE); init_unused_ring(gt, SRB1_BASE); - } else if (IS_GEN(i915, 3)) { + } else if (GRAPHICS_VER(i915) == 3) { init_unused_ring(gt, PRB1_BASE); init_unused_ring(gt, PRB2_BASE); } @@ -135,7 +133,7 @@ int intel_gt_init_hw(struct intel_gt *gt) /* Double layer security blanket, see i915_gem_init() */ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) + if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9) intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); if (IS_HASWELL(i915)) @@ -208,10 
+206,10 @@ intel_gt_clear_error_registers(struct intel_gt *gt, struct intel_uncore *uncore = gt->uncore; u32 eir; - if (!IS_GEN(i915, 2)) + if (GRAPHICS_VER(i915) != 2) clear_register(uncore, PGTBL_ER); - if (INTEL_GEN(i915) < 4) + if (GRAPHICS_VER(i915) < 4) clear_register(uncore, IPEIR(RENDER_RING_BASE)); else clear_register(uncore, IPEIR_I965); @@ -229,13 +227,13 @@ intel_gt_clear_error_registers(struct intel_gt *gt, I915_MASTER_ERROR_INTERRUPT); } - if (INTEL_GEN(i915) >= 12) { + if (GRAPHICS_VER(i915) >= 12) { rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); - } else if (INTEL_GEN(i915) >= 8) { + } else if (GRAPHICS_VER(i915) >= 8) { rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); - } else if (INTEL_GEN(i915) >= 6) { + } else if (GRAPHICS_VER(i915) >= 6) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -273,7 +271,7 @@ static void gen8_check_faults(struct intel_gt *gt) i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg; u32 fault; - if (INTEL_GEN(gt->i915) >= 12) { + if (GRAPHICS_VER(gt->i915) >= 12) { fault_reg = GEN12_RING_FAULT_REG; fault_data0_reg = GEN12_FAULT_TLB_DATA0; fault_data1_reg = GEN12_FAULT_TLB_DATA1; @@ -313,9 +311,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; /* From GEN8 onwards we only have one 'All Engine Fault Register' */ - if (INTEL_GEN(i915) >= 8) + if (GRAPHICS_VER(i915) >= 8) gen8_check_faults(gt); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) gen6_check_faults(gt); else return; @@ -367,7 +365,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) void intel_gt_chipset_flush(struct intel_gt *gt) { wmb(); - if (INTEL_GEN(gt->i915) < 6) + if (GRAPHICS_VER(gt->i915) < 6) intel_gtt_chipset_flush(); } @@ -591,7 +589,8 @@ int intel_gt_init(struct intel_gt *gt) */ intel_uncore_forcewake_get(gt->uncore, 
FORCEWAKE_ALL); - err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); + err = intel_gt_init_scratch(gt, + GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K); if (err) goto out_fw; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index c59468107598..aa0a59c5b614 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -98,7 +98,6 @@ static void pool_free_work(struct work_struct *wrk) round_jiffies_up_relative(HZ)); } -__i915_active_call static void pool_retire(struct i915_active *ref) { struct intel_gt_buffer_pool_node *node = @@ -154,7 +153,7 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz, node->age = 0; node->pool = pool; node->pinned = false; - i915_active_init(&node->active, NULL, pool_retire); + i915_active_init(&node->active, NULL, pool_retire, 0); obj = i915_gem_object_create_internal(gt->i915, sz); if (IS_ERR(obj)) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 582fcaee11aa..9f0e729d2d15 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -76,7 +76,7 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) u32 f19_2_mhz = 19200000; u32 f24_mhz = 24000000; - if (INTEL_GEN(uncore->i915) <= 4) { + if (GRAPHICS_VER(uncore->i915) <= 4) { /* * PRMs say: * @@ -85,7 +85,7 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) * (“CLKCFG”) MCHBAR register) */ return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16; - } else if (INTEL_GEN(uncore->i915) <= 8) { + } else if (GRAPHICS_VER(uncore->i915) <= 8) { /* * PRMs say: * @@ -94,7 +94,7 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) * rolling over every 1.5 hours). 
*/ return f12_5_mhz; - } else if (INTEL_GEN(uncore->i915) <= 9) { + } else if (GRAPHICS_VER(uncore->i915) <= 9) { u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); u32 freq = 0; @@ -113,7 +113,7 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) } return freq; - } else if (INTEL_GEN(uncore->i915) <= 12) { + } else if (GRAPHICS_VER(uncore->i915) <= 12) { u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); u32 freq = 0; @@ -128,7 +128,7 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) } else { u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); - if (INTEL_GEN(uncore->i915) <= 10) + if (GRAPHICS_VER(uncore->i915) <= 10) freq = gen10_get_crystal_clock_freq(uncore, c0); else freq = gen11_get_crystal_clock_freq(uncore, c0); @@ -211,7 +211,7 @@ u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns) * frozen machine. */ val = div_u64_roundup(intel_gt_ns_to_clock_interval(gt, ns), 16); - if (IS_GEN(gt->i915, 6)) + if (GRAPHICS_VER(gt->i915) == 6) val = div_u64_roundup(val, 25) * 25; return val; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 9fc6c912a4e5..c13462274fe8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -20,48 +20,6 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir) intel_guc_to_host_event_handler(guc); } -static void -cs_irq_handler(struct intel_engine_cs *engine, u32 iir) -{ - bool tasklet = false; - - if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { - u32 eir; - - /* Upper 16b are the enabling mask, rsvd for internal errors */ - eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0); - ENGINE_TRACE(engine, "CS error: %x\n", eir); - - /* Disable the error interrupt until after the reset */ - if (likely(eir)) { - ENGINE_WRITE(engine, RING_EMR, ~0u); - ENGINE_WRITE(engine, RING_EIR, eir); - WRITE_ONCE(engine->execlists.error_interrupt, eir); - tasklet = true; - } - } - - if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { 
- WRITE_ONCE(engine->execlists.yield, - ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); - ENGINE_TRACE(engine, "semaphore yield: %08x\n", - engine->execlists.yield); - if (del_timer(&engine->execlists.timer)) - tasklet = true; - } - - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) - tasklet = true; - - if (iir & GT_RENDER_USER_INTERRUPT) { - intel_engine_signal_breadcrumbs(engine); - tasklet |= intel_engine_needs_breadcrumb_tasklet(engine); - } - - if (tasklet) - tasklet_hi_schedule(&engine->execlists.tasklet); -} - static u32 gen11_gt_engine_identity(struct intel_gt *gt, const unsigned int bank, const unsigned int bit) @@ -122,7 +80,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, engine = NULL; if (likely(engine)) - return cs_irq_handler(engine, iir); + return intel_engine_cs_irq(engine, iir); WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", class, instance); @@ -236,14 +194,18 @@ void gen11_gt_irq_reset(struct intel_gt *gt) void gen11_gt_irq_postinstall(struct intel_gt *gt) { - const u32 irqs = - GT_CS_MASTER_ERROR_INTERRUPT | - GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT | - GT_WAIT_SEMAPHORE_INTERRUPT; struct intel_uncore *uncore = gt->uncore; - const u32 dmask = irqs << 16 | irqs; - const u32 smask = irqs << 16; + u32 irqs = GT_RENDER_USER_INTERRUPT; + u32 dmask; + u32 smask; + + if (!intel_uc_wants_guc_submission(&gt->uc)) + irqs |= GT_CS_MASTER_ERROR_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; + + dmask = irqs << 16 | irqs; + smask = irqs << 16; BUILD_BUG_ON(irqs & 0xffff0000); @@ -275,9 +237,12 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); + intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0], + gt_iir); + if (gt_iir & ILK_BSD_USER_INTERRUPT) - 
intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); + intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0], + gt_iir); } static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) @@ -301,11 +266,16 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); + intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0], + gt_iir); + if (gt_iir & GT_BSD_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); + intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0], + gt_iir >> 12); + if (gt_iir & GT_BLT_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]); + intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0], + gt_iir >> 22); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -324,10 +294,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl) if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { iir = raw_reg_read(regs, GEN8_GT_IIR(0)); if (likely(iir)) { - cs_irq_handler(gt->engine_class[RENDER_CLASS][0], - iir >> GEN8_RCS_IRQ_SHIFT); - cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0], - iir >> GEN8_BCS_IRQ_SHIFT); + intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0], + iir >> GEN8_RCS_IRQ_SHIFT); + intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0], + iir >> GEN8_BCS_IRQ_SHIFT); raw_reg_write(regs, GEN8_GT_IIR(0), iir); } } @@ -335,10 +305,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl) if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { iir = raw_reg_read(regs, GEN8_GT_IIR(1)); if (likely(iir)) { - cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0], - iir >> GEN8_VCS0_IRQ_SHIFT); - cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1], - iir >> GEN8_VCS1_IRQ_SHIFT); + 
intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0], + iir >> GEN8_VCS0_IRQ_SHIFT); + intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][1], + iir >> GEN8_VCS1_IRQ_SHIFT); raw_reg_write(regs, GEN8_GT_IIR(1), iir); } } @@ -346,8 +316,8 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl) if (master_ctl & GEN8_GT_VECS_IRQ) { iir = raw_reg_read(regs, GEN8_GT_IIR(3)); if (likely(iir)) { - cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0], - iir >> GEN8_VECS_IRQ_SHIFT); + intel_engine_cs_irq(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0], + iir >> GEN8_VECS_IRQ_SHIFT); raw_reg_write(regs, GEN8_GT_IIR(3), iir); } } @@ -429,7 +399,7 @@ void gen5_gt_irq_reset(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; GEN3_IRQ_RESET(uncore, GT); - if (INTEL_GEN(gt->i915) >= 6) + if (GRAPHICS_VER(gt->i915) >= 6) GEN3_IRQ_RESET(uncore, GEN6_PM); } @@ -447,14 +417,14 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt) } gt_irqs |= GT_RENDER_USER_INTERRUPT; - if (IS_GEN(gt->i915, 5)) + if (GRAPHICS_VER(gt->i915) == 5) gt_irqs |= ILK_BSD_USER_INTERRUPT; else gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs); - if (INTEL_GEN(gt->i915) >= 6) { + if (GRAPHICS_VER(gt->i915) >= 6) { /* * RPS interrupts will get enabled/disabled on demand when RPS * itself is enabled/disabled. 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h index f667e976fb2b..41cad38668c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h @@ -8,6 +8,8 @@ #include <linux/types.h> +#include "intel_engine_types.h" + struct intel_gt; #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ @@ -39,4 +41,25 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl); void gen8_gt_irq_reset(struct intel_gt *gt); void gen8_gt_irq_postinstall(struct intel_gt *gt); +static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u16 iir) +{ + if (iir) + engine->irq_handler(engine, iir); +} + +static inline void +intel_engine_set_irq_handler(struct intel_engine_cs *engine, + void (*fn)(struct intel_engine_cs *engine, + u16 iir)) +{ + /* + * As the interrupt is live as allocate and setup the engines, + * err on the side of caution and apply barriers to updating + * the irq handler callback. This assures that when we do use + * the engine, we will receive interrupts only to ourselves, + * and not lose any. 
+ */ + smp_store_mb(engine->irq_handler, fn); +} + #endif /* INTEL_GT_IRQ_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c index 811a11ed181c..fe51f894b073 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c @@ -16,10 +16,10 @@ static void write_pm_imr(struct intel_gt *gt) u32 mask = gt->pm_imr; i915_reg_t reg; - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { reg = GEN11_GPM_WGBOXPERF_INTR_MASK; mask <<= 16; /* pm is in upper half */ - } else if (INTEL_GEN(i915) >= 8) { + } else if (GRAPHICS_VER(i915) >= 8) { reg = GEN8_GT_IMR(2); } else { reg = GEN6_PMIMR; @@ -61,7 +61,7 @@ void gen6_gt_pm_mask_irq(struct intel_gt *gt, u32 mask) void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask) { struct intel_uncore *uncore = gt->uncore; - i915_reg_t reg = INTEL_GEN(gt->i915) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; + i915_reg_t reg = GRAPHICS_VER(gt->i915) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; lockdep_assert_held(&gt->irq_lock); @@ -77,10 +77,10 @@ static void write_pm_ier(struct intel_gt *gt) u32 mask = gt->pm_ier; i915_reg_t reg; - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { reg = GEN11_GPM_WGBOXPERF_INTR_ENABLE; mask <<= 16; /* pm is in upper half */ - } else if (INTEL_GEN(i915) >= 8) { + } else if (GRAPHICS_VER(i915) >= 8) { reg = GEN8_GT_IER(2); } else { reg = GEN6_PMIER; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 0caf6ca0a784..fecfacf551d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -31,6 +31,12 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; +enum intel_submission_method { + INTEL_SUBMISSION_RING, + INTEL_SUBMISSION_ELSP, + INTEL_SUBMISSION_GUC, +}; + struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -118,6 +124,7 @@ struct intel_gt { struct intel_engine_cs 
*engine[I915_NUM_ENGINES]; struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] [MAX_ENGINE_INSTANCE + 1]; + enum intel_submission_method submission_method; /* * Default address space (either GGTT or ppGTT depending on arch). diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 941f8af016d6..084ea65d59c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -7,10 +7,29 @@ #include <linux/fault-inject.h> +#include "gem/i915_gem_lmem.h" #include "i915_trace.h" #include "intel_gt.h" #include "intel_gtt.h" +struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) +{ + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_lmem(vm->i915, sz, 0); + /* + * Ensure all paging structures for this vm share the same dma-resv + * object underneath, with the idea that one object_lock() will lock + * them all at once. + */ + if (!IS_ERR(obj)) { + obj->base.resv = i915_vm_resv_get(vm); + obj->shares_resv_from = vm; + } + + return obj; +} + struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) { struct drm_i915_gem_object *obj; @@ -19,33 +38,42 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) i915_gem_shrink_all(vm->i915); obj = i915_gem_object_create_internal(vm->i915, sz); - /* ensure all dma objects have the same reservation class */ - if (!IS_ERR(obj)) - obj->base.resv = &vm->resv; + /* + * Ensure all paging structures for this vm share the same dma-resv + * object underneath, with the idea that one object_lock() will lock + * them all at once. 
+ */ + if (!IS_ERR(obj)) { + obj->base.resv = i915_vm_resv_get(vm); + obj->shares_resv_from = vm; + } + return obj; } -int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) +int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) { - int err; + enum i915_map_type type; + void *vaddr; - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_pin_pages(obj); - i915_gem_object_unlock(obj); - if (err) - return err; + type = i915_coherent_map_type(vm->i915, obj, true); + vaddr = i915_gem_object_pin_map_unlocked(obj, type); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); i915_gem_object_make_unshrinkable(obj); return 0; } -int pin_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj) +int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj) { - int err; + enum i915_map_type type; + void *vaddr; - err = i915_gem_object_pin_pages(obj); - if (err) - return err; + type = i915_coherent_map_type(vm->i915, obj, true); + vaddr = i915_gem_object_pin_map(obj, type); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); i915_gem_object_make_unshrinkable(obj); return 0; @@ -80,7 +108,7 @@ void __i915_vm_close(struct i915_address_space *vm) int i915_vm_lock_objects(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) { - if (vm->scratch[0]->base.resv == &vm->resv) { + if (vm->scratch[0]->base.resv == &vm->_resv) { return i915_gem_object_lock(vm->scratch[0], ww); } else { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); @@ -96,6 +124,22 @@ void i915_address_space_fini(struct i915_address_space *vm) mutex_destroy(&vm->mutex); } +/** + * i915_vm_resv_release - Final struct i915_address_space destructor + * @kref: Pointer to the &i915_address_space.resv_ref member. + * + * This function is called when the last lock sharer no longer shares the + * &i915_address_space._resv lock. 
+ */ +void i915_vm_resv_release(struct kref *kref) +{ + struct i915_address_space *vm = + container_of(kref, typeof(*vm), resv_ref); + + dma_resv_fini(&vm->_resv); + kfree(vm); +} + static void __i915_vm_release(struct work_struct *work) { struct i915_address_space *vm = @@ -103,9 +147,8 @@ static void __i915_vm_release(struct work_struct *work) vm->cleanup(vm); i915_address_space_fini(vm); - dma_resv_fini(&vm->resv); - kfree(vm); + i915_vm_resv_put(vm); } void i915_vm_release(struct kref *kref) @@ -122,6 +165,14 @@ void i915_vm_release(struct kref *kref) void i915_address_space_init(struct i915_address_space *vm, int subclass) { kref_init(&vm->ref); + + /* + * Special case for GGTT that has already done an early + * kref_init here. + */ + if (!kref_read(&vm->resv_ref)) + kref_init(&vm->resv_ref); + INIT_RCU_WORK(&vm->rcu, __i915_vm_release); atomic_set(&vm->open, 1); @@ -132,8 +183,23 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) */ mutex_init(&vm->mutex); lockdep_set_subclass(&vm->mutex, subclass); - i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); - dma_resv_init(&vm->resv); + + if (!intel_vm_no_concurrent_access_wa(vm->i915)) { + i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); + } else { + /* + * CHV + BXT VTD workaround use stop_machine(), + * which is allowed to allocate memory. This means &vm->mutex + * is the outer lock, and in theory we can allocate memory inside + * it through stop_machine(). + * + * Add the annotation for this, we use trylock in shrinker. 
+ */ + mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_); + might_alloc(GFP_KERNEL); + mutex_release(&vm->mutex.dep_map, _THIS_IP_); + } + dma_resv_init(&vm->_resv); GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); @@ -155,6 +221,14 @@ void clear_pages(struct i915_vma *vma) memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); } +void *__px_vaddr(struct drm_i915_gem_object *p) +{ + enum i915_map_type type; + + GEM_BUG_ON(!i915_gem_object_has_pages(p)); + return page_unpack_bits(p->mm.mapping, &type); +} + dma_addr_t __px_dma(struct drm_i915_gem_object *p) { GEM_BUG_ON(!i915_gem_object_has_pages(p)); @@ -170,32 +244,22 @@ struct page *__px_page(struct drm_i915_gem_object *p) void fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count) { - struct page *page = __px_page(p); - void *vaddr; + void *vaddr = __px_vaddr(p); - vaddr = kmap(page); memset64(vaddr, val, count); clflush_cache_range(vaddr, PAGE_SIZE); - kunmap(page); } static void poison_scratch_page(struct drm_i915_gem_object *scratch) { - struct sgt_iter sgt; - struct page *page; + void *vaddr = __px_vaddr(scratch); u8 val; val = 0; if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) val = POISON_FREE; - for_each_sgt_page(page, sgt, scratch->mm.pages) { - void *vaddr; - - vaddr = kmap(page); - memset(vaddr, val, PAGE_SIZE); - kunmap(page); - } + memset(vaddr, val, scratch->base.size); } int setup_scratch_page(struct i915_address_space *vm) @@ -225,7 +289,7 @@ int setup_scratch_page(struct i915_address_space *vm) if (IS_ERR(obj)) goto skip; - if (pin_pt_dma(vm, obj)) + if (map_pt_dma(vm, obj)) goto skip_obj; /* We need a single contiguous page for our scratch */ @@ -292,7 +356,7 @@ void gtt_write_workarounds(struct intel_gt *gt) intel_uncore_write(uncore, GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); - else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) + else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11) intel_uncore_write(uncore, GEN8_L3_LRA_1_GPGPU, 
GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); @@ -309,13 +373,13 @@ void gtt_write_workarounds(struct intel_gt *gt) * driver. */ if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) && - INTEL_GEN(i915) <= 10) + GRAPHICS_VER(i915) <= 10) intel_uncore_rmw(uncore, GEN8_GAMW_ECO_DEV_RW_IA, 0, GAMW_ECO_ENABLE_64K_IPS_FIELD); - if (IS_GEN_RANGE(i915, 8, 11)) { + if (IS_GRAPHICS_VER(i915, 8, 11)) { bool can_use_gtt_cache = true; /* @@ -397,7 +461,7 @@ static void bdw_setup_private_ppat(struct intel_uncore *uncore) GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); /* for scanout with eLLC */ - if (INTEL_GEN(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9) pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE); else pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); @@ -446,11 +510,11 @@ void setup_private_pat(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; - GEM_BUG_ON(INTEL_GEN(i915) < 8); + GEM_BUG_ON(GRAPHICS_VER(i915) < 8); - if (INTEL_GEN(i915) >= 12) + if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); - else if (INTEL_GEN(i915) >= 10) + else if (GRAPHICS_VER(i915) >= 10) cnl_setup_private_ppat(uncore); else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) chv_setup_private_ppat(uncore); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 441644f2506a..edea95b97c36 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -180,6 +180,9 @@ struct page *__px_page(struct drm_i915_gem_object *p); dma_addr_t __px_dma(struct drm_i915_gem_object *p); #define px_dma(px) (__px_dma(px_base(px))) +void *__px_vaddr(struct drm_i915_gem_object *p); +#define px_vaddr(px) (__px_vaddr(px_base(px))) + #define px_pt(px) \ __px_choose_expr(px, struct i915_page_table *, __x, \ __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \ @@ -242,7 +245,9 @@ struct i915_address_space { atomic_t open; struct mutex mutex; /* protects vma and our lists */ - struct 
dma_resv resv; /* reservation lock for all pd objects, and buffer pool */ + + struct kref resv_ref; /* kref to keep the reservation lock alive. */ + struct dma_resv _resv; /* reservation lock for all pd objects, and buffer pool */ #define VM_CLASS_GGTT 0 #define VM_CLASS_PPGTT 1 #define VM_CLASS_DPT 2 @@ -402,13 +407,36 @@ i915_vm_get(struct i915_address_space *vm) return vm; } +/** + * i915_vm_resv_get - Obtain a reference on the vm's reservation lock + * @vm: The vm whose reservation lock we want to share. + * + * Return: A pointer to the vm's reservation lock. + */ +static inline struct dma_resv *i915_vm_resv_get(struct i915_address_space *vm) +{ + kref_get(&vm->resv_ref); + return &vm->_resv; +} + void i915_vm_release(struct kref *kref); +void i915_vm_resv_release(struct kref *kref); + static inline void i915_vm_put(struct i915_address_space *vm) { kref_put(&vm->ref, i915_vm_release); } +/** + * i915_vm_resv_put - Release a reference on the vm's reservation lock + * @vm: The vm whose reservation lock reference, obtained from i915_vm_resv_get(), is released. + */ +static inline void i915_vm_resv_put(struct i915_address_space *vm) +{ + kref_put(&vm->resv_ref, i915_vm_resv_release); +} + static inline struct i915_address_space * i915_vm_open(struct i915_address_space *vm) { @@ -504,6 +532,7 @@ void i915_ggtt_enable_guc(struct i915_ggtt *ggtt); void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); int i915_init_ggtt(struct drm_i915_private *i915); void i915_ggtt_driver_release(struct drm_i915_private *i915); +void i915_ggtt_driver_late_release(struct drm_i915_private *i915); static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) { @@ -517,8 +546,6 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); -#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px))) - void fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count); @@ -532,12 +559,13 @@
int setup_scratch_page(struct i915_address_space *vm); void free_scratch(struct i915_address_space *vm); struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz); +struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz); struct i915_page_table *alloc_pt(struct i915_address_space *vm); struct i915_page_directory *alloc_pd(struct i915_address_space *vm); struct i915_page_directory *__alloc_pd(int npde); -int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj); -int pin_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj); +int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj); +int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj); void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl); @@ -584,7 +612,7 @@ void setup_private_pat(struct intel_uncore *uncore); int i915_vm_alloc_pt_stash(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, u64 size); -int i915_vm_pin_pt_stash(struct i915_address_space *vm, +int i915_vm_map_pt_stash(struct i915_address_space *vm, struct i915_vm_pt_stash *stash); void i915_vm_free_pt_stash(struct i915_address_space *vm, struct i915_vm_pt_stash *stash); diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index 075d741644ae..eb1a15deed22 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -64,7 +64,7 @@ static bool get_ia_constants(struct intel_llc *llc, consts->min_gpu_freq = rps->min_freq; consts->max_gpu_freq = rps->max_freq; - if (INTEL_GEN(i915) >= 9) { + if (GRAPHICS_VER(i915) >= 9) { /* Convert GT frequency to 50 HZ units */ consts->min_gpu_freq /= GEN9_FREQ_SCALER; consts->max_gpu_freq /= GEN9_FREQ_SCALER; @@ -83,13 +83,13 @@ static void calc_ia_freq(struct intel_llc *llc, const int diff = consts->max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if 
(INTEL_GEN(i915) >= 9) { + if (GRAPHICS_VER(i915) >= 9) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. */ ring_freq = gpu_freq; - } else if (INTEL_GEN(i915) >= 8) { + } else if (GRAPHICS_VER(i915) >= 8) { /* max(2 * GT, DDR). NB: GT is 50MHz units */ ring_freq = max(consts->min_ring_freq, gpu_freq); } else if (IS_HASWELL(i915)) { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index e86897cde984..a27bac0a4bfb 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -47,7 +47,7 @@ static void set_offsets(u32 *regs, *regs = MI_LOAD_REGISTER_IMM(count); if (flags & POSTED) *regs |= MI_LRI_FORCE_POSTED; - if (INTEL_GEN(engine->i915) >= 11) + if (GRAPHICS_VER(engine->i915) >= 11) *regs |= MI_LRI_LRM_CS_MMIO; regs++; @@ -70,7 +70,7 @@ static void set_offsets(u32 *regs, if (close) { /* Close the batch; used mainly by live_lrc_layout() */ *regs = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) + if (GRAPHICS_VER(engine->i915) >= 10) *regs |= BIT(0); } } @@ -498,22 +498,22 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) * addressing to automatic fixup the register state between the * physical engines for virtual engine. 
*/ - GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 && !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; - else if (INTEL_GEN(engine->i915) >= 11) + else if (GRAPHICS_VER(engine->i915) >= 11) return gen11_rcs_offsets; - else if (INTEL_GEN(engine->i915) >= 9) + else if (GRAPHICS_VER(engine->i915) >= 9) return gen9_rcs_offsets; else return gen8_rcs_offsets; } else { - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets; - else if (INTEL_GEN(engine->i915) >= 9) + else if (GRAPHICS_VER(engine->i915) >= 9) return gen9_xcs_offsets; else return gen8_xcs_offsets; @@ -522,9 +522,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) return 0x60; - else if (INTEL_GEN(engine->i915) >= 9) + else if (GRAPHICS_VER(engine->i915) >= 9) return 0x54; else if (engine->class == RENDER_CLASS) return 0x58; @@ -534,9 +534,9 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) return 0x74; - else if (INTEL_GEN(engine->i915) >= 9) + else if (GRAPHICS_VER(engine->i915) >= 9) return 0x68; else if (engine->class == RENDER_CLASS) return 0xd8; @@ -546,9 +546,9 @@ static int lrc_ring_gpr0(const struct intel_engine_cs *engine) static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) { - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) return 0x12; - else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) + else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS) return 0x18; else return -1; 
@@ -581,9 +581,9 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) if (engine->class != RENDER_CLASS) return -1; - if (INTEL_GEN(engine->i915) >= 12) + if (GRAPHICS_VER(engine->i915) >= 12) return 0xb6; - else if (INTEL_GEN(engine->i915) >= 11) + else if (GRAPHICS_VER(engine->i915) >= 11) return 0xaa; else return -1; @@ -592,9 +592,9 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) static u32 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) { - switch (INTEL_GEN(engine->i915)) { + switch (GRAPHICS_VER(engine->i915)) { default: - MISSING_CASE(INTEL_GEN(engine->i915)); + MISSING_CASE(GRAPHICS_VER(engine->i915)); fallthrough; case 12: return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; @@ -637,7 +637,7 @@ static void init_common_regs(u32 * const regs, ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); if (inhibit) ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; - if (INTEL_GEN(engine->i915) < 11) + if (GRAPHICS_VER(engine->i915) < 11) ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); regs[CTX_CONTEXT_CONTROL] = ctl; @@ -805,7 +805,7 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ - if (INTEL_GEN(engine->i915) == 12) { + if (GRAPHICS_VER(engine->i915) == 12) { ce->wa_bb_page = context_size / PAGE_SIZE; context_size += PAGE_SIZE; } @@ -903,7 +903,9 @@ lrc_pre_pin(struct intel_context *ce, GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); *vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(ce->engine->i915) | + i915_coherent_map_type(ce->engine->i915, + ce->state->obj, + false) | I915_MAP_OVERRIDE); return PTR_ERR_OR_ZERO(*vaddr); @@ -1112,7 +1114,7 @@ static u32 lrc_descriptor(const struct intel_context *ce) desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - if 
(IS_GEN(ce->vm->i915, 8)) + if (GRAPHICS_VER(ce->vm->i915) == 8) desc |= GEN8_CTX_L3LLC_COHERENT; return i915_ggtt_offset(ce->state) | desc; @@ -1467,7 +1469,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) if (engine->class != RENDER_CLASS) return; - switch (INTEL_GEN(engine->i915)) { + switch (GRAPHICS_VER(engine->i915)) { case 12: case 11: return; @@ -1484,7 +1486,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) wa_bb_fn[1] = NULL; break; default: - MISSING_CASE(INTEL_GEN(engine->i915)); + MISSING_CASE(GRAPHICS_VER(engine->i915)); return; } diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index b14138fd505c..17848807f111 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -344,11 +344,11 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; - } else if (INTEL_GEN(i915) >= 12) { + } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; - } else if (IS_GEN(i915, 11)) { + } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; @@ -361,7 +361,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; } else { - drm_WARN_ONCE(&i915->drm, INTEL_GEN(i915) >= 9, + drm_WARN_ONCE(&i915->drm, GRAPHICS_VER(i915) >= 9, "Platform that should have a MOCS table does not.\n"); return 0; } @@ -370,7 +370,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, return 0; /* WaDisableSkipCaching:skl,bxt,kbl,glk */ - if (IS_GEN(i915, 9)) { + if (GRAPHICS_VER(i915) == 9) { int i; for (i = 0; i < table->size; i++) diff --git 
a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 014ae8ac4480..886060f7e6fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -87,11 +87,10 @@ write_dma_entry(struct drm_i915_gem_object * const pdma, const unsigned short idx, const u64 encoded_entry) { - u64 * const vaddr = kmap_atomic(__px_page(pdma)); + u64 * const vaddr = __px_vaddr(pdma); vaddr[idx] = encoded_entry; clflush_cache_range(&vaddr[idx], sizeof(u64)); - kunmap_atomic(vaddr); } void @@ -147,9 +146,9 @@ int i915_ppgtt_init_hw(struct intel_gt *gt) gtt_write_workarounds(gt); - if (IS_GEN(i915, 6)) + if (GRAPHICS_VER(i915) == 6) gen6_ppgtt_enable(gt); - else if (IS_GEN(i915, 7)) + else if (GRAPHICS_VER(i915) == 7) gen7_ppgtt_enable(gt); return 0; @@ -158,7 +157,7 @@ int i915_ppgtt_init_hw(struct intel_gt *gt) static struct i915_ppgtt * __ppgtt_create(struct intel_gt *gt) { - if (INTEL_GEN(gt->i915) < 8) + if (GRAPHICS_VER(gt->i915) < 8) return gen6_ppgtt_create(gt); else return gen8_ppgtt_create(gt); @@ -258,7 +257,7 @@ int i915_vm_alloc_pt_stash(struct i915_address_space *vm, return 0; } -int i915_vm_pin_pt_stash(struct i915_address_space *vm, +int i915_vm_map_pt_stash(struct i915_address_space *vm, struct i915_vm_pt_stash *stash) { struct i915_page_table *pt; @@ -266,7 +265,7 @@ int i915_vm_pin_pt_stash(struct i915_address_space *vm, for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { for (pt = stash->pt[n]; pt; pt = pt->stash) { - err = pin_pt_dma_locked(vm, pt->base); + err = map_pt_dma_locked(vm, pt->base); if (err) return err; } @@ -308,7 +307,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) ppgtt->vm.dma = i915->drm.dev; ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); - dma_resv_init(&ppgtt->vm.resv); + dma_resv_init(&ppgtt->vm._resv); i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c 
b/drivers/gpu/drm/i915/gt/intel_rc6.c index 3b7e62debe7e..259d7eb4e165 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -109,7 +109,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; - if (INTEL_GEN(gt->i915) >= 12) { + if (GRAPHICS_VER(gt->i915) >= 12) { for (i = 0; i < I915_MAX_VCS; i++) if (HAS_ENGINE(gt, _VCS(i))) pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | @@ -126,7 +126,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) enum intel_engine_id id; /* 2b: Program RC6 thresholds.*/ - if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { + if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 10) { set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { @@ -249,9 +249,9 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) rc6vids = 0; ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL); - if (IS_GEN(i915, 6) && ret) { + if (GRAPHICS_VER(i915) == 6 && ret) { drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n"); - } else if (IS_GEN(i915, 6) && + } else if (GRAPHICS_VER(i915) == 6 && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { drm_dbg(&i915->drm, "You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n", @@ -515,7 +515,7 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) struct intel_uncore *uncore = rc6_to_uncore(rc6); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - if (INTEL_GEN(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9) set(uncore, GEN9_PG_ENABLE, 0); set(uncore, GEN6_RC_CONTROL, 0); set(uncore, GEN6_RC_STATE, 0); @@ -575,13 +575,13 @@ void intel_rc6_enable(struct intel_rc6 *rc6) chv_rc6_enable(rc6); else if (IS_VALLEYVIEW(i915)) vlv_rc6_enable(rc6); - else if (INTEL_GEN(i915) >= 11) + else if (GRAPHICS_VER(i915) >= 11) gen11_rc6_enable(rc6); - else if (INTEL_GEN(i915) >= 9) + else if (GRAPHICS_VER(i915) >= 9) gen9_rc6_enable(rc6); else if (IS_BROADWELL(i915)) gen8_rc6_enable(rc6); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) gen6_rc6_enable(rc6); rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE; diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 73fceb0c25fc..f7366b054f8e 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -5,6 +5,8 @@ #include "i915_drv.h" #include "intel_memory_region.h" +#include "intel_region_lmem.h" +#include "intel_region_ttm.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "intel_region_lmem.h" @@ -66,9 +68,9 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem) static void region_lmem_release(struct intel_memory_region *mem) { - release_fake_lmem_bar(mem); + intel_region_ttm_fini(mem); io_mapping_fini(&mem->iomap); - intel_memory_region_release_buddy(mem); + release_fake_lmem_bar(mem); } static int @@ -83,12 +85,21 @@ region_lmem_init(struct intel_memory_region *mem) if (!io_mapping_init_wc(&mem->iomap, mem->io_start, - resource_size(&mem->region))) - return -EIO; + resource_size(&mem->region))) { + ret = -EIO; + goto out_no_io; + } - ret = intel_memory_region_init_buddy(mem); + ret = 
intel_region_ttm_init(mem); if (ret) - io_mapping_fini(&mem->iomap); + goto out_no_buddy; + + return 0; + +out_no_buddy: + io_mapping_fini(&mem->iomap); +out_no_io: + release_fake_lmem_bar(mem); return ret; } @@ -127,6 +138,8 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) mappable_end, PAGE_SIZE, io_start, + INTEL_MEMORY_LOCAL, + 0, &intel_region_lmem_ops); if (!IS_ERR(mem)) { drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n", @@ -198,6 +211,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size, I915_GTT_PAGE_SIZE_4K, io_start, + INTEL_MEMORY_LOCAL, + 0, &intel_region_lmem_ops); if (IS_ERR(mem)) return mem; diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index b03e197b1d99..b575cd6e0b7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -15,7 +15,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine) if (engine->class != RENDER_CLASS) return NULL; - switch (INTEL_GEN(engine->i915)) { + switch (GRAPHICS_VER(engine->i915)) { case 6: return &gen6_null_state; case 7: diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a377c4588aaa..72251638d4ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -338,15 +338,69 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } -static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) +static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) +{ + int vecs_id; + + GEM_BUG_ON(engine->class != VIDEO_DECODE_CLASS); + + vecs_id = _VECS((engine->instance) / 2); + + return engine->gt->engine[vecs_id]; +} + +struct sfc_lock_data { + i915_reg_t lock_reg; + i915_reg_t ack_reg; + i915_reg_t usage_reg; + u32 lock_bit; + u32 ack_bit; + u32 usage_bit; + u32 reset_bit; +}; + +static void 
get_sfc_forced_lock_data(struct intel_engine_cs *engine, + struct sfc_lock_data *sfc_lock) +{ + switch (engine->class) { + default: + MISSING_CASE(engine->class); + fallthrough; + case VIDEO_DECODE_CLASS: + sfc_lock->lock_reg = GEN11_VCS_SFC_FORCED_LOCK(engine); + sfc_lock->lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; + + sfc_lock->ack_reg = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_lock->ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; + + sfc_lock->usage_reg = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_lock->usage_bit = GEN11_VCS_SFC_USAGE_BIT; + sfc_lock->reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); + + break; + case VIDEO_ENHANCEMENT_CLASS: + sfc_lock->lock_reg = GEN11_VECS_SFC_FORCED_LOCK(engine); + sfc_lock->lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + + sfc_lock->ack_reg = GEN11_VECS_SFC_LOCK_ACK(engine); + sfc_lock->ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; + + sfc_lock->usage_reg = GEN11_VECS_SFC_USAGE(engine); + sfc_lock->usage_bit = GEN11_VECS_SFC_USAGE_BIT; + sfc_lock->reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); + + break; + } +} + +static int gen11_lock_sfc(struct intel_engine_cs *engine, + u32 *reset_mask, + u32 *unlock_mask) { struct intel_uncore *uncore = engine->uncore; u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access; - i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; - u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; - i915_reg_t sfc_usage; - u32 sfc_usage_bit; - u32 sfc_reset_bit; + struct sfc_lock_data sfc_lock; + bool lock_obtained, lock_to_other = false; int ret; switch (engine->class) { @@ -354,53 +408,72 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) if ((BIT(engine->instance) & vdbox_sfc_access) == 0) return 0; - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - - sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; + fallthrough; + case VIDEO_ENHANCEMENT_CLASS: + 
get_sfc_forced_lock_data(engine, &sfc_lock); - sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); break; + default: + return 0; + } - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + if (!(intel_uncore_read_fw(uncore, sfc_lock.usage_reg) & sfc_lock.usage_bit)) { + struct intel_engine_cs *paired_vecs; - sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine); - sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; + if (engine->class != VIDEO_DECODE_CLASS || + GRAPHICS_VER(engine->i915) != 12) + return 0; - sfc_usage = GEN11_VECS_SFC_USAGE(engine); - sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); - break; + /* + * Wa_14010733141 + * + * If the VCS-MFX isn't using the SFC, we also need to check + * whether VCS-HCP is using it. If so, we need to issue a *VE* + * forced lock on the VE engine that shares the same SFC. + */ + if (!(intel_uncore_read_fw(uncore, + GEN12_HCP_SFC_LOCK_STATUS(engine)) & + GEN12_HCP_SFC_USAGE_BIT)) + return 0; - default: - return 0; + paired_vecs = find_sfc_paired_vecs_engine(engine); + get_sfc_forced_lock_data(paired_vecs, &sfc_lock); + lock_to_other = true; + *unlock_mask |= paired_vecs->mask; + } else { + *unlock_mask |= engine->mask; } /* - * If the engine is using a SFC, tell the engine that a software reset + * If the engine is using an SFC, tell the engine that a software reset * is going to happen. The engine will then try to force lock the SFC. * If SFC ends up being locked to the engine we want to reset, we have * to reset it as well (we will unlock it once the reset sequence is * completed). 
*/ - if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) - return 0; - - rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); + rmw_set_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); ret = __intel_wait_for_register_fw(uncore, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, + sfc_lock.ack_reg, + sfc_lock.ack_bit, + sfc_lock.ack_bit, 1000, 0, NULL); - /* Was the SFC released while we were trying to lock it? */ - if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) + /* + * Was the SFC released while we were trying to lock it? + * + * We should reset both the engine and the SFC if: + * - We were locking the SFC to this engine and the lock succeeded + * OR + * - We were locking the SFC to a different engine (Wa_14010733141) + * but the SFC was released before the lock was obtained. + * + * Otherwise we need only reset the engine by itself and we can + * leave the SFC alone. + */ + lock_obtained = (intel_uncore_read_fw(uncore, sfc_lock.usage_reg) & + sfc_lock.usage_bit) != 0; + if (lock_obtained == lock_to_other) return 0; if (ret) { @@ -408,7 +481,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) return ret; } - *hw_mask |= sfc_reset_bit; + *reset_mask |= sfc_lock.reset_bit; return 0; } @@ -416,28 +489,19 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access; - i915_reg_t sfc_forced_lock; - u32 sfc_forced_lock_bit; - - switch (engine->class) { - case VIDEO_DECODE_CLASS: - if ((BIT(engine->instance) & vdbox_sfc_access) == 0) - return; + struct sfc_lock_data sfc_lock = {}; - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - break; - - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; - break; + if (engine->class != 
VIDEO_DECODE_CLASS && + engine->class != VIDEO_ENHANCEMENT_CLASS) + return; - default: + if (engine->class == VIDEO_DECODE_CLASS && + (BIT(engine->instance) & vdbox_sfc_access) == 0) return; - } - rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); + get_sfc_forced_lock_data(engine, &sfc_lock); + + rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } static int gen11_reset_engines(struct intel_gt *gt, @@ -456,23 +520,23 @@ static int gen11_reset_engines(struct intel_gt *gt, }; struct intel_engine_cs *engine; intel_engine_mask_t tmp; - u32 hw_mask; + u32 reset_mask, unlock_mask = 0; int ret; if (engine_mask == ALL_ENGINES) { - hw_mask = GEN11_GRDOM_FULL; + reset_mask = GEN11_GRDOM_FULL; } else { - hw_mask = 0; + reset_mask = 0; for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); - hw_mask |= hw_engine_mask[engine->id]; - ret = gen11_lock_sfc(engine, &hw_mask); + reset_mask |= hw_engine_mask[engine->id]; + ret = gen11_lock_sfc(engine, &reset_mask, &unlock_mask); if (ret) goto sfc_unlock; } } - ret = gen6_hw_domain_reset(gt, hw_mask); + ret = gen6_hw_domain_reset(gt, reset_mask); sfc_unlock: /* @@ -480,10 +544,14 @@ sfc_unlock: * gen11_lock_sfc to make sure that we clean properly if something * wrong happened during the lock (e.g. lock acquired after timeout * expiration). + * + * Due to Wa_14010733141, we may have locked an SFC to an engine that + * wasn't being reset. So instead of calling gen11_unlock_sfc() + * on engine_mask, we instead call it on the mask of engines that our + * gen11_lock_sfc() calls told us actually had locks attempted. 
*/ - if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, gt, engine_mask, tmp) - gen11_unlock_sfc(engine); + for_each_engine_masked(engine, gt, unlock_mask, tmp) + gen11_unlock_sfc(engine); return ret; } @@ -565,7 +633,7 @@ static int gen8_reset_engines(struct intel_gt *gt, */ } - if (INTEL_GEN(gt->i915) >= 11) + if (GRAPHICS_VER(gt->i915) >= 11) ret = gen11_reset_engines(gt, engine_mask, retry); else ret = gen6_reset_engines(gt, engine_mask, retry); @@ -594,17 +662,17 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt) if (is_mock_gt(gt)) return mock_reset; - else if (INTEL_GEN(i915) >= 8) + else if (GRAPHICS_VER(i915) >= 8) return gen8_reset_engines; - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) return gen6_reset_engines; - else if (INTEL_GEN(i915) >= 5) + else if (GRAPHICS_VER(i915) >= 5) return ilk_do_reset; else if (IS_G4X(i915)) return g4x_do_reset; else if (IS_G33(i915) || IS_PINEVIEW(i915)) return g33_do_reset; - else if (INTEL_GEN(i915) >= 3) + else if (GRAPHICS_VER(i915) >= 3) return i915_do_reset; else return NULL; @@ -656,7 +724,7 @@ bool intel_has_reset_engine(const struct intel_gt *gt) int intel_reset_guc(struct intel_gt *gt) { u32 guc_domain = - INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + GRAPHICS_VER(gt->i915) >= 11 ? 
GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; int ret; GEM_BUG_ON(!HAS_GT_UC(gt->i915)); @@ -1118,7 +1186,6 @@ static int intel_gt_reset_engine(struct intel_engine_cs *engine) int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; - bool uses_guc = intel_engine_in_guc_submission_mode(engine); int ret; ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); @@ -1134,10 +1201,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!uses_guc) - ret = intel_gt_reset_engine(engine); - else + if (intel_engine_uses_guc(engine)) ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); + else + ret = intel_gt_reset_engine(engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret); diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index aee0a77c77e0..7c4d5158e03b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -51,11 +51,14 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) if (unlikely(ret)) goto err_unpin; - if (i915_vma_is_map_and_fenceable(vma)) + if (i915_vma_is_map_and_fenceable(vma)) { addr = (void __force *)i915_vma_pin_iomap(vma); - else - addr = i915_gem_object_pin_map(vma->obj, - i915_coherent_map_type(vma->vm->i915)); + } else { + int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false); + + addr = i915_gem_object_pin_map(vma->obj, type); + } + if (IS_ERR(addr)) { ret = PTR_ERR(addr); goto err_ring; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 9585546556ee..0c423f096e2b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -12,6 +12,7 
@@ #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" +#include "intel_gt_irq.h" #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" @@ -28,7 +29,7 @@ static void set_hwstam(struct intel_engine_cs *engine, u32 mask) * lost interrupts following a reset. */ if (engine->class == RENDER_CLASS) { - if (INTEL_GEN(engine->i915) >= 6) + if (GRAPHICS_VER(engine->i915) >= 6) mask &= ~BIT(0); else mask &= ~I915_USER_INTERRUPT; @@ -42,7 +43,7 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) u32 addr; addr = lower_32_bits(phys); - if (INTEL_GEN(engine->i915) >= 4) + if (GRAPHICS_VER(engine->i915) >= 4) addr |= (phys >> 28) & 0xf0; intel_uncore_write(engine->uncore, HWS_PGA, addr); @@ -70,7 +71,7 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) * The ring status page addresses are no longer next to the rest of * the ring registers as of gen7. */ - if (IS_GEN(engine->i915, 7)) { + if (GRAPHICS_VER(engine->i915) == 7) { switch (engine->id) { /* * No more rings exist on Gen7. 
Default case is only to shut up @@ -92,7 +93,7 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) hwsp = VEBOX_HWS_PGA_GEN7; break; } - } else if (IS_GEN(engine->i915, 6)) { + } else if (GRAPHICS_VER(engine->i915) == 6) { hwsp = RING_HWS_PGA_GEN6(engine->mmio_base); } else { hwsp = RING_HWS_PGA(engine->mmio_base); @@ -104,7 +105,7 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) static void flush_cs_tlb(struct intel_engine_cs *engine) { - if (!IS_GEN_RANGE(engine->i915, 6, 7)) + if (!IS_GRAPHICS_VER(engine->i915, 6, 7)) return; /* ring should be idle before issuing a sync flush*/ @@ -152,7 +153,7 @@ static void set_pp_dir(struct intel_engine_cs *engine) ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm)); - if (INTEL_GEN(engine->i915) >= 7) { + if (GRAPHICS_VER(engine->i915) >= 7) { ENGINE_WRITE_FW(engine, RING_MODE_GEN7, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); @@ -228,7 +229,7 @@ static int xcs_resume(struct intel_engine_cs *engine) 5000, 0, NULL)) goto err; - if (INTEL_GEN(engine->i915) > 2) + if (GRAPHICS_VER(engine->i915) > 2) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); @@ -645,9 +646,9 @@ static int mi_set_context(struct i915_request *rq, u32 *cs; len = 4; - if (IS_GEN(i915, 7)) + if (GRAPHICS_VER(i915) == 7) len += 2 + (num_engines ? 
4 * num_engines + 6 : 0); - else if (IS_GEN(i915, 5)) + else if (GRAPHICS_VER(i915) == 5) len += 2; if (flags & MI_FORCE_RESTORE) { GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); @@ -661,7 +662,7 @@ static int mi_set_context(struct i915_request *rq, return PTR_ERR(cs); /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (IS_GEN(i915, 7)) { + if (GRAPHICS_VER(i915) == 7) { *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; if (num_engines) { struct intel_engine_cs *signaller; @@ -677,7 +678,7 @@ static int mi_set_context(struct i915_request *rq, GEN6_PSMI_SLEEP_MSG_DISABLE); } } - } else if (IS_GEN(i915, 5)) { + } else if (GRAPHICS_VER(i915) == 5) { /* * This w/a is only listed for pre-production ilk a/b steppings, * but is also mentioned for programming the powerctx. To be @@ -715,7 +716,7 @@ static int mi_set_context(struct i915_request *rq, */ *cs++ = MI_NOOP; - if (IS_GEN(i915, 7)) { + if (GRAPHICS_VER(i915) == 7) { if (num_engines) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ @@ -739,7 +740,7 @@ static int mi_set_context(struct i915_request *rq, *cs++ = MI_NOOP; } *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - } else if (IS_GEN(i915, 5)) { + } else if (GRAPHICS_VER(i915) == 5) { *cs++ = MI_SUSPEND_FLUSH; } @@ -989,14 +990,10 @@ static void gen6_bsd_submit_request(struct i915_request *request) static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; - - engine->park = NULL; - engine->unpark = NULL; } static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) { - i9xx_set_default_submission(engine); engine->submit_request = gen6_bsd_submit_request; } @@ -1004,7 +1001,7 @@ static void ring_release(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - drm_WARN_ON(&dev_priv->drm, INTEL_GEN(dev_priv) > 2 && + drm_WARN_ON(&dev_priv->drm, GRAPHICS_VER(dev_priv) > 2 && (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); 
intel_engine_cleanup_common(engine); @@ -1021,17 +1018,24 @@ static void ring_release(struct intel_engine_cs *engine) intel_timeline_put(engine->legacy.timeline); } +static void irq_handler(struct intel_engine_cs *engine, u16 iir) +{ + intel_engine_signal_breadcrumbs(engine); +} + static void setup_irq(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (INTEL_GEN(i915) >= 6) { + intel_engine_set_irq_handler(engine, irq_handler); + + if (GRAPHICS_VER(i915) >= 6) { engine->irq_enable = gen6_irq_enable; engine->irq_disable = gen6_irq_disable; - } else if (INTEL_GEN(i915) >= 5) { + } else if (GRAPHICS_VER(i915) >= 5) { engine->irq_enable = gen5_irq_enable; engine->irq_disable = gen5_irq_disable; - } else if (INTEL_GEN(i915) >= 3) { + } else if (GRAPHICS_VER(i915) >= 3) { engine->irq_enable = gen3_irq_enable; engine->irq_disable = gen3_irq_disable; } else { @@ -1045,7 +1049,7 @@ static void setup_common(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; /* gen8+ are only supported with execlists */ - GEM_BUG_ON(INTEL_GEN(i915) >= 8); + GEM_BUG_ON(GRAPHICS_VER(i915) >= 8); setup_irq(engine); @@ -1066,14 +1070,14 @@ static void setup_common(struct intel_engine_cs *engine) * engine->emit_init_breadcrumb(). 
*/ engine->emit_fini_breadcrumb = gen3_emit_breadcrumb; - if (IS_GEN(i915, 5)) + if (GRAPHICS_VER(i915) == 5) engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; engine->set_default_submission = i9xx_set_default_submission; - if (INTEL_GEN(i915) >= 6) + if (GRAPHICS_VER(i915) >= 6) engine->emit_bb_start = gen6_emit_bb_start; - else if (INTEL_GEN(i915) >= 4) + else if (GRAPHICS_VER(i915) >= 4) engine->emit_bb_start = gen4_emit_bb_start; else if (IS_I830(i915) || IS_I845G(i915)) engine->emit_bb_start = i830_emit_bb_start; @@ -1090,16 +1094,16 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - if (INTEL_GEN(i915) >= 7) { + if (GRAPHICS_VER(i915) >= 7) { engine->emit_flush = gen7_emit_flush_rcs; engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs; - } else if (IS_GEN(i915, 6)) { + } else if (GRAPHICS_VER(i915) == 6) { engine->emit_flush = gen6_emit_flush_rcs; engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs; - } else if (IS_GEN(i915, 5)) { + } else if (GRAPHICS_VER(i915) == 5) { engine->emit_flush = gen4_emit_flush_rcs; } else { - if (INTEL_GEN(i915) < 4) + if (GRAPHICS_VER(i915) < 4) engine->emit_flush = gen2_emit_flush; else engine->emit_flush = gen4_emit_flush_rcs; @@ -1114,20 +1118,20 @@ static void setup_vcs(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (INTEL_GEN(i915) >= 6) { + if (GRAPHICS_VER(i915) >= 6) { /* gen6 bsd needs a special wa for tail updates */ - if (IS_GEN(i915, 6)) + if (GRAPHICS_VER(i915) == 6) engine->set_default_submission = gen6_bsd_set_default_submission; engine->emit_flush = gen6_emit_flush_vcs; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - if (IS_GEN(i915, 6)) + if (GRAPHICS_VER(i915) == 6) engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; else engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; } else { engine->emit_flush = gen4_emit_flush_vcs; - if (IS_GEN(i915, 5)) + if (GRAPHICS_VER(i915) == 5) 
engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; else engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; @@ -1141,7 +1145,7 @@ static void setup_bcs(struct intel_engine_cs *engine) engine->emit_flush = gen6_emit_flush_xcs; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - if (IS_GEN(i915, 6)) + if (GRAPHICS_VER(i915) == 6) engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; else engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; @@ -1151,7 +1155,7 @@ static void setup_vecs(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - GEM_BUG_ON(INTEL_GEN(i915) < 7); + GEM_BUG_ON(GRAPHICS_VER(i915) < 7); engine->emit_flush = gen6_emit_flush_xcs; engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; @@ -1199,7 +1203,7 @@ static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine) struct i915_vma *vma; int size, err; - if (!IS_GEN(engine->i915, 7) || engine->class != RENDER_CLASS) + if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS) return 0; err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 405d814e9040..06e9a8ed4e03 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -196,7 +196,7 @@ static void rps_reset_interrupts(struct intel_rps *rps) struct intel_gt *gt = rps_to_gt(rps); spin_lock_irq(>->irq_lock); - if (INTEL_GEN(gt->i915) >= 11) + if (GRAPHICS_VER(gt->i915) >= 11) gen11_rps_reset_interrupts(rps); else gen6_rps_reset_interrupts(rps); @@ -630,7 +630,7 @@ static u32 rps_limits(struct intel_rps *rps, u8 val) * frequency, if the down threshold expires in that window we will not * receive a down interrupt. 
*/ - if (INTEL_GEN(rps_to_i915(rps)) >= 9) { + if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { limits = rps->max_freq_softlimit << 23; if (val <= rps->min_freq_softlimit) limits |= rps->min_freq_softlimit << 14; @@ -697,7 +697,7 @@ static void rps_set_power(struct intel_rps *rps, int new_power) intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); set(uncore, GEN6_RP_CONTROL, - (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | @@ -771,7 +771,7 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) struct drm_i915_private *i915 = rps_to_i915(rps); u32 swreq; - if (INTEL_GEN(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9) swreq = GEN9_FREQUENCY(val); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) swreq = HSW_FREQUENCY(val); @@ -812,14 +812,14 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update) if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_rps_set(rps, val); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) err = gen6_rps_set(rps, val); else err = gen5_rps_set(rps, val); if (err) return err; - if (update && INTEL_GEN(i915) >= 6) + if (update && GRAPHICS_VER(i915) >= 6) gen6_rps_set_thresholds(rps, val); rps->last_freq = val; @@ -853,7 +853,7 @@ void intel_rps_unpark(struct intel_rps *rps) if (intel_rps_uses_timer(rps)) rps_start_timer(rps); - if (IS_GEN(rps_to_i915(rps), 5)) + if (GRAPHICS_VER(rps_to_i915(rps)) == 5) gen5_rps_update(rps); } @@ -999,7 +999,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(i915) || IS_BROADWELL(i915) || - IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { u32 ddcc_status = 0; if (sandybridge_pcode_read(i915, @@ -1012,7 +1012,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->max_freq); } - if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + if 
(IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -1048,7 +1048,7 @@ static bool gen9_rps_enable(struct intel_rps *rps) struct intel_uncore *uncore = gt->uncore; /* Program defaults and thresholds for RPS */ - if (IS_GEN(gt->i915, 9)) + if (GRAPHICS_VER(gt->i915) == 9) intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(rps->rp1_freq)); @@ -1365,16 +1365,16 @@ void intel_rps_enable(struct intel_rps *rps) enabled = chv_rps_enable(rps); else if (IS_VALLEYVIEW(i915)) enabled = vlv_rps_enable(rps); - else if (INTEL_GEN(i915) >= 9) + else if (GRAPHICS_VER(i915) >= 9) enabled = gen9_rps_enable(rps); - else if (INTEL_GEN(i915) >= 8) + else if (GRAPHICS_VER(i915) >= 8) enabled = gen8_rps_enable(rps); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) enabled = gen6_rps_enable(rps); else if (IS_IRONLAKE_M(i915)) enabled = gen5_rps_enable(rps); else - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(GRAPHICS_VER(i915)); intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); if (!enabled) return; @@ -1393,7 +1393,7 @@ void intel_rps_enable(struct intel_rps *rps) if (has_busy_stats(rps)) intel_rps_set_timer(rps); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) intel_rps_set_interrupts(rps); else /* Ironlake currently uses intel_ips.ko */ {} @@ -1414,7 +1414,7 @@ void intel_rps_disable(struct intel_rps *rps) intel_rps_clear_interrupts(rps); intel_rps_clear_timer(rps); - if (INTEL_GEN(i915) >= 6) + if (GRAPHICS_VER(i915) >= 6) gen6_rps_disable(rps); else if (IS_IRONLAKE_M(i915)) gen5_rps_disable(rps); @@ -1453,14 +1453,14 @@ int intel_gpu_freq(struct intel_rps *rps, int val) { struct drm_i915_private *i915 = rps_to_i915(rps); - if (INTEL_GEN(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9) return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); else if (IS_CHERRYVIEW(i915)) return chv_gpu_freq(rps, val); else if 
(IS_VALLEYVIEW(i915)) return byt_gpu_freq(rps, val); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) return val * GT_FREQUENCY_MULTIPLIER; else return val; @@ -1470,14 +1470,14 @@ int intel_freq_opcode(struct intel_rps *rps, int val) { struct drm_i915_private *i915 = rps_to_i915(rps); - if (INTEL_GEN(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9) return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, GT_FREQUENCY_MULTIPLIER); else if (IS_CHERRYVIEW(i915)) return chv_freq_opcode(rps, val); else if (IS_VALLEYVIEW(i915)) return byt_freq_opcode(rps, val); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); else return val; @@ -1770,11 +1770,11 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) spin_unlock(>->irq_lock); } - if (INTEL_GEN(gt->i915) >= 8) + if (GRAPHICS_VER(gt->i915) >= 8) return; if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine[VECS0]); + intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -1833,7 +1833,7 @@ void intel_rps_init(struct intel_rps *rps) chv_rps_init(rps); else if (IS_VALLEYVIEW(i915)) vlv_rps_init(rps); - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) gen6_rps_init(rps); else if (IS_IRONLAKE_M(i915)) gen5_rps_init(rps); @@ -1843,7 +1843,7 @@ void intel_rps_init(struct intel_rps *rps) rps->min_freq_softlimit = rps->min_freq; /* After setting max-softlimit, find the overclock max freq */ - if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { + if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { u32 params = 0; sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, @@ -1872,16 +1872,16 @@ void intel_rps_init(struct intel_rps *rps) * * TODO: verify if this can be reproduced on VLV,CHV. 
*/ - if (INTEL_GEN(i915) <= 7) + if (GRAPHICS_VER(i915) <= 7) rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; - if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11) + if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } void intel_rps_sanitize(struct intel_rps *rps) { - if (INTEL_GEN(rps_to_i915(rps)) >= 6) + if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) rps_disable_interrupts(rps); } @@ -1892,11 +1892,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = (rpstat >> 8) & 0xff; - else if (INTEL_GEN(i915) >= 9) + else if (GRAPHICS_VER(i915) >= 9) cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; - else if (INTEL_GEN(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6) cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; else cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >> @@ -1915,7 +1915,7 @@ static u32 read_cagf(struct intel_rps *rps) vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); - } else if (INTEL_GEN(i915) >= 6) { + } else if (GRAPHICS_VER(i915) >= 6) { freq = intel_uncore_read(uncore, GEN6_RPSTAT1); } else { freq = intel_uncore_read(uncore, MEMSTAT_ILK); @@ -1968,7 +1968,7 @@ void intel_rps_driver_register(struct intel_rps *rps) * We only register the i915 ips part with intel-ips once everything is * set up, to avoid intel-ips sneaking in and reading bogus values. 
*/ - if (IS_GEN(gt->i915, 5)) { + if (GRAPHICS_VER(gt->i915) == 5) { GEM_BUG_ON(ips_mchdev); rcu_assign_pointer(ips_mchdev, gt->i915); ips_ping_for_i915_load(); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 0d9f74aec8fe..367fd44b81c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -590,13 +590,13 @@ void intel_sseu_info_init(struct intel_gt *gt) cherryview_sseu_info_init(gt); else if (IS_BROADWELL(i915)) bdw_sseu_info_init(gt); - else if (IS_GEN(i915, 9)) + else if (GRAPHICS_VER(i915) == 9) gen9_sseu_info_init(gt); - else if (IS_GEN(i915, 10)) + else if (GRAPHICS_VER(i915) == 10) gen10_sseu_info_init(gt); - else if (IS_GEN(i915, 11)) + else if (GRAPHICS_VER(i915) == 11) gen11_sseu_info_init(gt); - else if (INTEL_GEN(i915) >= 12) + else if (GRAPHICS_VER(i915) >= 12) gen12_sseu_info_init(gt); } @@ -613,7 +613,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, * No explicit RPCS request is needed to ensure full * slice/subslice/EU enablement prior to Gen9. */ - if (INTEL_GEN(i915) < 9) + if (GRAPHICS_VER(i915) < 9) return 0; /* @@ -651,7 +651,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, * subslices are enabled, or a count between one and four on the first * slice. 
*/ - if (IS_GEN(i915, 11) && + if (GRAPHICS_VER(i915) == 11 && slices == 1 && subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { GEM_BUG_ON(subslices & 1); @@ -669,7 +669,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, if (sseu->has_slice_pg) { u32 mask, val = slices; - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { mask = GEN11_RPCS_S_CNT_MASK; val <<= GEN11_RPCS_S_CNT_SHIFT; } else { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 51780282d872..714fe8495775 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -248,7 +248,7 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) struct sseu_dev_info sseu; intel_wakeref_t wakeref; - if (INTEL_GEN(i915) < 8) + if (GRAPHICS_VER(i915) < 8) return -ENODEV; seq_puts(m, "SSEU Device Info\n"); @@ -265,9 +265,9 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) cherryview_sseu_device_status(gt, &sseu); else if (IS_BROADWELL(i915)) bdw_sseu_device_status(gt, &sseu); - else if (IS_GEN(i915, 9)) + else if (GRAPHICS_VER(i915) == 9) gen9_sseu_device_status(gt, &sseu); - else if (INTEL_GEN(i915) >= 10) + else if (GRAPHICS_VER(i915) >= 10) gen10_sseu_device_status(gt, &sseu); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index f19cf6d2fa85..c4a126c8caef 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -32,7 +32,6 @@ static struct i915_vma *hwsp_alloc(struct intel_gt *gt) return vma; } -__i915_active_call static void __timeline_retire(struct i915_active *active) { struct intel_timeline *tl = @@ -104,7 +103,8 @@ static int intel_timeline_init(struct intel_timeline *timeline, INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); - i915_active_init(&timeline->active, __timeline_active, __timeline_retire); + 
i915_active_init(&timeline->active, __timeline_active, + __timeline_retire, 0); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 2c6f7217469f..b62d1e31a645 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -607,9 +607,38 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } +/* + * These settings aren't actually workarounds, but general tuning settings that + * need to be programmed on several platforms. + */ +static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + /* + * Although some platforms refer to it as Wa_1604555607, we need to + * program it even on those that don't explicitly list that + * workaround. + * + * Note that the programming of this register is further modified + * according to the FF_MODE2 guidance given by Wa_1608008084:gen12. + * Wa_1608008084 tells us the FF_MODE2 register will return the wrong + * value when read. The default value for this register is zero for all + * fields and there are no bit masks. So instead of doing a RMW we + * should just write TDS timer value. For the same reason read + * verification is ignored. 
+ */ + wa_add(wal, + FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, + 0); +} + static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + gen12_ctx_gt_tuning_init(engine, wal); + /* * Wa_1409142259:tgl * Wa_1409347922:tgl @@ -628,27 +657,17 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_field_set(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); -} - -static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) -{ - gen12_ctx_workarounds_init(engine, wal); /* - * Wa_1604555607:tgl,rkl + * Wa_16011163337 * - * Note that the implementation of this workaround is further modified - * according to the FF_MODE2 guidance given by Wa_1608008084:gen12. - * FF_MODE2 register will return the wrong value when read. The default - * value for this register is zero for all fields and there are no bit - * masks. So instead of doing a RMW we should just write the GS Timer - * and TDS timer values for Wa_1604555607 and Wa_16011163337. + * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due + * to Wa_1608008084. */ wa_add(wal, FF_MODE2, - FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128, + FF_MODE2_GS_TIMER_MASK, + FF_MODE2_GS_TIMER_224, 0); } @@ -664,16 +683,6 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_22010493298 */ wa_masked_en(wal, HIZ_CHICKEN, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); - - /* - * Wa_16011163337 - * - * Like in tgl_ctx_workarounds_init(), read verification is ignored due - * to Wa_1608008084. 
- */ - wa_add(wal, - FF_MODE2, - FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0); } static void @@ -690,12 +699,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); - else if (IS_ALDERLAKE_S(i915) || IS_ROCKETLAKE(i915) || - IS_TIGERLAKE(i915)) - tgl_ctx_workarounds_init(engine, wal); - else if (IS_GEN(i915, 12)) + else if (GRAPHICS_VER(i915) == 12) gen12_ctx_workarounds_init(engine, wal); - else if (IS_GEN(i915, 11)) + else if (GRAPHICS_VER(i915) == 11) icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) cnl_ctx_workarounds_init(engine, wal); @@ -713,14 +719,14 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, chv_ctx_workarounds_init(engine, wal); else if (IS_BROADWELL(i915)) bdw_ctx_workarounds_init(engine, wal); - else if (IS_GEN(i915, 7)) + else if (GRAPHICS_VER(i915) == 7) gen7_ctx_workarounds_init(engine, wal); - else if (IS_GEN(i915, 6)) + else if (GRAPHICS_VER(i915) == 6) gen6_ctx_workarounds_init(engine, wal); - else if (INTEL_GEN(i915) < 8) + else if (GRAPHICS_VER(i915) < 8) ; else - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(GRAPHICS_VER(i915)); wa_init_finish(wal); } @@ -944,7 +950,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) unsigned int slice, subslice; u32 l3_en, mcr, mcr_mask; - GEM_BUG_ON(INTEL_GEN(i915) < 10); + GEM_BUG_ON(GRAPHICS_VER(i915) < 10); /* * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl @@ -974,7 +980,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * of every MMIO read. 
*/ - if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { + if (GRAPHICS_VER(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { u32 l3_fuse = intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & GEN10_L3BANK_MASK; @@ -996,7 +1002,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) } subslice--; - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; } else { @@ -1078,11 +1084,37 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } +/* + * Though there are per-engine instances of these registers, + * they retain their value through engine resets and should + * only be provided on the GT workaround list rather than + * the engine-specific workaround list. + */ +static void +wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + struct intel_engine_cs *engine; + struct intel_gt *gt = &i915->gt; + int id; + + for_each_engine(engine, gt, id) { + if (engine->class != VIDEO_DECODE_CLASS || + (engine->instance % 2)) + continue; + + wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base), + IECPUNIT_CLKGATE_DIS); + } +} + static void gen12_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { wa_init_mcr(i915, wal); + + /* Wa_14011060649:tgl,rkl,dg1,adls */ + wa_14011060649(i915, wal); } static void @@ -1139,9 +1171,9 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) dg1_gt_workarounds_init(i915, wal); else if (IS_TIGERLAKE(i915)) tgl_gt_workarounds_init(i915, wal); - else if (IS_GEN(i915, 12)) + else if (GRAPHICS_VER(i915) == 12) gen12_gt_workarounds_init(i915, wal); - else if (IS_GEN(i915, 11)) + else if (GRAPHICS_VER(i915) == 11) icl_gt_workarounds_init(i915, wal); else if (IS_CANNONLAKE(i915)) cnl_gt_workarounds_init(i915, wal); @@ -1161,18 +1193,18 @@ 
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) vlv_gt_workarounds_init(i915, wal); else if (IS_IVYBRIDGE(i915)) ivb_gt_workarounds_init(i915, wal); - else if (IS_GEN(i915, 6)) + else if (GRAPHICS_VER(i915) == 6) snb_gt_workarounds_init(i915, wal); - else if (IS_GEN(i915, 5)) + else if (GRAPHICS_VER(i915) == 5) ilk_gt_workarounds_init(i915, wal); else if (IS_G4X(i915)) g4x_gt_workarounds_init(i915, wal); - else if (IS_GEN(i915, 4)) + else if (GRAPHICS_VER(i915) == 4) gen4_gt_workarounds_init(i915, wal); - else if (INTEL_GEN(i915) <= 8) + else if (GRAPHICS_VER(i915) <= 8) ; else - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(GRAPHICS_VER(i915)); } void intel_gt_init_workarounds(struct drm_i915_private *i915) @@ -1526,9 +1558,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) if (IS_DG1(i915)) dg1_whitelist_build(engine); - else if (IS_GEN(i915, 12)) + else if (GRAPHICS_VER(i915) == 12) tgl_whitelist_build(engine); - else if (IS_GEN(i915, 11)) + else if (GRAPHICS_VER(i915) == 11) icl_whitelist_build(engine); else if (IS_CANNONLAKE(i915)) cnl_whitelist_build(engine); @@ -1544,10 +1576,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) bxt_whitelist_build(engine); else if (IS_SKYLAKE(i915)) skl_whitelist_build(engine); - else if (INTEL_GEN(i915) <= 8) + else if (GRAPHICS_VER(i915) <= 8) ; else - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(GRAPHICS_VER(i915)); wa_init_finish(w); } @@ -1663,7 +1695,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) ENABLE_SMALLPL); } - if (IS_GEN(i915, 11)) { + if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. 
Enable for better image quality */ wa_masked_en(wal, _3D_CHICKEN3, @@ -1755,14 +1787,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); - /* Wa_22010271021:ehl */ - if (IS_JSL_EHL(i915)) - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); + /* Wa_22010271021 */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_GEN_RANGE(i915, 9, 12)) { + if (IS_GRAPHICS_VER(i915, 9, 12)) { /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, @@ -1786,7 +1817,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); } - if (IS_GEN(i915, 9)) { + if (GRAPHICS_VER(i915) == 9) { /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, @@ -1828,9 +1859,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) CACHE_MODE_0_GEN7, /* enable HiZ Raw Stall Optimization */ HIZ_RAW_STALL_OPT_DISABLE); - - /* WaDisable4x2SubspanOptimization:hsw */ - wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); } if (IS_VALLEYVIEW(i915)) { @@ -1893,7 +1921,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); } - if (IS_GEN(i915, 7)) { + if (GRAPHICS_VER(i915) == 7) { /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ wa_masked_en(wal, GFX_MODE_GEN7, @@ -1925,7 +1953,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_WIZ_HASHING_16x4); } - if (IS_GEN_RANGE(i915, 6, 7)) + if (IS_GRAPHICS_VER(i915, 6, 7)) /* * We need to disable the AsyncFlip performance optimisations in * order to use MI_WAIT_FOR_EVENT within the CS. 
It should @@ -1937,7 +1965,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) MI_MODE, ASYNC_FLIP_PERF_DISABLE); - if (IS_GEN(i915, 6)) { + if (GRAPHICS_VER(i915) == 6) { /* * Required for the hardware to program scanline values for * waiting @@ -1991,14 +2019,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) CM0_STC_EVICT_DISABLE_LRA_SNB); } - if (IS_GEN_RANGE(i915, 4, 6)) + if (IS_GRAPHICS_VER(i915, 4, 6)) /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ wa_add(wal, MI_MODE, 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), /* XXX bit doesn't stick on Broadwater */ IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH); - if (IS_GEN(i915, 4)) + if (GRAPHICS_VER(i915) == 4) /* * Disable CONSTANT_BUFFER before it is loaded from the context * image. For as it is loaded, it is executed and the stored @@ -2030,7 +2058,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4)) + if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) return; if (engine->class == RENDER_CLASS) @@ -2043,7 +2071,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (INTEL_GEN(engine->i915) < 4) + if (GRAPHICS_VER(engine->i915) < 4) return; wa_init_start(wal, "engine", engine->name); @@ -2084,9 +2112,9 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset) const struct mcr_range *mcr_ranges; int i; - if (INTEL_GEN(i915) >= 12) + if (GRAPHICS_VER(i915) >= 12) mcr_ranges = mcr_ranges_gen12; - else if (INTEL_GEN(i915) >= 8) + else if (GRAPHICS_VER(i915) >= 8) mcr_ranges = mcr_ranges_gen8; else return false; @@ -2115,7 +2143,7 @@ wa_list_srm(struct i915_request *rq, u32 srm, *cs; srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - if (INTEL_GEN(i915) >= 8) + if (GRAPHICS_VER(i915) >= 8) 
srm++; for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index e1ba03b93ffa..32589c6625e1 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -55,7 +55,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) kfree(ring); return NULL; } - i915_active_init(&ring->vma->active, NULL, NULL); + i915_active_init(&ring->vma->active, NULL, NULL, 0); __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma)); __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags); ring->vma->node.size = sz; diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index b9bdd1d23243..26685b927169 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -88,7 +88,8 @@ static int __live_context_size(struct intel_engine_cs *engine) goto err; vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj, - i915_coherent_map_type(engine->i915)); + i915_coherent_map_type(engine->i915, + ce->state->obj, false)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 3453eb77c498..64abf5feabfa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -52,7 +52,7 @@ static int write_timestamp(struct i915_request *rq, int slot) return PTR_ERR(cs); cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (INTEL_GEN(rq->engine->i915) >= 8) + if (GRAPHICS_VER(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); @@ -125,7 +125,7 @@ static int perf_mi_bb_start(void *arg) enum intel_engine_id id; int err = 0; - if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine 
CS_TIMESTAMP */ return 0; perf_begin(gt); @@ -249,7 +249,7 @@ static int perf_mi_noop(void *arg) enum intel_engine_id id; int err = 0; - if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ return 0; perf_begin(gt); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index b2c369317bf1..4896e4ccad50 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -77,7 +77,7 @@ static struct pulse *pulse_create(void) return p; kref_init(&p->kref); - i915_active_init(&p->active, pulse_active, pulse_retire); + i915_active_init(&p->active, pulse_active, pulse_retire, 0); return p; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 2c898622bdfb..72cca3f0da21 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -198,7 +198,7 @@ static int live_engine_timestamps(void *arg) * the same CS clock. 
*/ - if (INTEL_GEN(gt->i915) < 8) + if (GRAPHICS_VER(gt->i915) < 8) return 0; for_each_engine(engine, gt, id) { diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 1081cd36a2bd..1c8108d30b85 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -551,6 +551,32 @@ static int live_pin_rewind(void *arg) return err; } +static int engine_lock_reset_tasklet(struct intel_engine_cs *engine) +{ + tasklet_disable(&engine->execlists.tasklet); + local_bh_disable(); + + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &engine->gt->reset.flags)) { + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); + + intel_gt_set_wedged(engine->gt); + return -EBUSY; + } + + return 0; +} + +static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine) +{ + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + &engine->gt->reset.flags); + + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); +} + static int live_hold_reset(void *arg) { struct intel_gt *gt = arg; @@ -598,15 +624,9 @@ static int live_hold_reset(void *arg) /* We have our request executing, now remove it and reset */ - local_bh_disable(); - if (test_and_set_bit(I915_RESET_ENGINE + id, - &gt->reset.flags)) { - local_bh_enable(); - intel_gt_set_wedged(gt); - err = -EBUSY; + err = engine_lock_reset_tasklet(engine); + if (err) goto out; - } - tasklet_disable(&engine->execlists.tasklet); engine->execlists.tasklet.callback(&engine->execlists.tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); @@ -618,10 +638,7 @@ static int live_hold_reset(void *arg) __intel_engine_reset_bh(engine, NULL); GEM_BUG_ON(rq->fence.error != -EIO); - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(I915_RESET_ENGINE + id, - &gt->reset.flags); - local_bh_enable(); + engine_unlock_reset_tasklet(engine); /* Check that we do not resubmit the held request */ if 
(!i915_request_wait(rq, 0, HZ / 5)) { @@ -3269,7 +3286,7 @@ static int live_preempt_user(void *arg) if (!intel_engine_has_preemption(engine)) continue; - if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) + if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS) continue; /* we need per-context GPR */ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { @@ -4293,7 +4310,7 @@ static int live_virtual_preserved(void *arg) return 0; /* As we use CS_GPR we cannot run before they existed on all engines. */ - if (INTEL_GEN(gt->i915) < 9) + if (GRAPHICS_VER(gt->i915) < 9) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -4585,15 +4602,9 @@ static int reset_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(engine == ve->engine); /* Take ownership of the reset and tasklet */ - local_bh_disable(); - if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &gt->reset.flags)) { - local_bh_enable(); - intel_gt_set_wedged(gt); - err = -EBUSY; + err = engine_lock_reset_tasklet(engine); + if (err) goto out_heartbeat; - } - tasklet_disable(&engine->execlists.tasklet); engine->execlists.tasklet.callback(&engine->execlists.tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); @@ -4612,9 +4623,7 @@ static int reset_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(rq->fence.error != -EIO); /* Release our grasp on the engine, letting CS flow again */ - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags); - local_bh_enable(); + engine_unlock_reset_tasklet(engine); /* Check that we do not resubmit the held request */ i915_request_get(rq); @@ -4716,7 +4725,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_reset), }; - if (!HAS_EXECLISTS(i915)) + if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP) return 0; if (intel_gt_is_wedged(&i915->gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c 
b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index c0845bf72dd3..b9441217ca3d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -74,10 +74,10 @@ static int live_gt_clocks(void *arg) return 0; } - if (INTEL_GEN(gt->i915) < 4) /* Any CS_TIMESTAMP? */ + if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - if (IS_GEN(gt->i915, 5)) + if (GRAPHICS_VER(gt->i915) == 5) /* * XXX CS_TIMESTAMP low dword is dysfunctional? * @@ -86,7 +86,7 @@ static int live_gt_clocks(void *arg) */ return 0; - if (IS_GEN(gt->i915, 4)) + if (GRAPHICS_VER(gt->i915) == 4) /* * XXX CS_TIMESTAMP appears gibberish * @@ -105,7 +105,7 @@ static int live_gt_clocks(void *arg) u64 time; u64 dt; - if (INTEL_GEN(engine->i915) < 7 && engine->id != RCS0) + if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) continue; measure_clocks(engine, &cycles, &dt); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 746985971c3a..853246fad05f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -69,7 +69,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt) h->seqno = memset(vaddr, 0xff, PAGE_SIZE); vaddr = i915_gem_object_pin_map_unlocked(h->obj, - i915_coherent_map_type(gt->i915)); + i915_coherent_map_type(gt->i915, h->obj, false)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_unpin_hws; @@ -130,7 +130,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) return ERR_CAST(obj); } - vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915)); + vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); i915_vm_put(vm); @@ -180,7 +180,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) goto cancel_rq; batch = h->batch; - if (INTEL_GEN(gt->i915) >= 8) { + if 
(GRAPHICS_VER(gt->i915) >= 8) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); @@ -194,7 +194,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); - } else if (INTEL_GEN(gt->i915) >= 6) { + } else if (GRAPHICS_VER(gt->i915) >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -207,7 +207,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); - } else if (INTEL_GEN(gt->i915) >= 4) { + } else if (GRAPHICS_VER(gt->i915) >= 4) { *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -243,7 +243,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) } flags = 0; - if (INTEL_GEN(gt->i915) <= 5) + if (GRAPHICS_VER(gt->i915) <= 5) flags |= I915_DISPATCH_SECURE; err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c index 94006f117bbd..459b775f163a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.c +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -44,7 +44,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ia_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (GRAPHICS_VER(i915) >= 9 ? 
GEN9_FREQ_SCALER : 1)), found, ia_freq); err = -EINVAL; break; @@ -54,7 +54,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ring_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (GRAPHICS_VER(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ring_freq); err = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 85e7df6a5123..3119016d9910 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -584,7 +584,7 @@ static int __live_lrc_gpr(struct intel_engine_cs *engine, int err; int n; - if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) + if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS) return 0; /* GPR only on rcs0 for gen8 */ err = gpr_make_dirty(engine->kernel_context); @@ -1221,7 +1221,9 @@ static int compare_isolation(struct intel_engine_cs *engine, } lrc = i915_gem_object_pin_map_unlocked(ce->state->obj, - i915_coherent_map_type(engine->i915)); + i915_coherent_map_type(engine->i915, + ce->state->obj, + false)); if (IS_ERR(lrc)) { err = PTR_ERR(lrc); goto err_B1; @@ -1387,10 +1389,10 @@ err_A: static bool skip_isolation(const struct intel_engine_cs *engine) { - if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) + if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9) return true; - if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) + if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11) return true; return false; @@ -1549,7 +1551,7 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) /* We use the already reserved extra page in context state */ if (!a->wa_bb_page) { 
GEM_BUG_ON(b->wa_bb_page); - GEM_BUG_ON(INTEL_GEN(engine->i915) == 12); + GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12); goto unpin_b; } diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index e55a887d11e2..b9bb0e6e97f7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -183,7 +183,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset) * which only controls CPU initiated MMIO. Routing does not * work for CS access so we cannot verify them on this path. */ - return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff; + return GRAPHICS_VER(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff; } static int check_l3cc_table(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index f097e420ac45..8c70b7e12074 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -34,6 +34,7 @@ int live_rc6_manual(void *arg) struct intel_rc6 *rc6 = &gt->rc6; u64 rc0_power, rc6_power; intel_wakeref_t wakeref; + bool has_power; ktime_t dt; u64 res[2]; int err = 0; @@ -50,6 +51,7 @@ int live_rc6_manual(void *arg) if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) return 0; + has_power = librapl_supported(gt->i915); wakeref = intel_runtime_pm_get(gt->uncore->rpm); /* Force RC6 off for starters */ @@ -71,11 +73,14 @@ int live_rc6_manual(void *arg) goto out_unlock; } - rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt)); - if (!rc0_power) { - pr_err("No power measured while in RC0\n"); - err = -EINVAL; - goto out_unlock; + if (has_power) { + rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, + ktime_to_ns(dt)); + if (!rc0_power) { + pr_err("No power measured while in RC0\n"); + err = -EINVAL; + goto out_unlock; + } } /* Manually enter RC6 */ @@ -97,13 +102,16 @@ int live_rc6_manual(void *arg) err = -EINVAL; } - rc6_power = div64_u64(NSEC_PER_SEC * 
rc6_power, ktime_to_ns(dt)); - pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", - rc0_power, rc6_power); - if (2 * rc6_power > rc0_power) { - pr_err("GPU leaked energy while in RC6!\n"); - err = -EINVAL; - goto out_unlock; + if (has_power) { + rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, + ktime_to_ns(dt)); + pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", + rc0_power, rc6_power); + if (2 * rc6_power > rc0_power) { + pr_err("GPU leaked energy while in RC6!\n"); + err = -EINVAL; + goto out_unlock; + } } /* Restore what should have been the original state! */ @@ -132,7 +140,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce) } cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (INTEL_GEN(rq->engine->i915) >= 8) + if (GRAPHICS_VER(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; @@ -185,7 +193,7 @@ int live_rc6_ctx_wa(void *arg) int err = 0; /* A read of CTX_INFO upsets rc6. Poke the bear! */ - if (INTEL_GEN(gt->i915) < 8) + if (GRAPHICS_VER(gt->i915) < 8) return 0; engines = randomised_engines(gt, &prng, &count); diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 99609271c3a7..041954408d0f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -41,10 +41,10 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine) return ERR_CAST(cs); } - if (INTEL_GEN(engine->i915) >= 6) { + if (GRAPHICS_VER(engine->i915) >= 6) { *cs++ = MI_STORE_DWORD_IMM_GEN4; *cs++ = 0; - } else if (INTEL_GEN(engine->i915) >= 4) { + } else if (GRAPHICS_VER(engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; } else { @@ -266,7 +266,7 @@ static int live_ctx_switch_wa(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - if (IS_GEN_RANGE(gt->i915, 4, 5)) + if (IS_GRAPHICS_VER(gt->i915, 4, 5)) continue; /* MI_STORE_DWORD is privileged! 
*/ saved_wa = fetch_and_zero(&engine->wa_ctx.vma); @@ -291,7 +291,7 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915) SUBTEST(live_ctx_switch_wa), }; - if (HAS_EXECLISTS(i915)) + if (i915->gt.submission_method > INTEL_SUBMISSION_RING) return 0; return intel_gt_live_subtests(tests, &i915->gt); diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 967641fee42a..7ee2513e15f9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -204,7 +204,7 @@ static void show_pstate_limits(struct intel_rps *rps) i915_mmio_reg_offset(BXT_RP_STATE_CAP), intel_uncore_read(rps_to_uncore(rps), BXT_RP_STATE_CAP)); - } else if (IS_GEN(i915, 9)) { + } else if (GRAPHICS_VER(i915) == 9) { pr_info("P_STATE_LIMITS[%x]: 0x%08x\n", i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS), intel_uncore_read(rps_to_uncore(rps), @@ -222,7 +222,7 @@ int live_rps_clock_interval(void *arg) struct igt_spinner spin; int err = 0; - if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6) + if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; if (igt_spinner_init(&spin, gt)) @@ -506,7 +506,7 @@ static void show_pcu_config(struct intel_rps *rps) min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; - if (INTEL_GEN(i915) >= 9) { + if (GRAPHICS_VER(i915) >= 9) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq /= GEN9_FREQ_SCALER; max_gpu_freq /= GEN9_FREQ_SCALER; @@ -606,7 +606,7 @@ int live_rps_frequency_cs(void *arg) int err = 0; /* - * The premise is that the GPU does change freqency at our behest. + * The premise is that the GPU does change frequency at our behest. * Let's check there is a correspondence between the requested * frequency, the actual frequency, and the observed clock rate. 
*/ @@ -614,7 +614,7 @@ int live_rps_frequency_cs(void *arg) if (!intel_rps_is_enabled(rps)) return 0; - if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ + if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ return 0; if (CPU_LATENCY >= 0) @@ -747,7 +747,7 @@ int live_rps_frequency_srm(void *arg) int err = 0; /* - * The premise is that the GPU does change freqency at our behest. + * The premise is that the GPU does change frequency at our behest. * Let's check there is a correspondence between the requested * frequency, the actual frequency, and the observed clock rate. */ @@ -755,7 +755,7 @@ int live_rps_frequency_srm(void *arg) if (!intel_rps_is_enabled(rps)) return 0; - if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ + if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ return 0; if (CPU_LATENCY >= 0) @@ -1031,7 +1031,7 @@ int live_rps_interrupt(void *arg) * First, let's check whether or not we are receiving interrupts. */ - if (!intel_rps_has_interrupts(rps) || INTEL_GEN(gt->i915) < 6) + if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; intel_gt_pm_get(gt); @@ -1136,10 +1136,10 @@ int live_rps_power(void *arg) * that theory. */ - if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6) + if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; - if (!librapl_energy_uJ()) + if (!librapl_supported(gt->i915)) return 0; if (igt_spinner_init(&spin, gt)) @@ -1240,7 +1240,7 @@ int live_rps_dynamic(void *arg) * moving parts into dynamic reclocking based on load. 
*/ - if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6) + if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; if (igt_spinner_init(&spin, gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 9adbd9d147be..64da0c91dec1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -457,12 +457,12 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) if (IS_ERR(cs)) return PTR_ERR(cs); - if (INTEL_GEN(rq->engine->i915) >= 8) { + if (GRAPHICS_VER(rq->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = addr; *cs++ = 0; *cs++ = value; - } else if (INTEL_GEN(rq->engine->i915) >= 4) { + } else if (GRAPHICS_VER(rq->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = addr; @@ -992,7 +992,7 @@ static int live_hwsp_read(void *arg) * even across multiple wraps. */ - if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */ + if (GRAPHICS_VER(gt->i915) < 8) /* CS convenience [SRM/LRM] */ return 0; tl = intel_timeline_create(gt); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 64937ec3f2dc..c30754daf4b1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -145,7 +145,7 @@ read_nonprivs(struct intel_context *ce) goto err_req; srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - if (INTEL_GEN(engine->i915) >= 8) + if (GRAPHICS_VER(engine->i915) >= 8) srm++; cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); @@ -546,7 +546,7 @@ retry: srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; - if (INTEL_GEN(engine->i915) >= 8) + if (GRAPHICS_VER(engine->i915) >= 8) lrm++, srm++; pr_debug("%s: Writing garbage to %x\n", @@ -749,7 +749,7 @@ static int live_dirty_whitelist(void *arg) /* Can the user write to the whitelisted 
registers? */ - if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ + if (GRAPHICS_VER(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; for_each_engine(engine, gt, id) { @@ -829,7 +829,7 @@ static int read_whitelisted_registers(struct intel_context *ce, goto err_req; srm = MI_STORE_REGISTER_MEM; - if (INTEL_GEN(engine->i915) >= 8) + if (GRAPHICS_VER(engine->i915) >= 8) srm++; cs = intel_ring_begin(rq, 4 * engine->whitelist.count); diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index f8f02aab842b..0683b27a3890 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -8,6 +8,7 @@ #include <linux/shmem_fs.h> #include "gem/i915_gem_object.h" +#include "gem/i915_gem_lmem.h" #include "shmem_utils.h" struct file *shmem_create_from_data(const char *name, void *data, size_t len) @@ -39,7 +40,8 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) return file; } - ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); + ptr = i915_gem_object_pin_map_unlocked(obj, i915_gem_object_is_lmem(obj) ? 
+ I915_MAP_WC : I915_MAP_WB); if (IS_ERR(ptr)) return ERR_CAST(ptr); diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h new file mode 100644 index 000000000000..90efef8a73e4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ACTIONS_ABI_H +#define _ABI_GUC_ACTIONS_ABI_H + +enum intel_guc_action { + INTEL_GUC_ACTION_DEFAULT = 0x0, + INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, + INTEL_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, + INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, + INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, + INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, + INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, + INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, + INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, + INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, + INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, + INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + INTEL_GUC_ACTION_LIMIT +}; + +enum intel_guc_preempt_options { + INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, +}; + +enum intel_guc_report_status { + INTEL_GUC_REPORT_STATUS_UNKNOWN = 0x0, + INTEL_GUC_REPORT_STATUS_ACKED = 0x1, + INTEL_GUC_REPORT_STATUS_ERROR = 0x2, + INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4, +}; + +enum intel_guc_sleep_state_status { + INTEL_GUC_SLEEP_STATE_SUCCESS = 0x1, + INTEL_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2, + INTEL_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3 +#define INTEL_GUC_SLEEP_STATE_INVALID_MASK 0x80000000 +}; + +#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) +#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 +#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) +#define GUC_LOG_CONTROL_DEFAULT_LOGGING 
(1 << 8) + +#endif /* _ABI_GUC_ACTIONS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h new file mode 100644 index 000000000000..d38935f47ecf --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H +#define _ABI_GUC_COMMUNICATION_CTB_ABI_H + +#include <linux/types.h> + +/** + * DOC: CTB based communication + * + * The CTB (command transport buffer) communication between Host and GuC + * is based on u32 data stream written to the shared buffer. One buffer can + * be used to transmit data only in one direction (one-directional channel). + * + * Current status of the each buffer is stored in the buffer descriptor. + * Buffer descriptor holds tail and head fields that represents active data + * stream. The tail field is updated by the data producer (sender), and head + * field is updated by the data consumer (receiver):: + * + * +------------+ + * | DESCRIPTOR | +=================+============+========+ + * +============+ | | MESSAGE(s) | | + * | address |--------->+=================+============+========+ + * +------------+ + * | head | ^-----head--------^ + * +------------+ + * | tail | ^---------tail-----------------^ + * +------------+ + * | size | ^---------------size--------------------^ + * +------------+ + * + * Each message in data stream starts with the single u32 treated as a header, + * followed by optional set of u32 data that makes message specific payload:: + * + * +------------+---------+---------+---------+ + * | MESSAGE | + * +------------+---------+---------+---------+ + * | msg[0] | [1] | ... | [n-1] | + * +------------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+ + * | | 0 | ... 
| n | + * +======+=====+=========+=========+=========+ + * | 31:16| code| | | | + * +------+-----+ | | | + * | 15:5|flags| | | | + * +------+-----+ | | | + * | 4:0| len| | | | + * +------+-----+---------+---------+---------+ + * + * ^-------------len-------------^ + * + * The message header consists of: + * + * - **len**, indicates length of the message payload (in u32) + * - **code**, indicates message code + * - **flags**, holds various bits to control message handling + */ + +/* + * Describes single command transport buffer. + * Used by both guc-master and clients. + */ +struct guc_ct_buffer_desc { + u32 addr; /* gfx address */ + u64 host_private; /* host private data */ + u32 size; /* size in bytes */ + u32 head; /* offset updated by GuC*/ + u32 tail; /* offset updated by owner */ + u32 is_in_error; /* error indicator */ + u32 reserved1; + u32 reserved2; + u32 owner; /* id of the channel owner */ + u32 owner_sub_id; /* owner-defined field for extra tracking */ + u32 reserved[5]; +} __packed; + +/* Type of command transport buffer */ +#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u +#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u + +/* + * Definition of the command transport message header (DW0) + * + * bit[4..0] message len (in dwords) + * bit[7..5] reserved + * bit[8] response (G2H only) + * bit[8] write fence to desc (H2G only) + * bit[9] write status to H2G buff (H2G only) + * bit[10] send status back via G2H (H2G only) + * bit[15..11] reserved + * bit[31..16] action code + */ +#define GUC_CT_MSG_LEN_SHIFT 0 +#define GUC_CT_MSG_LEN_MASK 0x1F +#define GUC_CT_MSG_IS_RESPONSE (1 << 8) +#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) +#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) +#define GUC_CT_MSG_SEND_STATUS (1 << 10) +#define GUC_CT_MSG_ACTION_SHIFT 16 +#define GUC_CT_MSG_ACTION_MASK 0xFFFF + +#endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h new file mode 100644 index 000000000000..be066a62e9e0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H +#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H + +/** + * DOC: MMIO based communication + * + * The MMIO based communication between Host and GuC uses software scratch + * registers, where first register holds data treated as message header, + * and other registers are used to hold message payload. + * + * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 8 parameters and the GuC FW + * itself uses an 8-element array to store the H2G message. + * + * +-----------+---------+---------+---------+ + * | MMIO[0] | MMIO[1] | ... | MMIO[n] | + * +-----------+---------+---------+---------+ + * | header | optional payload | + * +======+====+=========+=========+=========+ + * | 31:28|type| | | | + * +------+----+ | | | + * | 27:16|data| | | | + * +------+----+ | | | + * | 15:0|code| | | | + * +------+----+---------+---------+---------+ + * + * The message header consists of: + * + * - **type**, indicates message type + * - **code**, indicates message code, is specific for **type** + * - **data**, indicates message data, optional, depends on **code** + * + * The following message **types** are supported: + * + * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code + * must be provided in **code** field. Optional action specific parameters + * can be provided in remaining payload registers or **data** field. + * + * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request, + * action response status will be provided in **code** field. Optional + * response data can be returned in remaining payload registers or **data** + * field. 
+ */ + +#define GUC_MAX_MMIO_MSG_LEN 8 + +#endif /* _ABI_GUC_COMMUNICATION_MMIO_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h new file mode 100644 index 000000000000..488b6061ee89 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ERRORS_ABI_H +#define _ABI_GUC_ERRORS_ABI_H + +enum intel_guc_response_status { + INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, +}; + +#endif /* _ABI_GUC_ERRORS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h new file mode 100644 index 000000000000..775e21f3058c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_MESSAGES_ABI_H +#define _ABI_GUC_MESSAGES_ABI_H + +#define INTEL_GUC_MSG_TYPE_SHIFT 28 +#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) +#define INTEL_GUC_MSG_DATA_SHIFT 16 +#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) +#define INTEL_GUC_MSG_CODE_SHIFT 0 +#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) + +enum intel_guc_msg_type { + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, +}; + +#endif /* _ABI_GUC_MESSAGES_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 78305b2ec89d..f147cb389a20 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -60,15 +60,8 @@ void intel_guc_init_send_regs(struct intel_guc *guc) enum forcewake_domains fw_domains = 0; unsigned int i; - if (INTEL_GEN(gt->i915) >= 11) { - guc->send_regs.base = - i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0)); - 
guc->send_regs.count = GEN11_SOFT_SCRATCH_COUNT; - } else { - guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); - guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN; - BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT); - } + GEM_BUG_ON(!guc->send_regs.base); + GEM_BUG_ON(!guc->send_regs.count); for (i = 0; i < guc->send_regs.count; i++) { fw_domains |= intel_uncore_forcewake_for_reg(gt->uncore, @@ -96,12 +89,9 @@ static void gen9_enable_guc_interrupts(struct intel_guc *guc) assert_rpm_wakelock_held(&gt->i915->runtime_pm); spin_lock_irq(&gt->irq_lock); - if (!guc->interrupts.enabled) { - WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & - gt->pm_guc_events); - guc->interrupts.enabled = true; - gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); - } + WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & + gt->pm_guc_events); + gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); spin_unlock_irq(&gt->irq_lock); } @@ -112,7 +102,6 @@ static void gen9_disable_guc_interrupts(struct intel_guc *guc) assert_rpm_wakelock_held(&gt->i915->runtime_pm); spin_lock_irq(&gt->irq_lock); - guc->interrupts.enabled = false; gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); @@ -134,18 +123,14 @@ static void gen11_reset_guc_interrupts(struct intel_guc *guc) static void gen11_enable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); + u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); spin_lock_irq(&gt->irq_lock); - if (!guc->interrupts.enabled) { - u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); - - WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); - intel_uncore_write(gt->uncore, - GEN11_GUC_SG_INTR_ENABLE, events); - intel_uncore_write(gt->uncore, - GEN11_GUC_SG_INTR_MASK, ~events); - guc->interrupts.enabled = true; - } + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_ENABLE, events); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_MASK, ~events); 
spin_unlock_irq(&gt->irq_lock); } @@ -154,7 +139,6 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc) struct intel_gt *gt = guc_to_gt(guc); spin_lock_irq(&gt->irq_lock); - guc->interrupts.enabled = false; intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); @@ -176,16 +160,23 @@ void intel_guc_init_early(struct intel_guc *guc) mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { guc->notify_reg = GEN11_GUC_HOST_INTERRUPT; guc->interrupts.reset = gen11_reset_guc_interrupts; guc->interrupts.enable = gen11_enable_guc_interrupts; guc->interrupts.disable = gen11_disable_guc_interrupts; + guc->send_regs.base = + i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0)); + guc->send_regs.count = GEN11_SOFT_SCRATCH_COUNT; + } else { guc->notify_reg = GUC_SEND_INTERRUPT; guc->interrupts.reset = gen9_reset_guc_interrupts; guc->interrupts.enable = gen9_enable_guc_interrupts; guc->interrupts.disable = gen9_disable_guc_interrupts; + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); + guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN; + BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT); } } @@ -469,22 +460,6 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, return 0; } -int intel_guc_sample_forcewake(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; - u32 action[2]; - - action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; - /* WaRsDisableCoarsePowerGating:skl,cnl */ - if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - action[1] = 0; - else - /* bit 0 and 1 are for Render and Media domain separately */ - action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - /** * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode * @guc: intel_guc structure @@ -682,7 +657,9 @@ int 
intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, if (IS_ERR(vma)) return PTR_ERR(vma); - vaddr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(vma->obj, + i915_coherent_map_type(guc_to_gt(guc)->i915, + vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index bc2ba7d0626c..4abc59f6f3cd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -33,7 +33,6 @@ struct intel_guc { unsigned int msg_enabled_mask; struct { - bool enabled; void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); @@ -128,7 +127,6 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size); int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, const u32 *payload, u32 len); -int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 17526717368c..9abfbc6edbd6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -6,6 +6,7 @@ #include "gt/intel_gt.h" #include "gt/intel_lrc.h" #include "intel_guc_ads.h" +#include "intel_guc_fwif.h" #include "intel_uc.h" #include "i915_drv.h" @@ -104,7 +105,7 @@ static void guc_mapping_table_init(struct intel_gt *gt, GUC_MAX_INSTANCES_PER_CLASS; for_each_engine(engine, gt, id) { - u8 guc_class = engine->class; + u8 guc_class = engine_class_to_guc_class(engine->class); system_info->mapping_table[guc_class][engine->instance] = engine->instance; @@ -124,7 +125,7 @@ static void 
__guc_ads_init(struct intel_guc *guc) struct __guc_ads_blob *blob = guc->ads_blob; const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; u32 base; - u8 engine_class; + u8 engine_class, guc_class; /* GuC scheduling policies */ guc_policies_init(&blob->policies); @@ -140,29 +141,32 @@ static void __guc_ads_init(struct intel_guc *guc) for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { if (engine_class == OTHER_CLASS) continue; + + guc_class = engine_class_to_guc_class(engine_class); + /* * TODO: Set context pointer to default state to allow * GuC to re-init guilty contexts after internal reset. */ - blob->ads.golden_context_lrca[engine_class] = 0; - blob->ads.eng_state_size[engine_class] = + blob->ads.golden_context_lrca[guc_class] = 0; + blob->ads.eng_state_size[guc_class] = intel_engine_context_size(guc_to_gt(guc), engine_class) - skipped_size; } /* System info */ - blob->system_info.engine_enabled_masks[RENDER_CLASS] = 1; - blob->system_info.engine_enabled_masks[COPY_ENGINE_CLASS] = 1; - blob->system_info.engine_enabled_masks[VIDEO_DECODE_CLASS] = VDBOX_MASK(gt); - blob->system_info.engine_enabled_masks[VIDEO_ENHANCEMENT_CLASS] = VEBOX_MASK(gt); + blob->system_info.engine_enabled_masks[GUC_RENDER_CLASS] = 1; + blob->system_info.engine_enabled_masks[GUC_BLITTER_CLASS] = 1; + blob->system_info.engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); + blob->system_info.engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = hweight8(gt->info.sseu.slice_mask); blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] = gt->info.vdbox_sfc_access; - if (INTEL_GEN(i915) >= 12 && !IS_DGFX(i915)) { + if (GRAPHICS_VER(i915) >= 12 && !IS_DGFX(i915)) { u32 distdbreg = intel_uncore_read(gt->uncore, GEN12_DIST_DBS_POPULATED); blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] = diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index fa9e048cc65f..8f7b148fef58 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -7,14 +7,62 @@ #include "intel_guc_ct.h" #include "gt/intel_gt.h" +static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) +{ + return container_of(ct, struct intel_guc, ct); +} + +static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct) +{ + return guc_to_gt(ct_to_guc(ct)); +} + +static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct) +{ + return ct_to_gt(ct)->i915; +} + +static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct) +{ + return &ct_to_i915(ct)->drm; +} + #define CT_ERROR(_ct, _fmt, ...) \ - DRM_DEV_ERROR(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__) + drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__) #ifdef CONFIG_DRM_I915_DEBUG_GUC #define CT_DEBUG(_ct, _fmt, ...) \ - DRM_DEV_DEBUG_DRIVER(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__) + drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__) #else #define CT_DEBUG(...) do { } while (0) #endif +#define CT_PROBE_ERROR(_ct, _fmt, ...) 
\ + i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__) + +/** + * DOC: CTB Blob + * + * We allocate single blob to hold both CTB descriptors and buffers: + * + * +--------+-----------------------------------------------+------+ + * | offset | contents | size | + * +========+===============================================+======+ + * | 0x0000 | H2G `CTB Descriptor`_ (send) | | + * +--------+-----------------------------------------------+ 4K | + * | 0x0800 | G2H `CTB Descriptor`_ (recv) | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | H2G `CT Buffer`_ (send) | n*4K | + * | | | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | G2H `CT Buffer`_ (recv) | m*4K | + * | + n*4K | | | + * +--------+-----------------------------------------------+------+ + * + * Size of each `CT Buffer`_ must be multiple of 4K. + * As we don't expect too many messages, for now use minimum sizes. + */ +#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) +#define CTB_H2G_BUFFER_SIZE (SZ_4K) +#define CTB_G2H_BUFFER_SIZE (SZ_4K) struct ct_request { struct list_head link; @@ -24,8 +72,9 @@ struct ct_request { u32 *response_buf; }; -struct ct_incoming_request { +struct ct_incoming_msg { struct list_head link; + u32 size; u32 msg[]; }; @@ -33,6 +82,7 @@ enum { CTB_SEND = 0, CTB_RECV = 1 }; enum { CTB_OWNER_HOST = 0 }; +static void ct_receive_tasklet_func(struct tasklet_struct *t); static void ct_incoming_request_worker_func(struct work_struct *w); /** @@ -41,30 +91,13 @@ static void ct_incoming_request_worker_func(struct work_struct *w); */ void intel_guc_ct_init_early(struct intel_guc_ct *ct) { + spin_lock_init(&ct->ctbs.send.lock); + spin_lock_init(&ct->ctbs.recv.lock); spin_lock_init(&ct->requests.lock); INIT_LIST_HEAD(&ct->requests.pending); INIT_LIST_HEAD(&ct->requests.incoming); INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); -} - -static inline struct intel_guc 
*ct_to_guc(struct intel_guc_ct *ct) -{ - return container_of(ct, struct intel_guc, ct); -} - -static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct) -{ - return guc_to_gt(ct_to_guc(ct)); -} - -static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct) -{ - return ct_to_gt(ct)->i915; -} - -static inline struct device *ct_to_dev(struct intel_guc_ct *ct) -{ - return ct_to_i915(ct)->drm.dev; + tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func); } static inline const char *guc_ct_buffer_type_to_str(u32 type) @@ -88,11 +121,22 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, desc->owner = CTB_OWNER_HOST; } -static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) +static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb, u32 cmds_addr) { - desc->head = 0; - desc->tail = 0; - desc->is_in_error = 0; + guc_ct_buffer_desc_init(ctb->desc, cmds_addr, ctb->size); +} + +static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, + struct guc_ct_buffer_desc *desc, + u32 *cmds, u32 size) +{ + GEM_BUG_ON(size % 4); + + ctb->desc = desc; + ctb->cmds = cmds; + ctb->size = size; + + guc_ct_buffer_reset(ctb, 0); } static int guc_action_register_ct_buffer(struct intel_guc *guc, @@ -153,48 +197,42 @@ static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) int intel_guc_ct_init(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); + struct guc_ct_buffer_desc *desc; + u32 blob_size; + u32 cmds_size; void *blob; + u32 *cmds; int err; - int i; GEM_BUG_ON(ct->vma); - /* We allocate 1 page to hold both descriptors and both buffers. - * ___________..................... 
- * |desc (SEND)| : - * |___________| PAGE/4 - * :___________....................: - * |desc (RECV)| : - * |___________| PAGE/4 - * :_______________________________: - * |cmds (SEND) | - * | PAGE/4 - * |_______________________________| - * |cmds (RECV) | - * | PAGE/4 - * |_______________________________| - * - * Each message can use a maximum of 32 dwords and we don't expect to - * have more than 1 in flight at any time, so we have enough space. - * Some logic further ahead will rely on the fact that there is only 1 - * page and that it is always mapped, so if the size is changed the - * other code will need updating as well. - */ - - err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob); + blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE; + err = intel_guc_allocate_and_map_vma(guc, blob_size, &ct->vma, &blob); if (unlikely(err)) { - CT_ERROR(ct, "Failed to allocate CT channel (err=%d)\n", err); + CT_PROBE_ERROR(ct, "Failed to allocate %u for CTB data (%pe)\n", + blob_size, ERR_PTR(err)); return err; } - CT_DEBUG(ct, "vma base=%#x\n", intel_guc_ggtt_offset(guc, ct->vma)); + CT_DEBUG(ct, "base=%#x size=%u\n", intel_guc_ggtt_offset(guc, ct->vma), blob_size); - /* store pointers to desc and cmds */ - for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { - GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i; - ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; - } + /* store pointers to desc and cmds for send ctb */ + desc = blob; + cmds = blob + 2 * CTB_DESC_SIZE; + cmds_size = CTB_H2G_BUFFER_SIZE; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + + guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size); + + /* store pointers to desc and cmds for recv ctb */ + desc = blob + CTB_DESC_SIZE; + cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE; + cmds_size = CTB_G2H_BUFFER_SIZE; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size 
%u\n", "recv", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + + guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size); return 0; } @@ -209,6 +247,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) { GEM_BUG_ON(ct->enabled); + tasklet_kill(&ct->receive_tasklet); i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP); memset(ct, 0, sizeof(*ct)); } @@ -222,37 +261,38 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) int intel_guc_ct_enable(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); - u32 base, cmds, size; + u32 base, cmds; + void *blob; int err; - int i; GEM_BUG_ON(ct->enabled); /* vma should be already allocated and map'ed */ GEM_BUG_ON(!ct->vma); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(ct->vma->obj)); base = intel_guc_ggtt_offset(guc, ct->vma); - /* (re)initialize descriptors - * cmds buffers are in the second half of the blob page - */ - for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { - GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - cmds = base + PAGE_SIZE / 4 * i + PAGE_SIZE / 2; - size = PAGE_SIZE / 4; - CT_DEBUG(ct, "%d: addr=%#x size=%u\n", i, cmds, size); - guc_ct_buffer_desc_init(ct->ctbs[i].desc, cmds, size); - } + /* blob should start with send descriptor */ + blob = __px_vaddr(ct->vma->obj); + GEM_BUG_ON(blob != ct->ctbs.send.desc); + + /* (re)initialize descriptors */ + cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); + guc_ct_buffer_reset(&ct->ctbs.send, cmds); + + cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); + guc_ct_buffer_reset(&ct->ctbs.recv, cmds); /* * Register both CT buffers starting with RECV buffer. * Descriptors are in first half of the blob. 
*/ - err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_RECV, + err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.recv.desc, blob), INTEL_GUC_CT_BUFFER_TYPE_RECV); if (unlikely(err)) goto err_out; - err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_SEND, + err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.send.desc, blob), INTEL_GUC_CT_BUFFER_TYPE_SEND); if (unlikely(err)) goto err_deregister; @@ -264,7 +304,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) err_deregister: ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); err_out: - CT_ERROR(ct, "Failed to open open CT channel (err=%d)\n", err); + CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); return err; } @@ -292,6 +332,28 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } +static void write_barrier(struct intel_guc_ct *ct) +{ + struct intel_guc *guc = ct_to_guc(ct); + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + GEM_BUG_ON(guc->send_regs.fw_domains); + /* + * This register is used by the i915 and GuC for MMIO based + * communication. Once we are in this code CTBs are the only + * method the i915 uses to communicate with the GuC so it is + * safe to write to this register (a value of 0 is NOP for MMIO + * communication). If we ever start mixing CTBs and MMIOs a new + * register will have to be chosen. 
+ */ + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} + /** * DOC: CTB Host to GuC request * @@ -313,14 +375,13 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, - u32 fence, - bool want_response) + u32 fence) { - struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND]; + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct guc_ct_buffer_desc *desc = ctb->desc; u32 head = desc->head; u32 tail = desc->tail; - u32 size = desc->size; + u32 size = ctb->size; u32 used; u32 header; u32 *cmds = ctb->cmds; @@ -329,7 +390,7 @@ static int ct_write(struct intel_guc_ct *ct, if (unlikely(desc->is_in_error)) return -EPIPE; - if (unlikely(!IS_ALIGNED(head | tail | size, 4) || + if (unlikely(!IS_ALIGNED(head | tail, 4) || (tail | head) >= size)) goto corrupted; @@ -358,8 +419,7 @@ static int ct_write(struct intel_guc_ct *ct, * DW2+: action data */ header = (len << GUC_CT_MSG_LEN_SHIFT) | - (GUC_CT_MSG_WRITE_FENCE_TO_DESC) | - (want_response ? GUC_CT_MSG_SEND_STATUS : 0) | + GUC_CT_MSG_SEND_STATUS | (action[0] << GUC_CT_MSG_ACTION_SHIFT); CT_DEBUG(ct, "writing %*ph %*ph %*ph\n", @@ -377,6 +437,12 @@ static int ct_write(struct intel_guc_ct *ct, } GEM_BUG_ON(tail > size); + /* + * make sure H2G buffer update and LRC tail update (if this triggering a + * submission) are visible before updating the descriptor tail + */ + write_barrier(ct); + /* now update desc tail (back in bytes) */ desc->tail = tail * 4; return 0; @@ -389,56 +455,6 @@ corrupted: } /** - * wait_for_ctb_desc_update - Wait for the CT buffer descriptor update. - * @desc: buffer descriptor - * @fence: response fence - * @status: placeholder for status - * - * Guc will update CT buffer descriptor with new fence and status - * after processing the command identified by the fence. 
Wait for - * specified fence and then read from the descriptor status of the - * command. - * - * Return: - * * 0 response received (status is valid) - * * -ETIMEDOUT no response within hardcoded timeout - * * -EPROTO no response, CT buffer is in error - */ -static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc, - u32 fence, - u32 *status) -{ - int err; - - /* - * Fast commands should complete in less than 10us, so sample quickly - * up to that length of time, then switch to a slower sleep-wait loop. - * No GuC command should ever take longer than 10ms. - */ -#define done (READ_ONCE(desc->fence) == fence) - err = wait_for_us(done, 10); - if (err) - err = wait_for(done, 10); -#undef done - - if (unlikely(err)) { - DRM_ERROR("CT: fence %u failed; reported fence=%u\n", - fence, desc->fence); - - if (WARN_ON(desc->is_in_error)) { - /* Something went wrong with the messaging, try to reset - * the buffer and hope for the best - */ - guc_ct_buffer_desc_reset(desc); - err = -EPROTO; - } - } - - *status = desc->status; - return err; -} - -/** * wait_for_ct_request_update - Wait for CT request state update. 
* @req: pointer to pending request * @status: placeholder for status @@ -481,8 +497,6 @@ static int ct_send(struct intel_guc_ct *ct, u32 response_buf_size, u32 *status) { - struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND]; - struct guc_ct_buffer_desc *desc = ctb->desc; struct ct_request request; unsigned long flags; u32 fence; @@ -493,26 +507,28 @@ static int ct_send(struct intel_guc_ct *ct, GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(!response_buf && response_buf_size); + spin_lock_irqsave(&ct->ctbs.send.lock, flags); + fence = ct_get_next_fence(ct); request.fence = fence; request.status = 0; request.response_len = response_buf_size; request.response_buf = response_buf; - spin_lock_irqsave(&ct->requests.lock, flags); + spin_lock(&ct->requests.lock); list_add_tail(&request.link, &ct->requests.pending); - spin_unlock_irqrestore(&ct->requests.lock, flags); + spin_unlock(&ct->requests.lock); + + err = ct_write(ct, action, len, fence); + + spin_unlock_irqrestore(&ct->ctbs.send.lock, flags); - err = ct_write(ct, action, len, fence, !!response_buf); if (unlikely(err)) goto unlink; intel_guc_notify(ct_to_guc(ct)); - if (response_buf) - err = wait_for_ct_request_update(&request, status); - else - err = wait_for_ctb_desc_update(desc, fence, status); + err = wait_for_ct_request_update(&request, status); if (unlikely(err)) goto unlink; @@ -547,7 +563,6 @@ unlink: int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { - struct intel_guc *guc = ct_to_guc(ct); u32 status = ~0; /* undefined */ int ret; @@ -556,8 +571,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, return -ENODEV; } - mutex_lock(&guc->send_mutex); - ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n", @@ -567,7 +580,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, 
action[0], ret, ret); } - mutex_unlock(&guc->send_mutex); return ret; } @@ -586,22 +598,42 @@ static inline bool ct_header_is_response(u32 header) return !!(header & GUC_CT_MSG_IS_RESPONSE); } -static int ct_read(struct intel_guc_ct *ct, u32 *data) +static struct ct_incoming_msg *ct_alloc_msg(u32 num_dwords) { - struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV]; + struct ct_incoming_msg *msg; + + msg = kmalloc(sizeof(*msg) + sizeof(u32) * num_dwords, GFP_ATOMIC); + if (msg) + msg->size = num_dwords; + return msg; +} + +static void ct_free_msg(struct ct_incoming_msg *msg) +{ + kfree(msg); +} + +/* + * Return: number available remaining dwords to read (0 if empty) + * or a negative error code on failure + */ +static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; struct guc_ct_buffer_desc *desc = ctb->desc; u32 head = desc->head; u32 tail = desc->tail; - u32 size = desc->size; + u32 size = ctb->size; u32 *cmds = ctb->cmds; s32 available; unsigned int len; unsigned int i; + u32 header; if (unlikely(desc->is_in_error)) return -EPIPE; - if (unlikely(!IS_ALIGNED(head | tail | size, 4) || + if (unlikely(!IS_ALIGNED(head | tail, 4) || (tail | head) >= size)) goto corrupted; @@ -612,8 +644,10 @@ static int ct_read(struct intel_guc_ct *ct, u32 *data) /* tail == head condition indicates empty */ available = tail - head; - if (unlikely(available == 0)) - return -ENODATA; + if (unlikely(available == 0)) { + *msg = NULL; + return 0; + } /* beware of buffer wrap case */ if (unlikely(available < 0)) @@ -621,14 +655,14 @@ static int ct_read(struct intel_guc_ct *ct, u32 *data) CT_DEBUG(ct, "available %d (%u:%u)\n", available, head, tail); GEM_BUG_ON(available < 0); - data[0] = cmds[head]; + header = cmds[head]; head = (head + 1) % size; /* message len with header */ - len = ct_header_get_len(data[0]) + 1; + len = ct_header_get_len(header) + 1; if (unlikely(len > (u32)available)) { CT_ERROR(ct, "Incomplete 
message %*ph %*ph %*ph\n", - 4, data, + 4, &header, 4 * (head + available - 1 > size ? size - head : available - 1), &cmds[head], 4 * (head + available - 1 > size ? @@ -636,14 +670,27 @@ static int ct_read(struct intel_guc_ct *ct, u32 *data) goto corrupted; } + *msg = ct_alloc_msg(len); + if (!*msg) { + CT_ERROR(ct, "No memory for message %*ph %*ph %*ph\n", + 4, &header, + 4 * (head + available - 1 > size ? + size - head : available - 1), &cmds[head], + 4 * (head + available - 1 > size ? + available - 1 - size + head : 0), &cmds[0]); + return available; + } + + (*msg)->msg[0] = header; + for (i = 1; i < len; i++) { - data[i] = cmds[head]; + (*msg)->msg[i] = cmds[head]; head = (head + 1) % size; } - CT_DEBUG(ct, "received %*ph\n", 4 * len, data); + CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg); desc->head = head * 4; - return 0; + return available - len; corrupted: CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", @@ -670,39 +717,39 @@ corrupted: * ^-----------------------len-----------------------^ */ -static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) +static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response) { - u32 header = msg[0]; + u32 header = response->msg[0]; u32 len = ct_header_get_len(header); - u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */ u32 fence; u32 status; u32 datalen; struct ct_request *req; + unsigned long flags; bool found = false; + int err = 0; GEM_BUG_ON(!ct_header_is_response(header)); - GEM_BUG_ON(!in_irq()); /* Response payload shall at least include fence and status */ if (unlikely(len < 2)) { - CT_ERROR(ct, "Corrupted response %*ph\n", msgsize, msg); + CT_ERROR(ct, "Corrupted response (len %u)\n", len); return -EPROTO; } - fence = msg[1]; - status = msg[2]; + fence = response->msg[1]; + status = response->msg[2]; datalen = len - 2; /* Format of the status follows RESPONSE message */ if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) 
{ - CT_ERROR(ct, "Corrupted response %*ph\n", msgsize, msg); + CT_ERROR(ct, "Corrupted response (status %#x)\n", status); return -EPROTO; } CT_DEBUG(ct, "response fence %u status %#x\n", fence, status); - spin_lock(&ct->requests.lock); + spin_lock_irqsave(&ct->requests.lock, flags); list_for_each_entry(req, &ct->requests.pending, link) { if (unlikely(fence != req->fence)) { CT_DEBUG(ct, "request %u awaits response\n", @@ -710,58 +757,75 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) continue; } if (unlikely(datalen > req->response_len)) { - CT_ERROR(ct, "Response for %u is too long %*ph\n", - req->fence, msgsize, msg); - datalen = 0; + CT_ERROR(ct, "Response %u too long (datalen %u > %u)\n", + req->fence, datalen, req->response_len); + datalen = min(datalen, req->response_len); + err = -EMSGSIZE; } if (datalen) - memcpy(req->response_buf, msg + 3, 4 * datalen); + memcpy(req->response_buf, response->msg + 3, 4 * datalen); req->response_len = datalen; WRITE_ONCE(req->status, status); found = true; break; } - spin_unlock(&ct->requests.lock); + spin_unlock_irqrestore(&ct->requests.lock, flags); + + if (!found) { + CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence); + return -ENOKEY; + } + + if (unlikely(err)) + return err; - if (!found) - CT_ERROR(ct, "Unsolicited response %*ph\n", msgsize, msg); + ct_free_msg(response); return 0; } -static void ct_process_request(struct intel_guc_ct *ct, - u32 action, u32 len, const u32 *payload) +static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { struct intel_guc *guc = ct_to_guc(ct); + u32 header, action, len; + const u32 *payload; int ret; + header = request->msg[0]; + payload = &request->msg[1]; + action = ct_header_get_action(header); + len = ct_header_get_len(header); + CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload); switch (action) { case INTEL_GUC_ACTION_DEFAULT: ret = intel_guc_to_host_process_recv_msg(guc, payload, len); - if (unlikely(ret)) 
- goto fail_unexpected; break; - default: -fail_unexpected: - CT_ERROR(ct, "Unexpected request %x %*ph\n", - action, 4 * len, payload); + ret = -EOPNOTSUPP; break; } + + if (unlikely(ret)) { + CT_ERROR(ct, "Failed to process request %04x (%pe)\n", + action, ERR_PTR(ret)); + return ret; + } + + ct_free_msg(request); + return 0; } static bool ct_process_incoming_requests(struct intel_guc_ct *ct) { unsigned long flags; - struct ct_incoming_request *request; - u32 header; - u32 *payload; + struct ct_incoming_msg *request; bool done; + int err; spin_lock_irqsave(&ct->requests.lock, flags); request = list_first_entry_or_null(&ct->requests.incoming, - struct ct_incoming_request, link); + struct ct_incoming_msg, link); if (request) list_del(&request->link); done = !!list_empty(&ct->requests.incoming); @@ -770,14 +834,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) if (!request) return true; - header = request->msg[0]; - payload = &request->msg[1]; - ct_process_request(ct, - ct_header_get_action(header), - ct_header_get_len(header), - payload); + err = ct_process_request(ct, request); + if (unlikely(err)) { + CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", + ERR_PTR(err), 4 * request->size, request->msg); + ct_free_msg(request); + } - kfree(request); return done; } @@ -810,22 +873,11 @@ static void ct_incoming_request_worker_func(struct work_struct *w) * ^-----------------------len-----------------------^ */ -static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) +static int ct_handle_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { - u32 header = msg[0]; - u32 len = ct_header_get_len(header); - u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */ - struct ct_incoming_request *request; unsigned long flags; - GEM_BUG_ON(ct_header_is_response(header)); - - request = kmalloc(sizeof(*request) + msgsize, GFP_ATOMIC); - if (unlikely(!request)) { - CT_ERROR(ct, "Dropping request %*ph\n", 
msgsize, msg); - return 0; /* XXX: -ENOMEM ? */ - } - memcpy(request->msg, msg, msgsize); + GEM_BUG_ON(ct_header_is_response(request->msg[0])); spin_lock_irqsave(&ct->requests.lock, flags); list_add_tail(&request->link, &ct->requests.incoming); @@ -835,28 +887,74 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) return 0; } +static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) +{ + u32 header = msg->msg[0]; + int err; + + if (ct_header_is_response(header)) + err = ct_handle_response(ct, msg); + else + err = ct_handle_request(ct, msg); + + if (unlikely(err)) { + CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", + ERR_PTR(err), 4 * msg->size, msg->msg); + ct_free_msg(msg); + } +} + +/* + * Return: number available remaining dwords to read (0 if empty) + * or a negative error code on failure + */ +static int ct_receive(struct intel_guc_ct *ct) +{ + struct ct_incoming_msg *msg = NULL; + unsigned long flags; + int ret; + + spin_lock_irqsave(&ct->ctbs.recv.lock, flags); + ret = ct_read(ct, &msg); + spin_unlock_irqrestore(&ct->ctbs.recv.lock, flags); + if (ret < 0) + return ret; + + if (msg) + ct_handle_msg(ct, msg); + + return ret; +} + +static void ct_try_receive_message(struct intel_guc_ct *ct) +{ + int ret; + + if (GEM_WARN_ON(!ct->enabled)) + return; + + ret = ct_receive(ct); + if (ret > 0) + tasklet_hi_schedule(&ct->receive_tasklet); +} + +static void ct_receive_tasklet_func(struct tasklet_struct *t) +{ + struct intel_guc_ct *ct = from_tasklet(ct, t, receive_tasklet); + + ct_try_receive_message(ct); +} + /* * When we're communicating with the GuC over CT, GuC uses events * to notify us about new messages being posted on the RECV buffer. 
*/ void intel_guc_ct_event_handler(struct intel_guc_ct *ct) { - u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ - int err = 0; - if (unlikely(!ct->enabled)) { WARN(1, "Unexpected GuC event received while CT disabled!\n"); return; } - do { - err = ct_read(ct, msg); - if (err) - break; - - if (ct_header_is_response(msg[0])) - err = ct_handle_response(ct, msg); - else - err = ct_handle_request(ct, msg); - } while (!err); + ct_try_receive_message(ct); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 494a51a5200f..cb222f202301 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -6,6 +6,7 @@ #ifndef _INTEL_GUC_CT_H_ #define _INTEL_GUC_CT_H_ +#include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/workqueue.h> @@ -27,12 +28,16 @@ struct intel_guc; * record (command transport buffer descriptor) and the actual buffer which * holds the commands. * + * @lock: protects access to the commands buffer and buffer descriptor * @desc: pointer to the buffer descriptor * @cmds: pointer to the commands buffer + * @size: size of the commands buffer */ struct intel_guc_ct_buffer { + spinlock_t lock; struct guc_ct_buffer_desc *desc; u32 *cmds; + u32 size; }; @@ -45,8 +50,13 @@ struct intel_guc_ct { struct i915_vma *vma; bool enabled; - /* buffers for sending(0) and receiving(1) commands */ - struct intel_guc_ct_buffer ctbs[2]; + /* buffers for sending and receiving commands */ + struct { + struct intel_guc_ct_buffer send; + struct intel_guc_ct_buffer recv; + } ctbs; + + struct tasklet_struct receive_tasklet; struct { u32 last_fence; /* last fence used to send request */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 2270d6b3b272..76fe766ad1bc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -30,7 +30,7 @@ static void 
guc_prepare_xfer(struct intel_uncore *uncore) else intel_uncore_write(uncore, GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); - if (IS_GEN(uncore->i915, 9)) { + if (GRAPHICS_VER(uncore->i915) == 9) { /* DOP Clock Gating Enable for GuC clocks */ intel_uncore_rmw(uncore, GEN7_MISCCPCTL, 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 79c560d9c0b6..e9a9d85e2aa3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -9,6 +9,13 @@ #include <linux/bits.h> #include <linux/compiler.h> #include <linux/types.h> +#include "gt/intel_engine_types.h" + +#include "abi/guc_actions_abi.h" +#include "abi/guc_errors_abi.h" +#include "abi/guc_communication_mmio_abi.h" +#include "abi/guc_communication_ctb_abi.h" +#include "abi/guc_messages_abi.h" #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 @@ -26,6 +33,12 @@ #define GUC_VIDEO_ENGINE2 4 #define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) +#define GUC_RENDER_CLASS 0 +#define GUC_VIDEO_CLASS 1 +#define GUC_VIDEOENHANCE_CLASS 2 +#define GUC_BLITTER_CLASS 3 +#define GUC_RESERVED_CLASS 4 +#define GUC_LAST_ENGINE_CLASS GUC_RESERVED_CLASS #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32 @@ -123,6 +136,25 @@ #define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \ (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT) +static inline u8 engine_class_to_guc_class(u8 class) +{ + BUILD_BUG_ON(GUC_RENDER_CLASS != RENDER_CLASS); + BUILD_BUG_ON(GUC_BLITTER_CLASS != COPY_ENGINE_CLASS); + BUILD_BUG_ON(GUC_VIDEO_CLASS != VIDEO_DECODE_CLASS); + BUILD_BUG_ON(GUC_VIDEOENHANCE_CLASS != VIDEO_ENHANCEMENT_CLASS); + GEM_BUG_ON(class > MAX_ENGINE_CLASS || class == OTHER_CLASS); + + return class; +} + +static inline u8 guc_class_to_engine_class(u8 guc_class) +{ + GEM_BUG_ON(guc_class > GUC_LAST_ENGINE_CLASS); + GEM_BUG_ON(guc_class == GUC_RESERVED_CLASS); + + return 
guc_class; +} + /* Work item for submitting workloads into work queue of GuC. */ struct guc_wq_item { u32 header; @@ -207,104 +239,6 @@ struct guc_stage_desc { u64 desc_private; } __packed; -/** - * DOC: CTB based communication - * - * The CTB (command transport buffer) communication between Host and GuC - * is based on u32 data stream written to the shared buffer. One buffer can - * be used to transmit data only in one direction (one-directional channel). - * - * Current status of the each buffer is stored in the buffer descriptor. - * Buffer descriptor holds tail and head fields that represents active data - * stream. The tail field is updated by the data producer (sender), and head - * field is updated by the data consumer (receiver):: - * - * +------------+ - * | DESCRIPTOR | +=================+============+========+ - * +============+ | | MESSAGE(s) | | - * | address |--------->+=================+============+========+ - * +------------+ - * | head | ^-----head--------^ - * +------------+ - * | tail | ^---------tail-----------------^ - * +------------+ - * | size | ^---------------size--------------------^ - * +------------+ - * - * Each message in data stream starts with the single u32 treated as a header, - * followed by optional set of u32 data that makes message specific payload:: - * - * +------------+---------+---------+---------+ - * | MESSAGE | - * +------------+---------+---------+---------+ - * | msg[0] | [1] | ... | [n-1] | - * +------------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+ - * | | 0 | ... 
| n | - * +======+=====+=========+=========+=========+ - * | 31:16| code| | | | - * +------+-----+ | | | - * | 15:5|flags| | | | - * +------+-----+ | | | - * | 4:0| len| | | | - * +------+-----+---------+---------+---------+ - * - * ^-------------len-------------^ - * - * The message header consists of: - * - * - **len**, indicates length of the message payload (in u32) - * - **code**, indicates message code - * - **flags**, holds various bits to control message handling - */ - -/* - * Describes single command transport buffer. - * Used by both guc-master and clients. - */ -struct guc_ct_buffer_desc { - u32 addr; /* gfx address */ - u64 host_private; /* host private data */ - u32 size; /* size in bytes */ - u32 head; /* offset updated by GuC*/ - u32 tail; /* offset updated by owner */ - u32 is_in_error; /* error indicator */ - u32 fence; /* fence updated by GuC */ - u32 status; /* status updated by GuC */ - u32 owner; /* id of the channel owner */ - u32 owner_sub_id; /* owner-defined field for extra tracking */ - u32 reserved[5]; -} __packed; - -/* Type of command transport buffer */ -#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u -#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u - -/* - * Definition of the command transport message header (DW0) - * - * bit[4..0] message len (in dwords) - * bit[7..5] reserved - * bit[8] response (G2H only) - * bit[8] write fence to desc (H2G only) - * bit[9] write status to H2G buff (H2G only) - * bit[10] send status back via G2H (H2G only) - * bit[15..11] reserved - * bit[31..16] action code - */ -#define GUC_CT_MSG_LEN_SHIFT 0 -#define GUC_CT_MSG_LEN_MASK 0x1F -#define GUC_CT_MSG_IS_RESPONSE (1 << 8) -#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) -#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) -#define GUC_CT_MSG_SEND_STATUS (1 << 10) -#define GUC_CT_MSG_ACTION_SHIFT 16 -#define GUC_CT_MSG_ACTION_MASK 0xFFFF - -#define GUC_FORCEWAKE_RENDER (1 << 0) -#define GUC_FORCEWAKE_MEDIA (1 << 1) - #define GUC_POWER_UNSPECIFIED 0 #define 
GUC_POWER_D0 1 #define GUC_POWER_D1 2 @@ -480,120 +414,17 @@ struct guc_shared_ctx_data { struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; } __packed; -/** - * DOC: MMIO based communication - * - * The MMIO based communication between Host and GuC uses software scratch - * registers, where first register holds data treated as message header, - * and other registers are used to hold message payload. - * - * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8, - * but no H2G command takes more than 8 parameters and the GuC FW - * itself uses an 8-element array to store the H2G message. - * - * +-----------+---------+---------+---------+ - * | MMIO[0] | MMIO[1] | ... | MMIO[n] | - * +-----------+---------+---------+---------+ - * | header | optional payload | - * +======+====+=========+=========+=========+ - * | 31:28|type| | | | - * +------+----+ | | | - * | 27:16|data| | | | - * +------+----+ | | | - * | 15:0|code| | | | - * +------+----+---------+---------+---------+ - * - * The message header consists of: - * - * - **type**, indicates message type - * - **code**, indicates message code, is specific for **type** - * - **data**, indicates message data, optional, depends on **code** - * - * The following message **types** are supported: - * - * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code - * must be priovided in **code** field. Optional action specific parameters - * can be provided in remaining payload registers or **data** field. - * - * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request, - * action response status will be provided in **code** field. Optional - * response data can be returned in remaining payload registers or **data** - * field. 
- */ - -#define GUC_MAX_MMIO_MSG_LEN 8 - -#define INTEL_GUC_MSG_TYPE_SHIFT 28 -#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) -#define INTEL_GUC_MSG_DATA_SHIFT 16 -#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) -#define INTEL_GUC_MSG_CODE_SHIFT 0 -#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) - #define __INTEL_GUC_MSG_GET(T, m) \ (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT) #define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m) #define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m) #define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m) -enum intel_guc_msg_type { - INTEL_GUC_MSG_TYPE_REQUEST = 0x0, - INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, -}; - #define __INTEL_GUC_MSG_TYPE_IS(T, m) \ (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T) #define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m) #define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m) -enum intel_guc_action { - INTEL_GUC_ACTION_DEFAULT = 0x0, - INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, - INTEL_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, - INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, - INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, - INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, - INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, - INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, - INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, - INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, - INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, - INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x3005, - INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, - INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, - INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, - INTEL_GUC_ACTION_LIMIT -}; - -enum intel_guc_preempt_options { - INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, - INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, -}; - -enum intel_guc_report_status { - INTEL_GUC_REPORT_STATUS_UNKNOWN = 0x0, - 
INTEL_GUC_REPORT_STATUS_ACKED = 0x1, - INTEL_GUC_REPORT_STATUS_ERROR = 0x2, - INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4, -}; - -enum intel_guc_sleep_state_status { - INTEL_GUC_SLEEP_STATE_SUCCESS = 0x1, - INTEL_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2, - INTEL_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3 -#define INTEL_GUC_SLEEP_STATE_INVALID_MASK 0x80000000 -}; - -#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) -#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 -#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) -#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) - -enum intel_guc_response_status { - INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0, - INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, -}; - #define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \ (typecheck(u32, (m)) && \ ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 92688a9b6717..7c8ff9792f7b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -11,6 +11,7 @@ #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" #include "gt/intel_lrc.h" #include "gt/intel_mocs.h" @@ -264,6 +265,14 @@ static void guc_submission_tasklet(struct tasklet_struct *t) spin_unlock_irqrestore(&engine->active.lock, flags); } +static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) +{ + if (iir & GT_RENDER_USER_INTERRUPT) { + intel_engine_signal_breadcrumbs(engine); + tasklet_hi_schedule(&engine->execlists.tasklet); + } +} + static void guc_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -423,32 +432,6 @@ void intel_guc_submission_fini(struct intel_guc *guc) } } -static void guc_interrupts_capture(struct intel_gt *gt) -{ - struct intel_uncore 
*uncore = gt->uncore; - u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; - u32 dmask = irqs << 16 | irqs; - - GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); - - /* Don't handle the ctx switch interrupt in GuC submission mode */ - intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0); - intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0); -} - -static void guc_interrupts_release(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT; - u32 dmask = irqs << 16 | irqs; - - GEM_BUG_ON(INTEL_GEN(gt->i915) < 11); - - /* Handle ctx switch interrupts again */ - intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask); - intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask); -} - static int guc_context_alloc(struct intel_context *ce) { return lrc_alloc(ce, ce->engine); @@ -608,35 +591,6 @@ static int guc_resume(struct intel_engine_cs *engine) static void guc_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = guc_submit_request; - engine->schedule = i915_schedule; - engine->execlists.tasklet.callback = guc_submission_tasklet; - - engine->reset.prepare = guc_reset_prepare; - engine->reset.rewind = guc_reset_rewind; - engine->reset.cancel = guc_reset_cancel; - engine->reset.finish = guc_reset_finish; - - engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; - engine->flags |= I915_ENGINE_HAS_PREEMPTION; - - /* - * TODO: GuC supports timeslicing and semaphores as well, but they're - * handled by the firmware so some minor tweaks are required before - * enabling. - * - * engine->flags |= I915_ENGINE_HAS_TIMESLICES; - * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - */ - - engine->emit_bb_start = gen8_emit_bb_start; - - /* - * For the breadcrumb irq to work we need the interrupts to stay - * enabled. However, on all platforms on which we'll have support for - * GuC submission we don't allow disabling the interrupts at runtime, so - * we're always safe with the current flow. 
- */ - GEM_BUG_ON(engine->irq_enable || engine->irq_disable); } static void guc_release(struct intel_engine_cs *engine) @@ -658,19 +612,39 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &guc_context_ops; engine->request_alloc = guc_request_alloc; + engine->schedule = i915_schedule; + + engine->reset.prepare = guc_reset_prepare; + engine->reset.rewind = guc_reset_rewind; + engine->reset.cancel = guc_reset_cancel; + engine->reset.finish = guc_reset_finish; + engine->emit_flush = gen8_emit_flush_xcs; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; - if (INTEL_GEN(engine->i915) >= 12) { + if (GRAPHICS_VER(engine->i915) >= 12) { engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; engine->emit_flush = gen12_emit_flush_xcs; } engine->set_default_submission = guc_set_default_submission; + + engine->flags |= I915_ENGINE_HAS_PREEMPTION; + + /* + * TODO: GuC supports timeslicing and semaphores as well, but they're + * handled by the firmware so some minor tweaks are required before + * enabling. + * + * engine->flags |= I915_ENGINE_HAS_TIMESLICES; + * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; + */ + + engine->emit_bb_start = gen8_emit_bb_start; } static void rcs_submission_override(struct intel_engine_cs *engine) { - switch (INTEL_GEN(engine->i915)) { + switch (GRAPHICS_VER(engine->i915)) { case 12: engine->emit_flush = gen12_emit_flush_rcs; engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; @@ -689,6 +663,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine) static inline void guc_default_irqs(struct intel_engine_cs *engine) { engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; + intel_engine_set_irq_handler(engine, cs_irq_handler); } int intel_guc_submission_setup(struct intel_engine_cs *engine) @@ -699,7 +674,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) * The setup relies on several assumptions (e.g. 
irqs always enabled) * that are only valid on gen11+ */ - GEM_BUG_ON(INTEL_GEN(i915) < 11); + GEM_BUG_ON(GRAPHICS_VER(i915) < 11); tasklet_setup(&engine->execlists.tasklet, guc_submission_tasklet); @@ -721,9 +696,6 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { guc_stage_desc_init(guc); - - /* Take over from manual control of ELSP (execlists) */ - guc_interrupts_capture(guc_to_gt(guc)); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -734,8 +706,6 @@ void intel_guc_submission_disable(struct intel_guc *guc) /* Note: By the time we're here, GuC may have already been reset */ - guc_interrupts_release(gt); - guc_stage_desc_fini(guc); } @@ -753,8 +723,3 @@ void intel_guc_submission_init_early(struct intel_guc *guc) { guc->submission_selected = __guc_submission_selected(guc); } - -bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) -{ - return engine->set_default_submission == guc_set_default_submission; -} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 5f7b9e6347d0..3f7005018939 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -20,7 +20,6 @@ void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); int intel_guc_submission_setup(struct intel_engine_cs *engine); -bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 2126dd81ac38..fc5387b410a2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -43,7 +43,7 @@ void intel_huc_init_early(struct intel_huc 
*huc) intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); - if (INTEL_GEN(i915) >= 11) { + if (GRAPHICS_VER(i915) >= 11) { huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; huc->status.mask = HUC_LOAD_SUCCESSFUL; huc->status.value = HUC_LOAD_SUCCESSFUL; @@ -82,7 +82,9 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) if (IS_ERR(vma)) return PTR_ERR(vma); - vaddr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(vma->obj, + i915_coherent_map_type(gt->i915, + vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6abb8f2dc33d..6d8b9233214e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -23,7 +23,7 @@ static void uc_expand_default_options(struct intel_uc *uc) return; /* Don't enable GuC/HuC on pre-Gen12 */ - if (INTEL_GEN(i915) < 12) { + if (GRAPHICS_VER(i915) < 12) { i915->params.enable_guc = 0; return; } @@ -467,7 +467,7 @@ static int __uc_init_hw(struct intel_uc *uc) /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ - if (IS_GEN(i915, 9)) + if (GRAPHICS_VER(i915) == 9) attempts = 3; else attempts = 1; @@ -502,10 +502,6 @@ static int __uc_init_hw(struct intel_uc *uc) intel_huc_auth(huc); - ret = intel_guc_sample_forcewake(guc); - if (ret) - goto err_communication; - if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_enable(guc); @@ -529,8 +525,6 @@ static int __uc_init_hw(struct intel_uc *uc) /* * We've failed to load the firmware :( */ -err_communication: - guc_disable_communication(guc); err_log_capture: __uc_capture_load_err_log(uc); err_out: @@ -558,9 +552,6 @@ static void __uc_fini_hw(struct intel_uc *uc) if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_disable(guc); - if (guc_communication_enabled(guc)) - guc_disable_communication(guc); - 
__uc_sanitize(uc); } @@ -577,7 +568,6 @@ void intel_uc_reset_prepare(struct intel_uc *uc) if (!intel_guc_is_ready(guc)) return; - guc_disable_communication(guc); __uc_sanitize(uc); } diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index e7c2babcee8b..cbac409f6c8a 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -46,118 +46,6 @@ static const char * const supported_hypervisors[] = { [INTEL_GVT_HYPERVISOR_KVM] = "KVM", }; -static struct intel_vgpu_type * -intel_gvt_find_vgpu_type(struct intel_gvt *gvt, unsigned int type_group_id) -{ - if (WARN_ON(type_group_id >= gvt->num_types)) - return NULL; - return &gvt->types[type_group_id]; -} - -static ssize_t available_instances_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, - char *buf) -{ - struct intel_vgpu_type *type; - unsigned int num = 0; - void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; - - type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype)); - if (!type) - num = 0; - else - num = type->avail_instance; - - return sprintf(buf, "%u\n", num); -} - -static ssize_t device_api_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf) -{ - return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); -} - -static ssize_t description_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf) -{ - struct intel_vgpu_type *type; - void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; - - type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype)); - if (!type) - return 0; - - return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\nresolution: %s\n" - "weight: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence, vgpu_edid_str(type->resolution), - type->weight); -} - -static MDEV_TYPE_ATTR_RO(available_instances); -static MDEV_TYPE_ATTR_RO(device_api); -static MDEV_TYPE_ATTR_RO(description); - -static struct attribute 
*gvt_type_attrs[] = { - &mdev_type_attr_available_instances.attr, - &mdev_type_attr_device_api.attr, - &mdev_type_attr_description.attr, - NULL, -}; - -static struct attribute_group *gvt_vgpu_type_groups[] = { - [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, -}; - -static bool intel_get_gvt_attrs(struct attribute_group ***intel_vgpu_type_groups) -{ - *intel_vgpu_type_groups = gvt_vgpu_type_groups; - return true; -} - -static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i, j; - struct intel_vgpu_type *type; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - type = &gvt->types[i]; - - group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); - if (WARN_ON(!group)) - goto unwind; - - group->name = type->name; - group->attrs = gvt_type_attrs; - gvt_vgpu_type_groups[i] = group; - } - - return 0; - -unwind: - for (j = 0; j < i; j++) { - group = gvt_vgpu_type_groups[j]; - kfree(group); - } - - return -ENOMEM; -} - -static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - group = gvt_vgpu_type_groups[i]; - gvt_vgpu_type_groups[i] = NULL; - kfree(group); - } -} - static const struct intel_gvt_ops intel_gvt_ops = { .emulate_cfg_read = intel_vgpu_emulate_cfg_read, .emulate_cfg_write = intel_vgpu_emulate_cfg_write, @@ -169,8 +57,6 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_reset = intel_gvt_reset_vgpu, .vgpu_activate = intel_gvt_activate_vgpu, .vgpu_deactivate = intel_gvt_deactivate_vgpu, - .gvt_find_vgpu_type = intel_gvt_find_vgpu_type, - .get_gvt_attrs = intel_get_gvt_attrs, .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, .write_protect_handler = intel_vgpu_page_track_handler, @@ -274,7 +160,6 @@ void intel_gvt_clean_device(struct drm_i915_private *i915) return; intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); - intel_gvt_cleanup_vgpu_type_groups(gvt); 
intel_gvt_clean_vgpu_types(gvt); intel_gvt_debugfs_clean(gvt); @@ -363,12 +248,6 @@ int intel_gvt_init_device(struct drm_i915_private *i915) if (ret) goto out_clean_thread; - ret = intel_gvt_init_vgpu_type_groups(gvt); - if (ret) { - gvt_err("failed to init vgpu type groups: %d\n", ret); - goto out_clean_types; - } - vgpu = intel_gvt_create_idle_vgpu(gvt); if (IS_ERR(vgpu)) { ret = PTR_ERR(vgpu); @@ -454,7 +333,8 @@ EXPORT_SYMBOL_GPL(intel_gvt_register_hypervisor); void intel_gvt_unregister_hypervisor(void) { - intel_gvt_hypervisor_host_exit(intel_gvt_host.dev); + void *gvt = (void *)kdev_to_i915(intel_gvt_host.dev)->gvt; + intel_gvt_hypervisor_host_exit(intel_gvt_host.dev, gvt); module_put(THIS_MODULE); } EXPORT_SYMBOL_GPL(intel_gvt_unregister_hypervisor); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 88ab360fcb31..0c0615602343 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -574,9 +574,6 @@ struct intel_gvt_ops { void (*vgpu_reset)(struct intel_vgpu *); void (*vgpu_activate)(struct intel_vgpu *); void (*vgpu_deactivate)(struct intel_vgpu *); - struct intel_vgpu_type *(*gvt_find_vgpu_type)( - struct intel_gvt *gvt, unsigned int type_group_id); - bool (*get_gvt_attrs)(struct attribute_group ***intel_vgpu_type_groups); int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *); int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); int (*write_protect_handler)(struct intel_vgpu *, u64, void *, diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index b79da5124f83..f33e3cbd0439 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -49,7 +49,7 @@ enum hypervisor_type { struct intel_gvt_mpt { enum hypervisor_type type; int (*host_init)(struct device *dev, void *gvt, const void *ops); - void (*host_exit)(struct device *dev); + void (*host_exit)(struct device *dev, void *gvt); int (*attach_vgpu)(void *vgpu, unsigned 
long *handle); void (*detach_vgpu)(void *vgpu); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 65ff43cfc0f7..48b4d4cf805d 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -144,6 +144,104 @@ static inline bool handle_valid(unsigned long handle) return !!(handle & ~0xff); } +static ssize_t available_instances_show(struct mdev_type *mtype, + struct mdev_type_attribute *attr, + char *buf) +{ + struct intel_vgpu_type *type; + unsigned int num = 0; + struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; + + type = &gvt->types[mtype_get_type_group_id(mtype)]; + if (!type) + num = 0; + else + num = type->avail_instance; + + return sprintf(buf, "%u\n", num); +} + +static ssize_t device_api_show(struct mdev_type *mtype, + struct mdev_type_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); +} + +static ssize_t description_show(struct mdev_type *mtype, + struct mdev_type_attribute *attr, char *buf) +{ + struct intel_vgpu_type *type; + struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; + + type = &gvt->types[mtype_get_type_group_id(mtype)]; + if (!type) + return 0; + + return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" + "fence: %d\nresolution: %s\n" + "weight: %d\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution), + type->weight); +} + +static MDEV_TYPE_ATTR_RO(available_instances); +static MDEV_TYPE_ATTR_RO(device_api); +static MDEV_TYPE_ATTR_RO(description); + +static struct attribute *gvt_type_attrs[] = { + &mdev_type_attr_available_instances.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_description.attr, + NULL, +}; + +static struct attribute_group *gvt_vgpu_type_groups[] = { + [0 ... 
NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, +}; + +static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i, j; + struct intel_vgpu_type *type; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + type = &gvt->types[i]; + + group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); + if (!group) + goto unwind; + + group->name = type->name; + group->attrs = gvt_type_attrs; + gvt_vgpu_type_groups[i] = group; + } + + return 0; + +unwind: + for (j = 0; j < i; j++) { + group = gvt_vgpu_type_groups[j]; + kfree(group); + } + + return -ENOMEM; +} + +static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + group = gvt_vgpu_type_groups[i]; + gvt_vgpu_type_groups[i] = NULL; + kfree(group); + } +} + static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); @@ -694,14 +792,13 @@ static int intel_vgpu_create(struct mdev_device *mdev) struct intel_vgpu *vgpu = NULL; struct intel_vgpu_type *type; struct device *pdev; - void *gvt; + struct intel_gvt *gvt; int ret; pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; - type = intel_gvt_ops->gvt_find_vgpu_type(gvt, - mdev_get_type_group_id(mdev)); + type = &gvt->types[mdev_get_type_group_id(mdev)]; if (!type) { ret = -EINVAL; goto out; @@ -1667,19 +1764,26 @@ static struct mdev_parent_ops intel_vgpu_ops = { static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) { - struct attribute_group **kvm_vgpu_type_groups; + int ret; + + ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt); + if (ret) + return ret; intel_gvt_ops = ops; - if (!intel_gvt_ops->get_gvt_attrs(&kvm_vgpu_type_groups)) - return -EFAULT; - intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups; + intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups; - return 
mdev_register_device(dev, &intel_vgpu_ops); + ret = mdev_register_device(dev, &intel_vgpu_ops); + if (ret) + intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); + + return ret; } -static void kvmgt_host_exit(struct device *dev) +static void kvmgt_host_exit(struct device *dev, void *gvt) { mdev_unregister_device(dev); + intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); } static int kvmgt_page_track_add(unsigned long handle, u64 gfn) diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 550a456e936f..e6c5a792a49a 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -63,13 +63,13 @@ static inline int intel_gvt_hypervisor_host_init(struct device *dev, /** * intel_gvt_hypervisor_host_exit - exit GVT-g host side */ -static inline void intel_gvt_hypervisor_host_exit(struct device *dev) +static inline void intel_gvt_hypervisor_host_exit(struct device *dev, void *gvt) { /* optional to provide */ if (!intel_gvt_host.mpt->host_exit) return; - intel_gvt_host.mpt->host_exit(dev); + intel_gvt_host.mpt->host_exit(dev, gvt); } /** diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index aa573b078ae7..b1aa1c482c32 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -343,18 +343,15 @@ out: void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), + unsigned long flags, struct lock_class_key *mkey, struct lock_class_key *wkey) { - unsigned long bits; - debug_active_init(ref); - ref->flags = 0; + ref->flags = flags; ref->active = active; - ref->retire = ptr_unpack_bits(retire, &bits, 2); - if (bits & I915_ACTIVE_MAY_SLEEP) - ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; + ref->retire = retire; spin_lock_init(&ref->tree_lock); ref->tree = RB_ROOT; @@ -1156,8 +1153,7 @@ static int auto_active(struct i915_active *ref) return 0; } -__i915_active_call static void 
-auto_retire(struct i915_active *ref) +static void auto_retire(struct i915_active *ref) { i915_active_put(ref); } @@ -1171,7 +1167,7 @@ struct i915_active *i915_active_create(void) return NULL; kref_init(&aa->ref); - i915_active_init(&aa->base, auto_active, auto_retire); + i915_active_init(&aa->base, auto_active, auto_retire, 0); return &aa->base; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index fb165d3f01cf..d0feda68b874 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -152,15 +152,16 @@ i915_active_fence_isset(const struct i915_active_fence *active) void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), + unsigned long flags, struct lock_class_key *mkey, struct lock_class_key *wkey); /* Specialise each class of i915_active to avoid impossible lockdep cycles. */ -#define i915_active_init(ref, active, retire) do { \ - static struct lock_class_key __mkey; \ - static struct lock_class_key __wkey; \ - \ - __i915_active_init(ref, active, retire, &__mkey, &__wkey); \ +#define i915_active_init(ref, active, retire, flags) do { \ + static struct lock_class_key __mkey; \ + static struct lock_class_key __wkey; \ + \ + __i915_active_init(ref, active, retire, flags, &__mkey, &__wkey); \ } while (0) struct dma_fence * diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 6360c3e4b765..c149f348a972 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -24,11 +24,6 @@ struct i915_active_fence { struct active_node; -#define I915_ACTIVE_MAY_SLEEP BIT(0) - -#define __i915_active_call __aligned(4) -#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2) - struct i915_active { atomic_t count; struct mutex mutex; diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c deleted 
file mode 100644 index 3a2f6eecb2fc..000000000000 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ /dev/null @@ -1,435 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include <linux/kmemleak.h> -#include <linux/slab.h> - -#include "i915_buddy.h" - -#include "i915_gem.h" -#include "i915_globals.h" -#include "i915_utils.h" - -static struct i915_global_block { - struct i915_global base; - struct kmem_cache *slab_blocks; -} global; - -static void i915_global_buddy_shrink(void) -{ - kmem_cache_shrink(global.slab_blocks); -} - -static void i915_global_buddy_exit(void) -{ - kmem_cache_destroy(global.slab_blocks); -} - -static struct i915_global_block global = { { - .shrink = i915_global_buddy_shrink, - .exit = i915_global_buddy_exit, -} }; - -int __init i915_global_buddy_init(void) -{ - global.slab_blocks = KMEM_CACHE(i915_buddy_block, SLAB_HWCACHE_ALIGN); - if (!global.slab_blocks) - return -ENOMEM; - - i915_global_register(&global.base); - return 0; -} - -static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_block *parent, - unsigned int order, - u64 offset) -{ - struct i915_buddy_block *block; - - GEM_BUG_ON(order > I915_BUDDY_MAX_ORDER); - - block = kmem_cache_zalloc(global.slab_blocks, GFP_KERNEL); - if (!block) - return NULL; - - block->header = offset; - block->header |= order; - block->parent = parent; - - GEM_BUG_ON(block->header & I915_BUDDY_HEADER_UNUSED); - return block; -} - -static void i915_block_free(struct i915_buddy_block *block) -{ - kmem_cache_free(global.slab_blocks, block); -} - -static void mark_allocated(struct i915_buddy_block *block) -{ - block->header &= ~I915_BUDDY_HEADER_STATE; - block->header |= I915_BUDDY_ALLOCATED; - - list_del(&block->link); -} - -static void mark_free(struct i915_buddy_mm *mm, - struct i915_buddy_block *block) -{ - block->header &= ~I915_BUDDY_HEADER_STATE; - block->header |= I915_BUDDY_FREE; - - list_add(&block->link, - &mm->free_list[i915_buddy_block_order(block)]); 
-} - -static void mark_split(struct i915_buddy_block *block) -{ - block->header &= ~I915_BUDDY_HEADER_STATE; - block->header |= I915_BUDDY_SPLIT; - - list_del(&block->link); -} - -int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) -{ - unsigned int i; - u64 offset; - - if (size < chunk_size) - return -EINVAL; - - if (chunk_size < PAGE_SIZE) - return -EINVAL; - - if (!is_power_of_2(chunk_size)) - return -EINVAL; - - size = round_down(size, chunk_size); - - mm->size = size; - mm->chunk_size = chunk_size; - mm->max_order = ilog2(size) - ilog2(chunk_size); - - GEM_BUG_ON(mm->max_order > I915_BUDDY_MAX_ORDER); - - mm->free_list = kmalloc_array(mm->max_order + 1, - sizeof(struct list_head), - GFP_KERNEL); - if (!mm->free_list) - return -ENOMEM; - - for (i = 0; i <= mm->max_order; ++i) - INIT_LIST_HEAD(&mm->free_list[i]); - - mm->n_roots = hweight64(size); - - mm->roots = kmalloc_array(mm->n_roots, - sizeof(struct i915_buddy_block *), - GFP_KERNEL); - if (!mm->roots) - goto out_free_list; - - offset = 0; - i = 0; - - /* - * Split into power-of-two blocks, in case we are given a size that is - * not itself a power-of-two. 
- */ - do { - struct i915_buddy_block *root; - unsigned int order; - u64 root_size; - - root_size = rounddown_pow_of_two(size); - order = ilog2(root_size) - ilog2(chunk_size); - - root = i915_block_alloc(NULL, order, offset); - if (!root) - goto out_free_roots; - - mark_free(mm, root); - - GEM_BUG_ON(i > mm->max_order); - GEM_BUG_ON(i915_buddy_block_size(mm, root) < chunk_size); - - mm->roots[i] = root; - - offset += root_size; - size -= root_size; - i++; - } while (size); - - return 0; - -out_free_roots: - while (i--) - i915_block_free(mm->roots[i]); - kfree(mm->roots); -out_free_list: - kfree(mm->free_list); - return -ENOMEM; -} - -void i915_buddy_fini(struct i915_buddy_mm *mm) -{ - int i; - - for (i = 0; i < mm->n_roots; ++i) { - GEM_WARN_ON(!i915_buddy_block_is_free(mm->roots[i])); - i915_block_free(mm->roots[i]); - } - - kfree(mm->roots); - kfree(mm->free_list); -} - -static int split_block(struct i915_buddy_mm *mm, - struct i915_buddy_block *block) -{ - unsigned int block_order = i915_buddy_block_order(block) - 1; - u64 offset = i915_buddy_block_offset(block); - - GEM_BUG_ON(!i915_buddy_block_is_free(block)); - GEM_BUG_ON(!i915_buddy_block_order(block)); - - block->left = i915_block_alloc(block, block_order, offset); - if (!block->left) - return -ENOMEM; - - block->right = i915_block_alloc(block, block_order, - offset + (mm->chunk_size << block_order)); - if (!block->right) { - i915_block_free(block->left); - return -ENOMEM; - } - - mark_free(mm, block->left); - mark_free(mm, block->right); - - mark_split(block); - - return 0; -} - -static struct i915_buddy_block * -get_buddy(struct i915_buddy_block *block) -{ - struct i915_buddy_block *parent; - - parent = block->parent; - if (!parent) - return NULL; - - if (parent->left == block) - return parent->right; - - return parent->left; -} - -static void __i915_buddy_free(struct i915_buddy_mm *mm, - struct i915_buddy_block *block) -{ - struct i915_buddy_block *parent; - - while ((parent = block->parent)) { - struct 
i915_buddy_block *buddy; - - buddy = get_buddy(block); - - if (!i915_buddy_block_is_free(buddy)) - break; - - list_del(&buddy->link); - - i915_block_free(block); - i915_block_free(buddy); - - block = parent; - } - - mark_free(mm, block); -} - -void i915_buddy_free(struct i915_buddy_mm *mm, - struct i915_buddy_block *block) -{ - GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); - __i915_buddy_free(mm, block); -} - -void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects) -{ - struct i915_buddy_block *block, *on; - - list_for_each_entry_safe(block, on, objects, link) { - i915_buddy_free(mm, block); - cond_resched(); - } - INIT_LIST_HEAD(objects); -} - -/* - * Allocate power-of-two block. The order value here translates to: - * - * 0 = 2^0 * mm->chunk_size - * 1 = 2^1 * mm->chunk_size - * 2 = 2^2 * mm->chunk_size - * ... - */ -struct i915_buddy_block * -i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) -{ - struct i915_buddy_block *block = NULL; - unsigned int i; - int err; - - for (i = order; i <= mm->max_order; ++i) { - block = list_first_entry_or_null(&mm->free_list[i], - struct i915_buddy_block, - link); - if (block) - break; - } - - if (!block) - return ERR_PTR(-ENOSPC); - - GEM_BUG_ON(!i915_buddy_block_is_free(block)); - - while (i != order) { - err = split_block(mm, block); - if (unlikely(err)) - goto out_free; - - /* Go low */ - block = block->left; - i--; - } - - mark_allocated(block); - kmemleak_update_trace(block); - return block; - -out_free: - if (i != order) - __i915_buddy_free(mm, block); - return ERR_PTR(err); -} - -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) -{ - return s1 <= e2 && e1 >= s2; -} - -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) -{ - return s1 <= s2 && e1 >= e2; -} - -/* - * Allocate range. Note that it's safe to chain together multiple alloc_ranges - * with the same blocks list. 
- * - * Intended for pre-allocating portions of the address space, for example to - * reserve a block for the initial framebuffer or similar, hence the expectation - * here is that i915_buddy_alloc() is still the main vehicle for - * allocations, so if that's not the case then the drm_mm range allocator is - * probably a much better fit, and so you should probably go use that instead. - */ -int i915_buddy_alloc_range(struct i915_buddy_mm *mm, - struct list_head *blocks, - u64 start, u64 size) -{ - struct i915_buddy_block *block; - struct i915_buddy_block *buddy; - LIST_HEAD(allocated); - LIST_HEAD(dfs); - u64 end; - int err; - int i; - - if (size < mm->chunk_size) - return -EINVAL; - - if (!IS_ALIGNED(size | start, mm->chunk_size)) - return -EINVAL; - - if (range_overflows(start, size, mm->size)) - return -EINVAL; - - for (i = 0; i < mm->n_roots; ++i) - list_add_tail(&mm->roots[i]->tmp_link, &dfs); - - end = start + size - 1; - - do { - u64 block_start; - u64 block_end; - - block = list_first_entry_or_null(&dfs, - struct i915_buddy_block, - tmp_link); - if (!block) - break; - - list_del(&block->tmp_link); - - block_start = i915_buddy_block_offset(block); - block_end = block_start + i915_buddy_block_size(mm, block) - 1; - - if (!overlaps(start, end, block_start, block_end)) - continue; - - if (i915_buddy_block_is_allocated(block)) { - err = -ENOSPC; - goto err_free; - } - - if (contains(start, end, block_start, block_end)) { - if (!i915_buddy_block_is_free(block)) { - err = -ENOSPC; - goto err_free; - } - - mark_allocated(block); - list_add_tail(&block->link, &allocated); - continue; - } - - if (!i915_buddy_block_is_split(block)) { - err = split_block(mm, block); - if (unlikely(err)) - goto err_undo; - } - - list_add(&block->right->tmp_link, &dfs); - list_add(&block->left->tmp_link, &dfs); - } while (1); - - list_splice_tail(&allocated, blocks); - return 0; - -err_undo: - /* - * We really don't want to leave around a bunch of split blocks, since - * bigger is 
better, so make sure we merge everything back before we - * free the allocated blocks. - */ - buddy = get_buddy(block); - if (buddy && - (i915_buddy_block_is_free(block) && - i915_buddy_block_is_free(buddy))) - __i915_buddy_free(mm, block); - -err_free: - i915_buddy_free_list(mm, &allocated); - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_buddy.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h deleted file mode 100644 index 9ce5200f4001..000000000000 --- a/drivers/gpu/drm/i915/i915_buddy.h +++ /dev/null @@ -1,131 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_BUDDY_H__ -#define __I915_BUDDY_H__ - -#include <linux/bitops.h> -#include <linux/list.h> - -struct i915_buddy_block { -#define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) -#define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) -#define I915_BUDDY_ALLOCATED (1 << 10) -#define I915_BUDDY_FREE (2 << 10) -#define I915_BUDDY_SPLIT (3 << 10) -/* Free to be used, if needed in the future */ -#define I915_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6) -#define I915_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) - u64 header; - - struct i915_buddy_block *left; - struct i915_buddy_block *right; - struct i915_buddy_block *parent; - - void *private; /* owned by creator */ - - /* - * While the block is allocated by the user through i915_buddy_alloc*, - * the user has ownership of the link, for example to maintain within - * a list, if so desired. As soon as the block is freed with - * i915_buddy_free* ownership is given back to the mm. - */ - struct list_head link; - struct list_head tmp_link; -}; - -/* Order-zero must be at least PAGE_SIZE */ -#define I915_BUDDY_MAX_ORDER (63 - PAGE_SHIFT) - -/* - * Binary Buddy System. - * - * Locking should be handled by the user, a simple mutex around - * i915_buddy_alloc* and i915_buddy_free* should suffice. 
- */ -struct i915_buddy_mm { - /* Maintain a free list for each order. */ - struct list_head *free_list; - - /* - * Maintain explicit binary tree(s) to track the allocation of the - * address space. This gives us a simple way of finding a buddy block - * and performing the potentially recursive merge step when freeing a - * block. Nodes are either allocated or free, in which case they will - * also exist on the respective free list. - */ - struct i915_buddy_block **roots; - - /* - * Anything from here is public, and remains static for the lifetime of - * the mm. Everything above is considered do-not-touch. - */ - unsigned int n_roots; - unsigned int max_order; - - /* Must be at least PAGE_SIZE */ - u64 chunk_size; - u64 size; -}; - -static inline u64 -i915_buddy_block_offset(struct i915_buddy_block *block) -{ - return block->header & I915_BUDDY_HEADER_OFFSET; -} - -static inline unsigned int -i915_buddy_block_order(struct i915_buddy_block *block) -{ - return block->header & I915_BUDDY_HEADER_ORDER; -} - -static inline unsigned int -i915_buddy_block_state(struct i915_buddy_block *block) -{ - return block->header & I915_BUDDY_HEADER_STATE; -} - -static inline bool -i915_buddy_block_is_allocated(struct i915_buddy_block *block) -{ - return i915_buddy_block_state(block) == I915_BUDDY_ALLOCATED; -} - -static inline bool -i915_buddy_block_is_free(struct i915_buddy_block *block) -{ - return i915_buddy_block_state(block) == I915_BUDDY_FREE; -} - -static inline bool -i915_buddy_block_is_split(struct i915_buddy_block *block) -{ - return i915_buddy_block_state(block) == I915_BUDDY_SPLIT; -} - -static inline u64 -i915_buddy_block_size(struct i915_buddy_mm *mm, - struct i915_buddy_block *block) -{ - return mm->chunk_size << i915_buddy_block_order(block); -} - -int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size); - -void i915_buddy_fini(struct i915_buddy_mm *mm); - -struct i915_buddy_block * -i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order); - 
-int i915_buddy_alloc_range(struct i915_buddy_mm *mm, - struct list_head *blocks, - u64 start, u64 size); - -void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); - -void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); - -#endif diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index c1167bc15964..3992c25a191d 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1369,6 +1369,20 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } +/** + * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser() + * @batch_length: length of the commands in batch_obj + * @trampoline: Whether jump trampolines are used. + * + * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser(). + * This has to be preallocated, because the command parser runs in signaling context, + * and may not allocate any memory. + * + * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use + * IS_ERR() to check for errors. Must be freed with kfree(). + * + * NULL is a valid value, meaning no allocation was required. + */ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, bool trampoline) { @@ -1401,7 +1415,9 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, * @batch_offset: byte offset in the batch at which execution starts * @batch_length: length of the commands in batch_obj * @shadow: validated copy of the batch buffer in question - * @trampoline: whether to emit a conditional trampoline at the end of the batch + * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist() + * @shadow_map: mapping to @shadow vma + * @batch_map: mapping to @batch vma * * Parses the specified batch buffer looking for privilege violations as * described in the overview. 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b5facdd5edec..cc745751ac53 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -633,9 +633,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data) seq_printf(m, "DDC2 = 0x%08x\n", intel_uncore_read(uncore, DCC2)); seq_printf(m, "C0DRB3 = 0x%04x\n", - intel_uncore_read16(uncore, C0DRB3)); + intel_uncore_read16(uncore, C0DRB3_BW)); seq_printf(m, "C1DRB3 = 0x%04x\n", - intel_uncore_read16(uncore, C1DRB3)); + intel_uncore_read16(uncore, C1DRB3_BW)); } else if (INTEL_GEN(dev_priv) >= 6) { seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n", intel_uncore_read(uncore, MAD_DIMM_C0)); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6817c612eb78..850b499c71c8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -84,6 +84,7 @@ #include "intel_gvt.h" #include "intel_memory_region.h" #include "intel_pm.h" +#include "intel_region_ttm.h" #include "intel_sideband.h" #include "vlv_suspend.h" @@ -335,6 +336,10 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) if (ret < 0) goto err_workqueues; + ret = intel_region_ttm_device_init(dev_priv); + if (ret) + goto err_ttm; + intel_wopcm_init_early(&dev_priv->wopcm); intel_gt_init_early(&dev_priv->gt, dev_priv); @@ -359,6 +364,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) err_gem: i915_gem_cleanup_early(dev_priv); intel_gt_driver_late_release(&dev_priv->gt); + intel_region_ttm_device_fini(dev_priv); +err_ttm: vlv_suspend_cleanup(dev_priv); err_workqueues: i915_workqueues_cleanup(dev_priv); @@ -376,6 +383,7 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv) intel_power_domains_cleanup(dev_priv); i915_gem_cleanup_early(dev_priv); intel_gt_driver_late_release(&dev_priv->gt); + intel_region_ttm_device_fini(dev_priv); vlv_suspend_cleanup(dev_priv); 
i915_workqueues_cleanup(dev_priv); @@ -631,6 +639,8 @@ err_mem_regions: intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); + i915_gem_drain_freed_objects(dev_priv); + i915_ggtt_driver_late_release(dev_priv); err_perf: i915_perf_fini(dev_priv); return ret; @@ -880,6 +890,8 @@ out_cleanup_hw: i915_driver_hw_remove(i915); intel_memory_regions_driver_release(i915); i915_ggtt_driver_release(i915); + i915_gem_drain_freed_objects(i915); + i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); out_runtime_pm_put: @@ -936,6 +948,7 @@ static void i915_driver_release(struct drm_device *dev) intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + i915_ggtt_driver_late_release(dev_priv); i915_driver_mmio_release(dev_priv); @@ -1727,6 +1740,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_CREATE_EXT, i915_gem_create_ext_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 91e324c99298..6a0f2e6a3317 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -51,7 +51,6 @@ #include <linux/xarray.h> #include <drm/intel-gtt.h> -#include <drm/drm_legacy.h> /* for struct drm_dma_handle */ #include <drm/drm_gem.h> #include <drm/drm_auth.h> #include <drm/drm_cache.h> @@ -60,6 +59,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_connector.h> #include 
<drm/i915_mei_hdcp_interface.h> +#include <drm/ttm/ttm_device.h> #include "i915_params.h" #include "i915_reg.h" @@ -79,6 +79,7 @@ #include "gem/i915_gem_context_types.h" #include "gem/i915_gem_shrinker.h" #include "gem/i915_gem_stolen.h" +#include "gem/i915_gem_lmem.h" #include "gt/intel_engine.h" #include "gt/intel_gt_types.h" @@ -502,6 +503,13 @@ struct intel_l3_parity { }; struct i915_gem_mm { + /* + * Shortcut for the stolen region. This points to either + * INTEL_REGION_STOLEN_SMEM for integrated platforms, or + * INTEL_REGION_STOLEN_LMEM for discrete, or NULL if the device doesn't + * support stolen. + */ + struct intel_memory_region *stolen_region; /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; /** Protects the usage of the GTT stolen memory allocator. This is @@ -759,6 +767,7 @@ struct intel_cdclk_config { struct i915_selftest_stash { atomic_t counter; + struct ida mock_region_instances; }; struct drm_i915_private { @@ -1151,6 +1160,9 @@ struct drm_i915_private { /* Mutex to protect the above hdcp component related values. */ struct mutex hdcp_comp_mutex; + /* The TTM device structure. 
*/ + struct ttm_device bdev; + I915_SELFTEST_DECLARE(struct i915_selftest_stash selftest;) /* @@ -1713,9 +1725,15 @@ static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) } static inline bool -intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv) +intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915) +{ + return IS_BROXTON(i915) && intel_vtd_active(); +} + +static inline bool +intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915) { - return IS_BROXTON(dev_priv) && intel_vtd_active(); + return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915); } /* i915_drv.c */ @@ -1737,7 +1755,8 @@ void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); -struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915); +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, + u16 type, u16 instance); static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { @@ -1795,6 +1814,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0) #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1) #define I915_GEM_OBJECT_UNBIND_TEST BIT(2) +#define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3) void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); @@ -1914,6 +1934,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); /* i915_mm.c */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap); int remap_io_sg(struct vm_area_struct *vma, unsigned long addr, unsigned long size, struct scatterlist *sgl, resource_size_t iobase); @@ -1927,9 +1950,15 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) } static inline enum i915_map_type 
-i915_coherent_map_type(struct drm_i915_private *i915) +i915_coherent_map_type(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, bool always_coherent) { - return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; + if (i915_gem_object_is_lmem(obj)) + return I915_MAP_WC; + if (HAS_LLC(i915) || always_coherent) + return I915_MAP_WB; + else + return I915_MAP_WC; } #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6fd3ab61de37..589388dec48a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -157,8 +157,18 @@ try_again: if (vma) { ret = -EBUSY; if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || - !i915_vma_is_active(vma)) - ret = i915_vma_unbind(vma); + !i915_vma_is_active(vma)) { + if (flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) { + if (mutex_trylock(&vma->vm->mutex)) { + ret = __i915_vma_unbind(vma); + mutex_unlock(&vma->vm->mutex); + } else { + ret = -EBUSY; + } + } else { + ret = i915_vma_unbind(vma); + } + } __i915_vma_put(vma); } @@ -999,12 +1009,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->mm.madv = args->madv; if (i915_gem_object_has_pages(obj)) { - struct list_head *list; + unsigned long flags; - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!list_empty(&obj->mm.link)) { + struct list_head *list; if (obj->mm.madv != I915_MADV_WILLNEED) list = &i915->mm.purge_list; @@ -1012,8 +1021,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, list = &i915->mm.shrink_list; list_move_tail(&obj->mm.link, list); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } /* if the object is no longer attached, discard its backing storage */ @@ -1099,6 +1108,7 @@ err_unlock: } i915_gem_drain_freed_objects(dev_priv); + return ret; } diff --git a/drivers/gpu/drm/i915/i915_globals.c 
b/drivers/gpu/drm/i915/i915_globals.c index 3aa213684293..77f1911c463b 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -87,7 +87,6 @@ static void __i915_globals_cleanup(void) static __initconst int (* const initfn[])(void) = { i915_global_active_init, - i915_global_buddy_init, i915_global_context_init, i915_global_gem_context_init, i915_global_objects_init, diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index b2f5cd9b9b1a..2d199f411a4a 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -27,7 +27,6 @@ void i915_globals_exit(void); /* constructors */ int i915_global_active_init(void); -int i915_global_buddy_init(void); int i915_global_context_init(void); int i915_global_gem_context_init(void); int i915_global_objects_init(void); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index eb435f9e0220..b98d8cdbe4f2 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -1,7 +1,7 @@ /* * SPDX-License-Identifier: MIT * - * Copyright � 2008-2018 Intel Corporation + * Copyright © 2008-2018 Intel Corporation */ #ifndef _I915_GPU_ERROR_H_ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0b7b14b6960e..3cb0a65a996b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4037,7 +4037,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]); + intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4145,7 +4145,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - 
intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]); + intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4290,10 +4290,12 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]); + intel_engine_cs_irq(dev_priv->gt.engine[RCS0], + iir); if (iir & I915_BSD_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]); + intel_engine_cs_irq(dev_priv->gt.engine[VCS0], + iir >> 25); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 25576fa73ff0..666808cb3a32 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -28,10 +28,90 @@ #include "i915_drv.h" -#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) +struct remap_pfn { + struct mm_struct *mm; + unsigned long pfn; + pgprot_t prot; + + struct sgt_iter sgt; + resource_size_t iobase; +}; + +static int remap_pfn(pte_t *pte, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot))); + r->pfn++; + + return 0; +} #define use_dma(io) ((io) != -1) +static inline unsigned long sgt_pfn(const struct remap_pfn *r) +{ + if (use_dma(r->iobase)) + return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT; + else + return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT); +} + +static int remap_sg(pte_t *pte, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + if (GEM_WARN_ON(!r->sgt.sgp)) + return -EINVAL; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, + pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot))); + r->pfn++; /* 
track insertions in case we need to unwind later */ + + r->sgt.curr += PAGE_SIZE; + if (r->sgt.curr >= r->sgt.max) + r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase)); + + return 0; +} + +/** + * remap_io_mapping - remap an IO mapping to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: physical address of kernel memory + * @size: size of map area + * @iomap: the source io_mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap) +{ + struct remap_pfn r; + int err; + +#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) + GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); + + /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ + r.mm = vma->vm_mm; + r.pfn = pfn; + r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | + (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); + + err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT); + return err; + } + + return 0; +} + /** * remap_io_sg - remap an IO mapping to userspace * @vma: user vma to map to @@ -46,8 +126,13 @@ int remap_io_sg(struct vm_area_struct *vma, unsigned long addr, unsigned long size, struct scatterlist *sgl, resource_size_t iobase) { - unsigned long pfn, len, remapped = 0; - int err = 0; + struct remap_pfn r = { + .mm = vma->vm_mm, + .prot = vma->vm_page_prot, + .sgt = __sgt_iter(sgl, use_dma(iobase)), + .iobase = iobase, + }; + int err; /* We rely on prevalidation of the io-mapping to skip track_pfn(). 
*/ GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); @@ -55,25 +140,11 @@ int remap_io_sg(struct vm_area_struct *vma, if (!use_dma(iobase)) flush_cache_range(vma, addr, size); - do { - if (use_dma(iobase)) { - if (!sg_dma_len(sgl)) - break; - pfn = (sg_dma_address(sgl) + iobase) >> PAGE_SHIFT; - len = sg_dma_len(sgl); - } else { - pfn = page_to_pfn(sg_page(sgl)); - len = sgl->length; - } - - err = remap_pfn_range(vma, addr + remapped, pfn, len, - vma->vm_page_prot); - if (err) - break; - remapped += len; - } while ((sgl = __sg_next(sgl))); - - if (err) - zap_vma_ptes(vma, addr, remapped); - return err; + err = apply_to_page_range(r.mm, addr, size, remap_sg, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT); + return err; + } + + return 0; } diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 0320878d96b0..e07f4cfea63a 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -160,7 +160,7 @@ i915_param_named_unsafe(edp_vswing, int, 0400, i915_param_named_unsafe(enable_guc, int, 0400, "Enable GuC load for GuC submission and/or HuC load. " "Required functionality can be selected using bitmask values. " - "(-1=auto, 0=disable [default], 1=GuC submission, 2=HuC load)"); + "(-1=auto [default], 0=disable, 1=GuC submission, 2=HuC load)"); i915_param_named(guc_log_level, int, 0400, "GuC firmware logging level. Requires GuC to be loaded. 
" diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 14cd64cc61d0..f27eceb82c0f 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -59,7 +59,7 @@ struct drm_printer; param(int, disable_power_well, -1, 0400) \ param(int, enable_ips, 1, 0600) \ param(int, invert_brightness, 0, 0600) \ - param(int, enable_guc, 0, 0400) \ + param(int, enable_guc, -1, 0400) \ param(int, guc_log_level, -1, 0400) \ param(char *, guc_firmware_path, NULL, 0400) \ param(char *, huc_firmware_path, NULL, 0400) \ @@ -71,18 +71,18 @@ struct drm_printer; param(int, fastboot, -1, 0600) \ param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ - param(unsigned long, fake_lmem_start, 0, 0400) \ - param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, 0600) \ + param(unsigned long, fake_lmem_start, 0, IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) ? 0400 : 0) \ + param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \ /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ param(bool, load_detect_test, false, 0600) \ param(bool, force_reset_modeset_test, false, 0600) \ - param(bool, error_capture, true, 0600) \ + param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \ param(bool, disable_display, false, 0400) \ param(bool, verbose_state_checks, true, 0) \ param(bool, nuclear_pageflip, false, 0400) \ param(bool, enable_dp_mst, true, 0600) \ - param(bool, enable_gvt, false, 0400) + param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) #define MEMBER(T, member, ...) 
T member; struct i915_params { diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 9b517f79b9ca..7030e563985c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -906,7 +906,7 @@ static const struct intel_device_info rkl_info = { }; #define DGFX_FEATURES \ - .memory_regions = REGION_SMEM | REGION_LMEM, \ + .memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM, \ .has_master_unit_irq = 1, \ .has_llc = 0, \ .has_snoop = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 6f79db3ef38c..9f94914958c3 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1257,11 +1257,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: case 10: - if (intel_engine_in_execlists_submission_mode(ce->engine)) { - stream->specific_ctx_id_mask = - (1U << GEN8_CTX_ID_WIDTH) - 1; - stream->specific_ctx_id = stream->specific_ctx_id_mask; - } else { + if (intel_engine_uses_guc(ce->engine)) { /* * When using GuC, the context descriptor we write in * i915 is read by GuC and rewritten before it's @@ -1280,6 +1276,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) */ stream->specific_ctx_id_mask = (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1; + } else { + stream->specific_ctx_id_mask = + (1U << GEN8_CTX_ID_WIDTH) - 1; + stream->specific_ctx_id = stream->specific_ctx_id_mask; } break; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 8164cd763737..34d37d46a126 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -476,6 +476,8 @@ engine_event_status(struct intel_engine_cs *engine, static int config_status(struct drm_i915_private *i915, u64 config) { + struct intel_gt *gt = &i915->gt; + switch (config) { case I915_PMU_ACTUAL_FREQUENCY: if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) @@ -489,7 +491,7 @@ config_status(struct drm_i915_private 
*i915, u64 config) case I915_PMU_INTERRUPTS: break; case I915_PMU_RC6_RESIDENCY: - if (!HAS_RC6(i915)) + if (!gt->rc6.supported) return -ENODEV; break; case I915_PMU_SOFTWARE_GT_AWAKE_TIME: @@ -834,15 +836,13 @@ static ssize_t i915_pmu_event_show(struct device *dev, return sprintf(buf, "config=0x%lx\n", eattr->val); } -static ssize_t -i915_pmu_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) { return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask); } -static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL); +static DEVICE_ATTR_RO(cpumask); static struct attribute *i915_cpumask_attrs[] = { &dev_attr_cpumask.attr, diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index fed337ad7b68..e49da36c62fb 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -419,11 +419,73 @@ static int query_perf_config(struct drm_i915_private *i915, } } +static int query_memregion_info(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct drm_i915_query_memory_regions __user *query_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct drm_i915_memory_region_info __user *info_ptr = + &query_ptr->regions[0]; + struct drm_i915_memory_region_info info = { }; + struct drm_i915_query_memory_regions query; + struct intel_memory_region *mr; + u32 total_length; + int ret, id, i; + + if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) + return -ENODEV; + + if (query_item->flags != 0) + return -EINVAL; + + total_length = sizeof(query); + for_each_memory_region(mr, i915, id) { + if (mr->private) + continue; + + total_length += sizeof(info); + } + + ret = copy_query_item(&query, sizeof(query), total_length, query_item); + if (ret != 0) + return ret; + + if (query.num_regions) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(query.rsvd); i++) { + if 
(query.rsvd[i]) + return -EINVAL; + } + + for_each_memory_region(mr, i915, id) { + if (mr->private) + continue; + + info.region.memory_class = mr->type; + info.region.memory_instance = mr->instance; + info.probed_size = mr->total; + info.unallocated_size = mr->avail; + + if (__copy_to_user(info_ptr, &info, sizeof(info))) + return -EFAULT; + + query.num_regions++; + info_ptr++; + } + + if (__copy_to_user(query_ptr, &query, sizeof(query))) + return -EFAULT; + + return total_length; +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, query_perf_config, + query_memregion_info, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 10856e356be0..65c155b14189 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -416,6 +416,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_VECS_SFC_USAGE(engine) _MMIO((engine)->mmio_base + 0x2014) #define GEN11_VECS_SFC_USAGE_BIT (1 << 0) +#define GEN12_HCP_SFC_FORCED_LOCK(engine) _MMIO((engine)->mmio_base + 0x2910) +#define GEN12_HCP_SFC_FORCED_LOCK_BIT REG_BIT(0) +#define GEN12_HCP_SFC_LOCK_STATUS(engine) _MMIO((engine)->mmio_base + 0x2914) +#define GEN12_HCP_SFC_LOCK_ACK_BIT REG_BIT(1) +#define GEN12_HCP_SFC_USAGE_BIT REG_BIT(0) + #define GEN12_SFC_DONE(n) _MMIO(0x1cc00 + (n) * 0x100) #define GEN12_SFC_DONE_MAX 4 @@ -487,6 +493,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAB_CTL _MMIO(0x24000) #define GAB_CTL_CONT_AFTER_PAGEFAULT (1 << 8) +#define GU_CNTL _MMIO(0x101010) +#define LMEM_INIT REG_BIT(7) + #define GEN6_STOLEN_RESERVED _MMIO(0x1082C0) #define GEN6_STOLEN_RESERVED_ADDR_MASK (0xFFF << 20) #define GEN7_STOLEN_RESERVED_ADDR_MASK (0x3FFF << 18) @@ -2715,6 +2724,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define 
RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */ #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */ +#define VDBOX_CGCTL3F10(base) _MMIO((base) + 0x3f10) +#define IECPUNIT_CLKGATE_DIS REG_BIT(22) + #define ERROR_GEN6 _MMIO(0x40a0) #define GEN7_ERR_INT _MMIO(0x44040) #define ERR_INT_POISON (1 << 31) @@ -3790,8 +3802,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define CSHRDDR3CTL_DDR3 (1 << 2) /* 965 MCH register controlling DRAM channel configuration */ -#define C0DRB3 _MMIO(MCHBAR_MIRROR_BASE + 0x206) -#define C1DRB3 _MMIO(MCHBAR_MIRROR_BASE + 0x606) +#define C0DRB3_BW _MMIO(MCHBAR_MIRROR_BASE + 0x206) +#define C1DRB3_BW _MMIO(MCHBAR_MIRROR_BASE + 0x606) /* snb MCH registers for reading the DRAM channel configuration */ #define MAD_DIMM_C0 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5004) @@ -12315,6 +12327,7 @@ enum skl_power_gate { #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define GEN12_GSMBASE _MMIO(0x108100) +#define GEN12_DSMBASE _MMIO(0x1080C0) /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c8a6ed8617f3..1014c71cf7f5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -929,7 +929,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) u32 seqno; int ret; - might_sleep_if(gfpflags_allow_blocking(gfp)); + might_alloc(gfp); /* Check that the caller provided an already pinned context */ __intel_context_pin(ce); @@ -1594,8 +1594,8 @@ i915_request_await_object(struct i915_request *to, struct dma_fence **shared; unsigned int count, i; - ret = dma_resv_get_fences_rcu(obj->base.resv, - &excl, &count, &shared); + ret = dma_resv_get_fences(obj->base.resv, &excl, &count, + &shared); if (ret) return ret; @@ -1611,7 +1611,7 @@ i915_request_await_object(struct i915_request *to, dma_fence_put(shared[i]); kfree(shared); } else { - excl = 
dma_resv_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_unlocked(obj->base.resv); } if (excl) { diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index cc6b3846a8c7..69e9e6c3135e 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -6,6 +6,10 @@ #include "i915_scatterlist.h" +#include <drm/drm_mm.h> + +#include <linux/slab.h> + bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; @@ -34,6 +38,72 @@ bool i915_sg_trim(struct sg_table *orig_st) return true; } +/** + * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node + * @node: The drm_mm_node. + * @region_start: An offset to add to the dma addresses of the sg list. + * + * Create a struct sg_table, initializing it from a struct drm_mm_node, + * taking a maximum segment length into account, splitting into segments + * if necessary. + * + * Return: A pointer to a kmalloced struct sg_table on success, negative + * error code cast to an error pointer on failure. + */ +struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, + u64 region_start) +{ + const u64 max_segment = SZ_1G; /* Do we have a limit on this? 
*/ + u64 segment_pages = max_segment >> PAGE_SHIFT; + u64 block_size, offset, prev_end; + struct sg_table *st; + struct scatterlist *sg; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), + GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + st->nents = 0; + prev_end = (resource_size_t)-1; + block_size = node->size << PAGE_SHIFT; + offset = node->start << PAGE_SHIFT; + + while (block_size) { + u64 len; + + if (offset != prev_end || sg->length >= max_segment) { + if (st->nents) + sg = __sg_next(sg); + + sg_dma_address(sg) = region_start + offset; + sg_dma_len(sg) = 0; + sg->length = 0; + st->nents++; + } + + len = min(block_size, max_segment - sg->length); + sg->length += len; + sg_dma_len(sg) += len; + + offset += len; + block_size -= len; + + prev_end = offset; + } + + sg_mark_end(sg); + i915_sg_trim(st); + + return st; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #endif diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 9cb26a224034..5acca45ea981 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -13,6 +13,8 @@ #include "i915_gem.h" +struct drm_mm_node; + /* * Optimised SGL iterator for GEM objects */ @@ -101,15 +103,23 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \ (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0) -static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg) +/** + * i915_sg_dma_sizes - Record the dma segment sizes of a scatterlist + * @sg: The scatterlist + * + * Return: An unsigned int with segment sizes logically or'ed together. + * A caller can use this information to determine what hardware page table + * entry sizes can be used to map the memory represented by the scatterlist. 
+ */ +static inline unsigned int i915_sg_dma_sizes(struct scatterlist *sg) { unsigned int page_sizes; page_sizes = 0; - while (sg) { + while (sg && sg_dma_len(sg)) { GEM_BUG_ON(sg->offset); - GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE)); - page_sizes |= sg->length; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_len(sg), PAGE_SIZE)); + page_sizes |= sg_dma_len(sg); sg = __sg_next(sg); } @@ -133,4 +143,6 @@ static inline unsigned int i915_sg_segment_size(void) bool i915_sg_trim(struct sg_table *orig_st); +struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, + u64 region_start); #endif diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 2744558f3050..c589a681da77 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -582,7 +582,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, struct dma_fence **shared; unsigned int count, i; - ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); + ret = dma_resv_get_fences(resv, &excl, &count, &shared); if (ret) return ret; @@ -606,7 +606,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, dma_fence_put(shared[i]); kfree(shared); } else { - excl = dma_resv_get_excl_rcu(resv); + excl = dma_resv_get_excl_unlocked(resv); } if (ret >= 0 && excl && excl->ops != exclude) { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index b099e09ccc32..873bf996ceb5 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -58,8 +58,8 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, return DIV_ROUND_CLOSEST_ULL(res, 1000); } -static ssize_t -show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) +static ssize_t rc6_enable_show(struct device *kdev, + struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); unsigned int mask; @@ -75,43 +75,43 @@ show_rc6_mask(struct device 
*kdev, struct device_attribute *attr, char *buf) return sysfs_emit(buf, "%x\n", mask); } -static ssize_t -show_rc6_ms(struct device *kdev, struct device_attribute *attr, char *buf) +static ssize_t rc6_residency_ms_show(struct device *kdev, + struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6); return sysfs_emit(buf, "%u\n", rc6_residency); } -static ssize_t -show_rc6p_ms(struct device *kdev, struct device_attribute *attr, char *buf) +static ssize_t rc6p_residency_ms_show(struct device *kdev, + struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6p_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6p); return sysfs_emit(buf, "%u\n", rc6p_residency); } -static ssize_t -show_rc6pp_ms(struct device *kdev, struct device_attribute *attr, char *buf) +static ssize_t rc6pp_residency_ms_show(struct device *kdev, + struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6pp_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6pp); return sysfs_emit(buf, "%u\n", rc6pp_residency); } -static ssize_t -show_media_rc6_ms(struct device *kdev, struct device_attribute *attr, char *buf) +static ssize_t media_rc6_residency_ms_show(struct device *kdev, + struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6_residency = calc_residency(dev_priv, VLV_GT_MEDIA_RC6); return sysfs_emit(buf, "%u\n", rc6_residency); } -static DEVICE_ATTR(rc6_enable, S_IRUGO, show_rc6_mask, NULL); -static DEVICE_ATTR(rc6_residency_ms, S_IRUGO, show_rc6_ms, NULL); -static DEVICE_ATTR(rc6p_residency_ms, S_IRUGO, show_rc6p_ms, NULL); -static DEVICE_ATTR(rc6pp_residency_ms, S_IRUGO, show_rc6pp_ms, NULL); -static DEVICE_ATTR(media_rc6_residency_ms, S_IRUGO, show_media_rc6_ms, NULL); +static DEVICE_ATTR_RO(rc6_enable); +static 
DEVICE_ATTR_RO(rc6_residency_ms); +static DEVICE_ATTR_RO(rc6p_residency_ms); +static DEVICE_ATTR_RO(rc6pp_residency_ms); +static DEVICE_ATTR_RO(media_rc6_residency_ms); static struct attribute *rc6_attrs[] = { &dev_attr_rc6_enable.attr, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index f02f52ab5070..5259edacde38 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -201,6 +201,11 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size) __T; \ }) +static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) +{ + return a - b; +} + /* * container_of_user: Extract the superclass from a pointer to a member. * diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 37a00992cec5..0f227f28b280 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -27,6 +27,7 @@ #include "display/intel_frontbuffer.h" +#include "gem/i915_gem_lmem.h" #include "gt/intel_engine.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" @@ -93,7 +94,6 @@ static int __i915_vma_active(struct i915_active *ref) return i915_vma_tryget(active_to_vma(ref)) ? 
0 : -ENOENT; } -__i915_active_call static void __i915_vma_retire(struct i915_active *ref) { i915_vma_put(active_to_vma(ref)); @@ -124,7 +124,7 @@ vma_create(struct drm_i915_gem_object *obj, vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire); + i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire, 0); /* Declare ourselves safe for use inside shrinkers */ if (IS_ENABLED(CONFIG_LOCKDEP)) { @@ -448,9 +448,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void __iomem *ptr; int err; - if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { - err = -ENODEV; - goto err; + if (!i915_gem_object_is_lmem(vma->obj)) { + if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + err = -ENODEV; + goto err; + } } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); @@ -458,9 +460,19 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) ptr = READ_ONCE(vma->iomap); if (ptr == NULL) { - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, - vma->node.start, - vma->node.size); + /* + * TODO: consider just using i915_gem_object_pin_map() for lmem + * instead, which already supports mapping non-contiguous chunks + * of pages, that way we can also drop the + * I915_BO_ALLOC_CONTIGUOUS when allocating the object. + */ + if (i915_gem_object_is_lmem(vma->obj)) + ptr = i915_gem_object_lmem_io_map(vma->obj, 0, + vma->obj->base.size); + else + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, + vma->node.start, + vma->node.size); if (ptr == NULL) { err = -ENOMEM; goto err; @@ -788,32 +800,37 @@ unpinned: static int vma_get_pages(struct i915_vma *vma) { int err = 0; + bool pinned_pages = false; if (atomic_add_unless(&vma->pages_count, 1, 0)) return 0; + if (vma->obj) { + err = i915_gem_object_pin_pages(vma->obj); + if (err) + return err; + pinned_pages = true; + } + /* Allocations ahoy! 
*/ - if (mutex_lock_interruptible(&vma->pages_mutex)) - return -EINTR; + if (mutex_lock_interruptible(&vma->pages_mutex)) { + err = -EINTR; + goto unpin; + } if (!atomic_read(&vma->pages_count)) { - if (vma->obj) { - err = i915_gem_object_pin_pages(vma->obj); - if (err) - goto unlock; - } - err = vma->ops->set_pages(vma); - if (err) { - if (vma->obj) - i915_gem_object_unpin_pages(vma->obj); + if (err) goto unlock; - } + pinned_pages = false; } atomic_inc(&vma->pages_count); unlock: mutex_unlock(&vma->pages_mutex); +unpin: + if (pinned_pages) + __i915_gem_object_unpin_pages(vma->obj); return err; } @@ -905,8 +922,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) goto err_fence; - err = i915_vm_pin_pt_stash(vma->vm, - &work->stash); + err = i915_vm_map_pt_stash(vma->vm, &work->stash); if (err) goto err_fence; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index dc6926d89626..eca452a9851f 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -151,11 +151,6 @@ static inline void i915_vma_put(struct i915_vma *vma) i915_gem_object_put(vma->obj); } -static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) -{ - return a - b; -} - static inline long i915_vma_compare(struct i915_vma *vma, struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index bf837b6bb185..e6024eb7cca4 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -22,162 +22,102 @@ static const struct { .class = INTEL_MEMORY_STOLEN_SYSTEM, .instance = 0, }, + [INTEL_REGION_STOLEN_LMEM] = { + .class = INTEL_MEMORY_STOLEN_LOCAL, + .instance = 0, + }, +}; + +struct intel_region_reserve { + struct list_head link; + struct ttm_resource *res; }; struct intel_memory_region * -intel_memory_region_by_type(struct drm_i915_private *i915, - enum intel_memory_type mem_type) 
+intel_memory_region_lookup(struct drm_i915_private *i915, + u16 class, u16 instance) { struct intel_memory_region *mr; int id; - for_each_memory_region(mr, i915, id) - if (mr->type == mem_type) + /* XXX: consider maybe converting to an rb tree at some point */ + for_each_memory_region(mr, i915, id) { + if (mr->type == class && mr->instance == instance) return mr; - - return NULL; -} - -static u64 -intel_memory_region_free_pages(struct intel_memory_region *mem, - struct list_head *blocks) -{ - struct i915_buddy_block *block, *on; - u64 size = 0; - - list_for_each_entry_safe(block, on, blocks, link) { - size += i915_buddy_block_size(&mem->mm, block); - i915_buddy_free(&mem->mm, block); } - INIT_LIST_HEAD(blocks); - return size; + return NULL; } -void -__intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, - struct list_head *blocks) +struct intel_memory_region * +intel_memory_region_by_type(struct drm_i915_private *i915, + enum intel_memory_type mem_type) { - mutex_lock(&mem->mm_lock); - mem->avail += intel_memory_region_free_pages(mem, blocks); - mutex_unlock(&mem->mm_lock); -} + struct intel_memory_region *mr; + int id; -void -__intel_memory_region_put_block_buddy(struct i915_buddy_block *block) -{ - struct list_head blocks; + for_each_memory_region(mr, i915, id) + if (mr->type == mem_type) + return mr; - INIT_LIST_HEAD(&blocks); - list_add(&block->link, &blocks); - __intel_memory_region_put_pages_buddy(block->private, &blocks); + return NULL; } -int -__intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags, - struct list_head *blocks) +/** + * intel_memory_region_unreserve - Unreserve all previously reserved + * ranges + * @mem: The region containing the reserved ranges. 
+ */ +void intel_memory_region_unreserve(struct intel_memory_region *mem) { - unsigned int min_order = 0; - unsigned long n_pages; - - GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size)); - GEM_BUG_ON(!list_empty(blocks)); - - if (flags & I915_ALLOC_MIN_PAGE_SIZE) { - min_order = ilog2(mem->min_page_size) - - ilog2(mem->mm.chunk_size); - } + struct intel_region_reserve *reserve, *next; - if (flags & I915_ALLOC_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_order = ilog2(size) - ilog2(mem->mm.chunk_size); - } - - if (size > mem->mm.size) - return -E2BIG; - - n_pages = size >> ilog2(mem->mm.chunk_size); + if (!mem->priv_ops || !mem->priv_ops->free) + return; mutex_lock(&mem->mm_lock); - - do { - struct i915_buddy_block *block; - unsigned int order; - - order = fls(n_pages) - 1; - GEM_BUG_ON(order > mem->mm.max_order); - GEM_BUG_ON(order < min_order); - - do { - block = i915_buddy_alloc(&mem->mm, order); - if (!IS_ERR(block)) - break; - - if (order-- == min_order) - goto err_free_blocks; - } while (1); - - n_pages -= BIT(order); - - block->private = mem; - list_add_tail(&block->link, blocks); - - if (!n_pages) - break; - } while (1); - - mem->avail -= size; - mutex_unlock(&mem->mm_lock); - return 0; - -err_free_blocks: - intel_memory_region_free_pages(mem, blocks); + list_for_each_entry_safe(reserve, next, &mem->reserved, link) { + list_del(&reserve->link); + mem->priv_ops->free(mem, reserve->res); + kfree(reserve); + } mutex_unlock(&mem->mm_lock); - return -ENXIO; } -struct i915_buddy_block * -__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +/** + * intel_memory_region_reserve - Reserve a memory range + * @mem: The region for which we want to reserve a range. + * @offset: Start of the range to reserve. + * @size: The size of the range to reserve. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int intel_memory_region_reserve(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size) { - struct i915_buddy_block *block; - LIST_HEAD(blocks); int ret; + struct intel_region_reserve *reserve; - ret = __intel_memory_region_get_pages_buddy(mem, size, flags, &blocks); - if (ret) - return ERR_PTR(ret); - - block = list_first_entry(&blocks, typeof(*block), link); - list_del_init(&block->link); - return block; -} - -int intel_memory_region_init_buddy(struct intel_memory_region *mem) -{ - return i915_buddy_init(&mem->mm, resource_size(&mem->region), - PAGE_SIZE); -} + if (!mem->priv_ops || !mem->priv_ops->reserve) + return -EINVAL; -void intel_memory_region_release_buddy(struct intel_memory_region *mem) -{ - i915_buddy_free_list(&mem->mm, &mem->reserved); - i915_buddy_fini(&mem->mm); -} + reserve = kzalloc(sizeof(*reserve), GFP_KERNEL); + if (!reserve) + return -ENOMEM; -int intel_memory_region_reserve(struct intel_memory_region *mem, - u64 offset, u64 size) -{ - int ret; + reserve->res = mem->priv_ops->reserve(mem, offset, size); + if (IS_ERR(reserve->res)) { + ret = PTR_ERR(reserve->res); + kfree(reserve); + return ret; + } mutex_lock(&mem->mm_lock); - ret = i915_buddy_alloc_range(&mem->mm, &mem->reserved, offset, size); + list_add_tail(&reserve->link, &mem->reserved); mutex_unlock(&mem->mm_lock); - return ret; + return 0; } struct intel_memory_region * @@ -186,6 +126,8 @@ intel_memory_region_create(struct drm_i915_private *i915, resource_size_t size, resource_size_t min_page_size, resource_size_t io_start, + u16 type, + u16 instance, const struct intel_memory_region_ops *ops) { struct intel_memory_region *mem; @@ -202,6 +144,8 @@ intel_memory_region_create(struct drm_i915_private *i915, mem->ops = ops; mem->total = size; mem->avail = mem->total; + mem->type = type; + mem->instance = instance; mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); @@ -239,6 +183,7 @@ static void __intel_memory_region_destroy(struct kref 
*kref) struct intel_memory_region *mem = container_of(kref, typeof(*mem), kref); + intel_memory_region_unreserve(mem); if (mem->ops->release) mem->ops->release(mem); @@ -276,10 +221,17 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) instance = intel_region_map[i].instance; switch (type) { case INTEL_MEMORY_SYSTEM: - mem = i915_gem_shmem_setup(i915); + mem = i915_gem_shmem_setup(i915, type, instance); + break; + case INTEL_MEMORY_STOLEN_LOCAL: + mem = i915_gem_stolen_lmem_setup(i915, type, instance); + if (!IS_ERR(mem)) + i915->mm.stolen_region = mem; break; case INTEL_MEMORY_STOLEN_SYSTEM: - mem = i915_gem_stolen_setup(i915); + mem = i915_gem_stolen_smem_setup(i915, type, instance); + if (!IS_ERR(mem)) + i915->mm.stolen_region = mem; break; default: continue; @@ -294,9 +246,6 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) } mem->id = i; - mem->type = type; - mem->instance = instance; - i915->mm.regions[i] = mem; } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index edd49067c8ca..1f7dac63abb7 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -11,33 +11,34 @@ #include <linux/mutex.h> #include <linux/io-mapping.h> #include <drm/drm_mm.h> - -#include "i915_buddy.h" +#include <drm/i915_drm.h> struct drm_i915_private; struct drm_i915_gem_object; struct intel_memory_region; struct sg_table; +struct ttm_resource; -/** - * Base memory type - */ enum intel_memory_type { - INTEL_MEMORY_SYSTEM = 0, - INTEL_MEMORY_LOCAL, + INTEL_MEMORY_SYSTEM = I915_MEMORY_CLASS_SYSTEM, + INTEL_MEMORY_LOCAL = I915_MEMORY_CLASS_DEVICE, INTEL_MEMORY_STOLEN_SYSTEM, + INTEL_MEMORY_STOLEN_LOCAL, + INTEL_MEMORY_MOCK, }; enum intel_region_id { INTEL_REGION_SMEM = 0, INTEL_REGION_LMEM, INTEL_REGION_STOLEN_SMEM, + INTEL_REGION_STOLEN_LMEM, INTEL_REGION_UNKNOWN, /* Should be last */ }; #define REGION_SMEM BIT(INTEL_REGION_SMEM) #define REGION_LMEM 
BIT(INTEL_REGION_LMEM) #define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) +#define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM) #define I915_ALLOC_MIN_PAGE_SIZE BIT(0) #define I915_ALLOC_CONTIGUOUS BIT(1) @@ -58,10 +59,19 @@ struct intel_memory_region_ops { unsigned int flags); }; +struct intel_memory_region_private_ops { + struct ttm_resource *(*reserve)(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size); + void (*free)(struct intel_memory_region *mem, + struct ttm_resource *res); +}; + struct intel_memory_region { struct drm_i915_private *i915; const struct intel_memory_region_ops *ops; + const struct intel_memory_region_private_ops *priv_ops; struct io_mapping iomap; struct resource region; @@ -69,7 +79,6 @@ struct intel_memory_region { /* For fake LMEM */ struct drm_mm_node fake_mappable; - struct i915_buddy_mm mm; struct mutex mm_lock; struct kref kref; @@ -82,7 +91,8 @@ struct intel_memory_region { u16 type; u16 instance; enum intel_region_id id; - char name[8]; + char name[16]; + bool private; /* not for userspace */ struct list_head reserved; @@ -93,25 +103,17 @@ struct intel_memory_region { struct list_head list; struct list_head purgeable; } objects; -}; -int intel_memory_region_init_buddy(struct intel_memory_region *mem); -void intel_memory_region_release_buddy(struct intel_memory_region *mem); - -int __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags, - struct list_head *blocks); -struct i915_buddy_block * -__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); -void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, - struct list_head *blocks); -void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block); + size_t chunk_size; + unsigned int max_order; + bool is_range_manager; -int intel_memory_region_reserve(struct intel_memory_region *mem, - u64 
offset, u64 size); + void *region_private; +}; + +struct intel_memory_region * +intel_memory_region_lookup(struct drm_i915_private *i915, + u16 class, u16 instance); struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, @@ -119,6 +121,8 @@ intel_memory_region_create(struct drm_i915_private *i915, resource_size_t size, resource_size_t min_page_size, resource_size_t io_start, + u16 type, + u16 instance, const struct intel_memory_region_ops *ops); struct intel_memory_region * @@ -135,4 +139,9 @@ __printf(2, 3) void intel_memory_region_set_name(struct intel_memory_region *mem, const char *fmt, ...); +void intel_memory_region_unreserve(struct intel_memory_region *mem); + +int intel_memory_region_reserve(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size); #endif diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c new file mode 100644 index 000000000000..82a6727ede46 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_range_manager.h> + +#include "i915_drv.h" +#include "i915_scatterlist.h" + +#include "intel_region_ttm.h" + +/** + * DOC: TTM support structure + * + * The code in this file deals with setting up memory managers for TTM + * LMEM and MOCK regions and converting the output from + * the managers to struct sg_table, Basically providing the mapping from + * i915 GEM regions to TTM memory types and resource managers. + */ + +/* A Zero-initialized driver for now. We don't have a TTM backend yet. */ +static struct ttm_device_funcs i915_ttm_bo_driver; + +/** + * intel_region_ttm_device_init - Initialize a TTM device + * @dev_priv: Pointer to an i915 device private structure. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) +{ + struct drm_device *drm = &dev_priv->drm; + + return ttm_device_init(&dev_priv->bdev, &i915_ttm_bo_driver, + drm->dev, drm->anon_inode->i_mapping, + drm->vma_offset_manager, false, false); +} + +/** + * intel_region_ttm_device_fini - Finalize a TTM device + * @dev_priv: Pointer to an i915 device private structure. + */ +void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) +{ + ttm_device_fini(&dev_priv->bdev); +} + +/* + * Map the i915 memory regions to TTM memory types. We use the + * driver-private types for now, reserving TTM_PL_VRAM for stolen + * memory and TTM_PL_TT for GGTT use if decided to implement this. + */ +static int intel_region_to_ttm_type(struct intel_memory_region *mem) +{ + int type; + + GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && + mem->type != INTEL_MEMORY_MOCK); + + type = mem->instance + TTM_PL_PRIV; + GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); + + return type; +} + +static struct ttm_resource * +intel_region_ttm_node_reserve(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size) +{ + struct ttm_resource_manager *man = mem->region_private; + struct ttm_place place = {}; + struct ttm_buffer_object mock_bo = {}; + struct ttm_resource *res; + int ret; + + /* + * Having to use a mock_bo is unfortunate but stems from some + * drivers having private managers that insist to know what the + * allocate memory is intended for, using it to send private + * data to the manager. Also recently the bo has been used to send + * alignment info to the manager. Assume that apart from the latter, + * none of the managers we use will ever access the buffer object + * members, hoping we can pass the alignment info in the + * struct ttm_place in the future. 
+ */ + + place.fpfn = offset >> PAGE_SHIFT; + place.lpfn = place.fpfn + (size >> PAGE_SHIFT); + mock_bo.base.size = size; + ret = man->func->alloc(man, &mock_bo, &place, &res); + if (ret == -ENOSPC) + ret = -ENXIO; + + return ret ? ERR_PTR(ret) : res; +} + +/** + * intel_region_ttm_node_free - Free a node allocated from a resource manager + * @mem: The region the node was allocated from. + * @node: The opaque node representing an allocation. + */ +void intel_region_ttm_node_free(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man = mem->region_private; + + man->func->free(man, res); +} + +static const struct intel_memory_region_private_ops priv_ops = { + .reserve = intel_region_ttm_node_reserve, + .free = intel_region_ttm_node_free, +}; + +int intel_region_ttm_init(struct intel_memory_region *mem) +{ + struct ttm_device *bdev = &mem->i915->bdev; + int mem_type = intel_region_to_ttm_type(mem); + int ret; + + ret = ttm_range_man_init(bdev, mem_type, false, + resource_size(&mem->region) >> PAGE_SHIFT); + if (ret) + return ret; + + mem->chunk_size = PAGE_SIZE; + mem->max_order = + get_order(rounddown_pow_of_two(resource_size(&mem->region))); + mem->is_range_manager = true; + mem->priv_ops = &priv_ops; + mem->region_private = ttm_manager_type(bdev, mem_type); + + return 0; +} + +/** + * intel_region_ttm_fini - Finalize a TTM region. + * @mem: The memory region + * + * This functions takes down the TTM resource manager associated with the + * memory region, and if it was registered with the TTM device, + * removes that registration. + */ +void intel_region_ttm_fini(struct intel_memory_region *mem) +{ + int ret; + + ret = ttm_range_man_fini(&mem->i915->bdev, + intel_region_to_ttm_type(mem)); + GEM_WARN_ON(ret); + mem->region_private = NULL; +} + +/** + * intel_region_ttm_node_to_st - Convert an opaque TTM resource manager node + * to an sg_table. + * @mem: The memory region. 
+ * @node: The resource manager node obtained from the TTM resource manager. + * + * The gem backends typically use sg-tables for operations on the underlying + * io_memory. So provide a way for the backends to translate the + * nodes they are handed from TTM to sg-tables. + * + * Return: A malloced sg_table on success, an error pointer on failure. + */ +struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_range_mgr_node *range_node = + container_of(res, typeof(*range_node), base); + + GEM_WARN_ON(!mem->is_range_manager); + return i915_sg_from_mm_node(&range_node->mm_nodes[0], + mem->region.start); +} + +/** + * intel_region_ttm_node_alloc - Allocate memory resources from a region + * @mem: The memory region, + * @size: The requested size in bytes + * @flags: Allocation flags + * + * This functionality is provided only for callers that need to allocate + * memory from standalone TTM range managers, without the TTM eviction + * functionality. Don't use if you are not completely sure that's the + * case. The returned opaque node can be converted to an sg_table using + * intel_region_ttm_node_to_st(), and can be freed using + * intel_region_ttm_node_free(). + * + * Return: A valid pointer on success, an error pointer on failure. + */ +struct ttm_resource * +intel_region_ttm_node_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct ttm_resource_manager *man = mem->region_private; + struct ttm_place place = {}; + struct ttm_buffer_object mock_bo = {}; + struct ttm_resource *res; + int ret; + + /* + * We ignore the flags for now since we're using the range + * manager and contigous and min page size would be fulfilled + * by default if size is min page size aligned. 
+ */ + mock_bo.base.size = size; + + if (mem->is_range_manager) { + if (size >= SZ_1G) + mock_bo.page_alignment = SZ_1G >> PAGE_SHIFT; + else if (size >= SZ_2M) + mock_bo.page_alignment = SZ_2M >> PAGE_SHIFT; + else if (size >= SZ_64K) + mock_bo.page_alignment = SZ_64K >> PAGE_SHIFT; + } + + ret = man->func->alloc(man, &mock_bo, &place, &res); + if (ret == -ENOSPC) + ret = -ENXIO; + return ret ? ERR_PTR(ret) : res; +} diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h new file mode 100644 index 000000000000..11b0574ab791 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _INTEL_REGION_TTM_H_ +#define _INTEL_REGION_TTM_H_ + +#include <linux/types.h> + +#include "i915_selftest.h" + +struct drm_i915_private; +struct intel_memory_region; +struct ttm_resource; + +int intel_region_ttm_device_init(struct drm_i915_private *dev_priv); + +void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv); + +int intel_region_ttm_init(struct intel_memory_region *mem); + +void intel_region_ttm_fini(struct intel_memory_region *mem); + +struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, + struct ttm_resource *res); + +struct ttm_resource * +intel_region_ttm_node_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +void intel_region_ttm_node_free(struct intel_memory_region *mem, + struct ttm_resource *node); +#endif /* _INTEL_REGION_TTM_H_ */ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 43701fc90a47..1bed8f666048 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1917,6 +1917,18 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) if (ret) return ret; + /* + * The boot firmware initializes local memory and assesses its health. 
+ * If memory training fails, the punit will have been instructed to + * keep the GT powered down; we won't be able to communicate with it + * and we should not continue with driver initialization. + */ + if (IS_DGFX(i915) && + !(__raw_uncore_read32(uncore, GU_CNTL) & LMEM_INIT)) { + drm_err(&i915->drm, "LMEM not initialized by firmware\n"); + return -ENODEV; + } + if (INTEL_GEN(i915) > 5 && !intel_vgpu_active(i915)) uncore->flags |= UNCORE_HAS_FORCEWAKE; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 4002c984c2e0..61bf4560d8af 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915) return NULL; kref_init(&active->ref); - i915_active_init(&active->base, __live_active, __live_retire); + i915_active_init(&active->base, __live_active, __live_retire, 0); return active; } diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c deleted file mode 100644 index f0f5c4df8dbc..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_buddy.c +++ /dev/null @@ -1,789 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include <linux/prime_numbers.h> - -#include "../i915_selftest.h" -#include "i915_random.h" - -static void __igt_dump_block(struct i915_buddy_mm *mm, - struct i915_buddy_block *block, - bool buddy) -{ - pr_err("block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%s buddy=%s\n", - block->header, - i915_buddy_block_state(block), - i915_buddy_block_order(block), - i915_buddy_block_offset(block), - i915_buddy_block_size(mm, block), - yesno(!block->parent), - yesno(buddy)); -} - -static void igt_dump_block(struct i915_buddy_mm *mm, - struct i915_buddy_block *block) -{ - struct i915_buddy_block *buddy; - - __igt_dump_block(mm, block, false); - - buddy = 
get_buddy(block); - if (buddy) - __igt_dump_block(mm, buddy, true); -} - -static int igt_check_block(struct i915_buddy_mm *mm, - struct i915_buddy_block *block) -{ - struct i915_buddy_block *buddy; - unsigned int block_state; - u64 block_size; - u64 offset; - int err = 0; - - block_state = i915_buddy_block_state(block); - - if (block_state != I915_BUDDY_ALLOCATED && - block_state != I915_BUDDY_FREE && - block_state != I915_BUDDY_SPLIT) { - pr_err("block state mismatch\n"); - err = -EINVAL; - } - - block_size = i915_buddy_block_size(mm, block); - offset = i915_buddy_block_offset(block); - - if (block_size < mm->chunk_size) { - pr_err("block size smaller than min size\n"); - err = -EINVAL; - } - - if (!is_power_of_2(block_size)) { - pr_err("block size not power of two\n"); - err = -EINVAL; - } - - if (!IS_ALIGNED(block_size, mm->chunk_size)) { - pr_err("block size not aligned to min size\n"); - err = -EINVAL; - } - - if (!IS_ALIGNED(offset, mm->chunk_size)) { - pr_err("block offset not aligned to min size\n"); - err = -EINVAL; - } - - if (!IS_ALIGNED(offset, block_size)) { - pr_err("block offset not aligned to block size\n"); - err = -EINVAL; - } - - buddy = get_buddy(block); - - if (!buddy && block->parent) { - pr_err("buddy has gone fishing\n"); - err = -EINVAL; - } - - if (buddy) { - if (i915_buddy_block_offset(buddy) != (offset ^ block_size)) { - pr_err("buddy has wrong offset\n"); - err = -EINVAL; - } - - if (i915_buddy_block_size(mm, buddy) != block_size) { - pr_err("buddy size mismatch\n"); - err = -EINVAL; - } - - if (i915_buddy_block_state(buddy) == block_state && - block_state == I915_BUDDY_FREE) { - pr_err("block and its buddy are free\n"); - err = -EINVAL; - } - } - - return err; -} - -static int igt_check_blocks(struct i915_buddy_mm *mm, - struct list_head *blocks, - u64 expected_size, - bool is_contiguous) -{ - struct i915_buddy_block *block; - struct i915_buddy_block *prev; - u64 total; - int err = 0; - - block = NULL; - prev = NULL; - total = 0; - - 
list_for_each_entry(block, blocks, link) { - err = igt_check_block(mm, block); - - if (!i915_buddy_block_is_allocated(block)) { - pr_err("block not allocated\n"), - err = -EINVAL; - } - - if (is_contiguous && prev) { - u64 prev_block_size; - u64 prev_offset; - u64 offset; - - prev_offset = i915_buddy_block_offset(prev); - prev_block_size = i915_buddy_block_size(mm, prev); - offset = i915_buddy_block_offset(block); - - if (offset != (prev_offset + prev_block_size)) { - pr_err("block offset mismatch\n"); - err = -EINVAL; - } - } - - if (err) - break; - - total += i915_buddy_block_size(mm, block); - prev = block; - } - - if (!err) { - if (total != expected_size) { - pr_err("size mismatch, expected=%llx, found=%llx\n", - expected_size, total); - err = -EINVAL; - } - return err; - } - - if (prev) { - pr_err("prev block, dump:\n"); - igt_dump_block(mm, prev); - } - - if (block) { - pr_err("bad block, dump:\n"); - igt_dump_block(mm, block); - } - - return err; -} - -static int igt_check_mm(struct i915_buddy_mm *mm) -{ - struct i915_buddy_block *root; - struct i915_buddy_block *prev; - unsigned int i; - u64 total; - int err = 0; - - if (!mm->n_roots) { - pr_err("n_roots is zero\n"); - return -EINVAL; - } - - if (mm->n_roots != hweight64(mm->size)) { - pr_err("n_roots mismatch, n_roots=%u, expected=%lu\n", - mm->n_roots, hweight64(mm->size)); - return -EINVAL; - } - - root = NULL; - prev = NULL; - total = 0; - - for (i = 0; i < mm->n_roots; ++i) { - struct i915_buddy_block *block; - unsigned int order; - - root = mm->roots[i]; - if (!root) { - pr_err("root(%u) is NULL\n", i); - err = -EINVAL; - break; - } - - err = igt_check_block(mm, root); - - if (!i915_buddy_block_is_free(root)) { - pr_err("root not free\n"); - err = -EINVAL; - } - - order = i915_buddy_block_order(root); - - if (!i) { - if (order != mm->max_order) { - pr_err("max order root missing\n"); - err = -EINVAL; - } - } - - if (prev) { - u64 prev_block_size; - u64 prev_offset; - u64 offset; - - prev_offset = 
i915_buddy_block_offset(prev); - prev_block_size = i915_buddy_block_size(mm, prev); - offset = i915_buddy_block_offset(root); - - if (offset != (prev_offset + prev_block_size)) { - pr_err("root offset mismatch\n"); - err = -EINVAL; - } - } - - block = list_first_entry_or_null(&mm->free_list[order], - struct i915_buddy_block, - link); - if (block != root) { - pr_err("root mismatch at order=%u\n", order); - err = -EINVAL; - } - - if (err) - break; - - prev = root; - total += i915_buddy_block_size(mm, root); - } - - if (!err) { - if (total != mm->size) { - pr_err("expected mm size=%llx, found=%llx\n", mm->size, - total); - err = -EINVAL; - } - return err; - } - - if (prev) { - pr_err("prev root(%u), dump:\n", i - 1); - igt_dump_block(mm, prev); - } - - if (root) { - pr_err("bad root(%u), dump:\n", i); - igt_dump_block(mm, root); - } - - return err; -} - -static void igt_mm_config(u64 *size, u64 *chunk_size) -{ - I915_RND_STATE(prng); - u32 s, ms; - - /* Nothing fancy, just try to get an interesting bit pattern */ - - prandom_seed_state(&prng, i915_selftest.random_seed); - - /* Let size be a random number of pages up to 8 GB (2M pages) */ - s = 1 + i915_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); - /* Let the chunk size be a random power of 2 less than size */ - ms = BIT(i915_prandom_u32_max_state(ilog2(s), &prng)); - /* Round size down to the chunk size */ - s &= -ms; - - /* Convert from pages to bytes */ - *chunk_size = (u64)ms << 12; - *size = (u64)s << 12; -} - -static int igt_buddy_alloc_smoke(void *arg) -{ - struct i915_buddy_mm mm; - IGT_TIMEOUT(end_time); - I915_RND_STATE(prng); - u64 chunk_size; - u64 mm_size; - int *order; - int err, i; - - igt_mm_config(&mm_size, &chunk_size); - - pr_info("buddy_init with size=%llx, chunk_size=%llx\n", mm_size, chunk_size); - - err = i915_buddy_init(&mm, mm_size, chunk_size); - if (err) { - pr_err("buddy_init failed(%d)\n", err); - return err; - } - - order = i915_random_order(mm.max_order + 1, &prng); - if (!order) - 
goto out_fini; - - for (i = 0; i <= mm.max_order; ++i) { - struct i915_buddy_block *block; - int max_order = order[i]; - bool timeout = false; - LIST_HEAD(blocks); - int order; - u64 total; - - err = igt_check_mm(&mm); - if (err) { - pr_err("pre-mm check failed, abort\n"); - break; - } - - pr_info("filling from max_order=%u\n", max_order); - - order = max_order; - total = 0; - - do { -retry: - block = i915_buddy_alloc(&mm, order); - if (IS_ERR(block)) { - err = PTR_ERR(block); - if (err == -ENOMEM) { - pr_info("buddy_alloc hit -ENOMEM with order=%d\n", - order); - } else { - if (order--) { - err = 0; - goto retry; - } - - pr_err("buddy_alloc with order=%d failed(%d)\n", - order, err); - } - - break; - } - - list_add_tail(&block->link, &blocks); - - if (i915_buddy_block_order(block) != order) { - pr_err("buddy_alloc order mismatch\n"); - err = -EINVAL; - break; - } - - total += i915_buddy_block_size(&mm, block); - - if (__igt_timeout(end_time, NULL)) { - timeout = true; - break; - } - } while (total < mm.size); - - if (!err) - err = igt_check_blocks(&mm, &blocks, total, false); - - i915_buddy_free_list(&mm, &blocks); - - if (!err) { - err = igt_check_mm(&mm); - if (err) - pr_err("post-mm check failed\n"); - } - - if (err || timeout) - break; - - cond_resched(); - } - - if (err == -ENOMEM) - err = 0; - - kfree(order); -out_fini: - i915_buddy_fini(&mm); - - return err; -} - -static int igt_buddy_alloc_pessimistic(void *arg) -{ - const unsigned int max_order = 16; - struct i915_buddy_block *block, *bn; - struct i915_buddy_mm mm; - unsigned int order; - LIST_HEAD(blocks); - int err; - - /* - * Create a pot-sized mm, then allocate one of each possible - * order within. This should leave the mm with exactly one - * page left. 
- */ - - err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); - if (err) { - pr_err("buddy_init failed(%d)\n", err); - return err; - } - GEM_BUG_ON(mm.max_order != max_order); - - for (order = 0; order < max_order; order++) { - block = i915_buddy_alloc(&mm, order); - if (IS_ERR(block)) { - pr_info("buddy_alloc hit -ENOMEM with order=%d\n", - order); - err = PTR_ERR(block); - goto err; - } - - list_add_tail(&block->link, &blocks); - } - - /* And now the last remaining block available */ - block = i915_buddy_alloc(&mm, 0); - if (IS_ERR(block)) { - pr_info("buddy_alloc hit -ENOMEM on final alloc\n"); - err = PTR_ERR(block); - goto err; - } - list_add_tail(&block->link, &blocks); - - /* Should be completely full! */ - for (order = max_order; order--; ) { - block = i915_buddy_alloc(&mm, order); - if (!IS_ERR(block)) { - pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", - order); - list_add_tail(&block->link, &blocks); - err = -EINVAL; - goto err; - } - } - - block = list_last_entry(&blocks, typeof(*block), link); - list_del(&block->link); - i915_buddy_free(&mm, block); - - /* As we free in increasing size, we make available larger blocks */ - order = 1; - list_for_each_entry_safe(block, bn, &blocks, link) { - list_del(&block->link); - i915_buddy_free(&mm, block); - - block = i915_buddy_alloc(&mm, order); - if (IS_ERR(block)) { - pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", - order); - err = PTR_ERR(block); - goto err; - } - i915_buddy_free(&mm, block); - order++; - } - - /* To confirm, now the whole mm should be available */ - block = i915_buddy_alloc(&mm, max_order); - if (IS_ERR(block)) { - pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", - max_order); - err = PTR_ERR(block); - goto err; - } - i915_buddy_free(&mm, block); - -err: - i915_buddy_free_list(&mm, &blocks); - i915_buddy_fini(&mm); - return err; -} - -static int igt_buddy_alloc_optimistic(void *arg) -{ - const int max_order = 16; - struct 
i915_buddy_block *block; - struct i915_buddy_mm mm; - LIST_HEAD(blocks); - int order; - int err; - - /* - * Create a mm with one block of each order available, and - * try to allocate them all. - */ - - err = i915_buddy_init(&mm, - PAGE_SIZE * ((1 << (max_order + 1)) - 1), - PAGE_SIZE); - if (err) { - pr_err("buddy_init failed(%d)\n", err); - return err; - } - GEM_BUG_ON(mm.max_order != max_order); - - for (order = 0; order <= max_order; order++) { - block = i915_buddy_alloc(&mm, order); - if (IS_ERR(block)) { - pr_info("buddy_alloc hit -ENOMEM with order=%d\n", - order); - err = PTR_ERR(block); - goto err; - } - - list_add_tail(&block->link, &blocks); - } - - /* Should be completely full! */ - block = i915_buddy_alloc(&mm, 0); - if (!IS_ERR(block)) { - pr_info("buddy_alloc unexpectedly succeeded, it should be full!"); - list_add_tail(&block->link, &blocks); - err = -EINVAL; - goto err; - } - -err: - i915_buddy_free_list(&mm, &blocks); - i915_buddy_fini(&mm); - return err; -} - -static int igt_buddy_alloc_pathological(void *arg) -{ - const int max_order = 16; - struct i915_buddy_block *block; - struct i915_buddy_mm mm; - LIST_HEAD(blocks); - LIST_HEAD(holes); - int order, top; - int err; - - /* - * Create a pot-sized mm, then allocate one of each possible - * order within. This should leave the mm with exactly one - * page left. Free the largest block, then whittle down again. - * Eventually we will have a fully 50% fragmented mm. 
- */ - - err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); - if (err) { - pr_err("buddy_init failed(%d)\n", err); - return err; - } - GEM_BUG_ON(mm.max_order != max_order); - - for (top = max_order; top; top--) { - /* Make room by freeing the largest allocated block */ - block = list_first_entry_or_null(&blocks, typeof(*block), link); - if (block) { - list_del(&block->link); - i915_buddy_free(&mm, block); - } - - for (order = top; order--; ) { - block = i915_buddy_alloc(&mm, order); - if (IS_ERR(block)) { - pr_info("buddy_alloc hit -ENOMEM with order=%d, top=%d\n", - order, top); - err = PTR_ERR(block); - goto err; - } - list_add_tail(&block->link, &blocks); - } - - /* There should be one final page for this sub-allocation */ - block = i915_buddy_alloc(&mm, 0); - if (IS_ERR(block)) { - pr_info("buddy_alloc hit -ENOMEM for hole\n"); - err = PTR_ERR(block); - goto err; - } - list_add_tail(&block->link, &holes); - - block = i915_buddy_alloc(&mm, top); - if (!IS_ERR(block)) { - pr_info("buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", - top, max_order); - list_add_tail(&block->link, &blocks); - err = -EINVAL; - goto err; - } - } - - i915_buddy_free_list(&mm, &holes); - - /* Nothing larger than blocks of chunk_size now available */ - for (order = 1; order <= max_order; order++) { - block = i915_buddy_alloc(&mm, order); - if (!IS_ERR(block)) { - pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", - order); - list_add_tail(&block->link, &blocks); - err = -EINVAL; - goto err; - } - } - -err: - list_splice_tail(&holes, &blocks); - i915_buddy_free_list(&mm, &blocks); - i915_buddy_fini(&mm); - return err; -} - -static int igt_buddy_alloc_range(void *arg) -{ - struct i915_buddy_mm mm; - unsigned long page_num; - LIST_HEAD(blocks); - u64 chunk_size; - u64 offset; - u64 size; - u64 rem; - int err; - - igt_mm_config(&size, &chunk_size); - - pr_info("buddy_init with size=%llx, chunk_size=%llx\n", size, chunk_size); 
- - err = i915_buddy_init(&mm, size, chunk_size); - if (err) { - pr_err("buddy_init failed(%d)\n", err); - return err; - } - - err = igt_check_mm(&mm); - if (err) { - pr_err("pre-mm check failed, abort, abort, abort!\n"); - goto err_fini; - } - - rem = mm.size; - offset = 0; - - for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) { - struct i915_buddy_block *block; - LIST_HEAD(tmp); - - size = min(page_num * mm.chunk_size, rem); - - err = i915_buddy_alloc_range(&mm, &tmp, offset, size); - if (err) { - if (err == -ENOMEM) { - pr_info("alloc_range hit -ENOMEM with size=%llx\n", - size); - } else { - pr_err("alloc_range with offset=%llx, size=%llx failed(%d)\n", - offset, size, err); - } - - break; - } - - block = list_first_entry_or_null(&tmp, - struct i915_buddy_block, - link); - if (!block) { - pr_err("alloc_range has no blocks\n"); - err = -EINVAL; - break; - } - - if (i915_buddy_block_offset(block) != offset) { - pr_err("alloc_range start offset mismatch, found=%llx, expected=%llx\n", - i915_buddy_block_offset(block), offset); - err = -EINVAL; - } - - if (!err) - err = igt_check_blocks(&mm, &tmp, size, true); - - list_splice_tail(&tmp, &blocks); - - if (err) - break; - - offset += size; - - rem -= size; - if (!rem) - break; - - cond_resched(); - } - - if (err == -ENOMEM) - err = 0; - - i915_buddy_free_list(&mm, &blocks); - - if (!err) { - err = igt_check_mm(&mm); - if (err) - pr_err("post-mm check failed\n"); - } - -err_fini: - i915_buddy_fini(&mm); - - return err; -} - -static int igt_buddy_alloc_limit(void *arg) -{ - struct i915_buddy_block *block; - struct i915_buddy_mm mm; - const u64 size = U64_MAX; - int err; - - err = i915_buddy_init(&mm, size, PAGE_SIZE); - if (err) - return err; - - if (mm.max_order != I915_BUDDY_MAX_ORDER) { - pr_err("mm.max_order(%d) != %d\n", - mm.max_order, I915_BUDDY_MAX_ORDER); - err = -EINVAL; - goto out_fini; - } - - block = i915_buddy_alloc(&mm, mm.max_order); - if (IS_ERR(block)) { - err = PTR_ERR(block); - goto out_fini; - 
} - - if (i915_buddy_block_order(block) != mm.max_order) { - pr_err("block order(%d) != %d\n", - i915_buddy_block_order(block), mm.max_order); - err = -EINVAL; - goto out_free; - } - - if (i915_buddy_block_size(&mm, block) != - BIT_ULL(mm.max_order) * PAGE_SIZE) { - pr_err("block size(%llu) != %llu\n", - i915_buddy_block_size(&mm, block), - BIT_ULL(mm.max_order) * PAGE_SIZE); - err = -EINVAL; - goto out_free; - } - -out_free: - i915_buddy_free(&mm, block); -out_fini: - i915_buddy_fini(&mm); - return err; -} - -int i915_buddy_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_buddy_alloc_pessimistic), - SUBTEST(igt_buddy_alloc_optimistic), - SUBTEST(igt_buddy_alloc_pathological), - SUBTEST(igt_buddy_alloc_smoke), - SUBTEST(igt_buddy_alloc_range), - SUBTEST(igt_buddy_alloc_limit), - }; - - return i915_subtests(tests, NULL); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index dc394fb7ccfa..152d9ab135b1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -87,14 +87,14 @@ static void simulate_hibernate(struct drm_i915_private *i915) intel_runtime_pm_put(&i915->runtime_pm, wakeref); } -static int pm_prepare(struct drm_i915_private *i915) +static int igt_pm_prepare(struct drm_i915_private *i915) { i915_gem_suspend(i915); return 0; } -static void pm_suspend(struct drm_i915_private *i915) +static void igt_pm_suspend(struct drm_i915_private *i915) { intel_wakeref_t wakeref; @@ -104,7 +104,7 @@ static void pm_suspend(struct drm_i915_private *i915) } } -static void pm_hibernate(struct drm_i915_private *i915) +static void igt_pm_hibernate(struct drm_i915_private *i915) { intel_wakeref_t wakeref; @@ -116,7 +116,7 @@ static void pm_hibernate(struct drm_i915_private *i915) } } -static void pm_resume(struct drm_i915_private *i915) +static void igt_pm_resume(struct drm_i915_private *i915) { intel_wakeref_t wakeref; @@ -148,16 +148,16 @@ 
static int igt_gem_suspend(void *arg) if (err) goto out; - err = pm_prepare(i915); + err = igt_pm_prepare(i915); if (err) goto out; - pm_suspend(i915); + igt_pm_suspend(i915); /* Here be dragons! Note that with S3RST any S3 may become S4! */ simulate_hibernate(i915); - pm_resume(i915); + igt_pm_resume(i915); err = switch_to_context(ctx); out: @@ -183,16 +183,16 @@ static int igt_gem_hibernate(void *arg) if (err) goto out; - err = pm_prepare(i915); + err = igt_pm_prepare(i915); if (err) goto out; - pm_hibernate(i915); + igt_pm_hibernate(i915); /* Here be dragons! */ simulate_hibernate(i915); - pm_resume(i915); + igt_pm_resume(i915); err = switch_to_context(ctx); out: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6c2284ef4024..f843a5040706 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -186,7 +186,7 @@ retry: if (err) goto err_ppgtt_cleanup; - err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); + err = i915_vm_map_pt_stash(&ppgtt->vm, &stash); if (err) { i915_vm_free_pt_stash(&ppgtt->vm, &stash); goto err_ppgtt_cleanup; @@ -208,7 +208,7 @@ retry: if (err) goto err_ppgtt_cleanup; - err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); + err = i915_vm_map_pt_stash(&ppgtt->vm, &stash); if (err) { i915_vm_free_pt_stash(&ppgtt->vm, &stash); goto err_ppgtt_cleanup; @@ -325,11 +325,10 @@ retry: BIT_ULL(size))) goto alloc_vm_end; - err = i915_vm_pin_pt_stash(vm, &stash); + err = i915_vm_map_pt_stash(vm, &stash); if (!err) vm->allocate_va_range(vm, &stash, addr, BIT_ULL(size)); - i915_vm_free_pt_stash(vm, &stash); alloc_vm_end: if (err == -EDEADLK) { @@ -1968,10 +1967,9 @@ retry: if (err) goto end_ww; - err = i915_vm_pin_pt_stash(vm, &stash); + err = i915_vm_map_pt_stash(vm, &stash); if (!err) vm->allocate_va_range(vm, &stash, offset, chunk_size); - i915_vm_free_pt_stash(vm, &stash); end_ww: if (err == -EDEADLK) { diff --git 
a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 3db34d3eea58..34e5caf38093 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -33,5 +33,4 @@ selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) selftest(contexts, i915_gem_context_mock_selftests) -selftest(buddy, i915_buddy_mock_selftests) selftest(memory_region, intel_memory_region_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index bd83a8ec55af..9e9a6cb1d9e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -307,7 +307,7 @@ static int live_noa_gpr(void *arg) } /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */ - scratch = kmap(__px_page(ce->vm->scratch[0])); + scratch = __px_vaddr(ce->vm->scratch[0]); memset(scratch, POISON_FREE, PAGE_SIZE); rq = intel_context_create_request(ce); @@ -405,7 +405,6 @@ static int live_noa_gpr(void *arg) out_rq: i915_request_put(rq); out_ce: - kunmap(__px_page(ce->vm->scratch[0])); intel_context_put(ce); out: stream_destroy(stream); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index db367a6721c5..bd5c96a77ba3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1592,8 +1592,8 @@ static int live_breadcrumbs_smoketest(void *arg) for (n = 0; n < smoke[0].ncontexts; n++) { smoke[0].contexts[n] = live_context(i915, file); - if (!smoke[0].contexts[n]) { - ret = -ENOMEM; + if (IS_ERR(smoke[0].contexts[n])) { + ret = PTR_ERR(smoke[0].contexts[n]); goto out_contexts; } } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 
5fe7b80ca0bd..dd0607254a95 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -967,6 +967,9 @@ static int igt_vma_remapped_gtt(void *arg) intel_wakeref_t wakeref; int err = 0; + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; + obj = i915_gem_object_create_internal(i915, 10 * 10 * PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 55c9356de5ad..24d87d0fc747 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -94,9 +94,9 @@ int igt_spinner_pin(struct igt_spinner *spin, } if (!spin->batch) { - unsigned int mode = - i915_coherent_map_type(spin->gt->i915); + unsigned int mode; + mode = i915_coherent_map_type(spin->gt->i915, spin->obj, false); vaddr = igt_spinner_pin_obj(ce, ww, spin->obj, mode, &spin->batch_vma); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index a5fc0bf3feb9..c85d516b85cd 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -57,9 +57,10 @@ static int igt_mock_fill(void *arg) LIST_HEAD(objects); int err = 0; - page_size = mem->mm.chunk_size; - max_pages = div64_u64(total, page_size); + page_size = mem->chunk_size; rem = total; +retry: + max_pages = div64_u64(rem, page_size); for_each_prime_number_from(page_num, 1, max_pages) { resource_size_t size = page_num * page_size; @@ -85,6 +86,11 @@ static int igt_mock_fill(void *arg) err = 0; if (err == -ENXIO) { if (page_num * page_size <= rem) { + if (mem->is_range_manager && max_pages > 1) { + max_pages >>= 1; + goto retry; + } + pr_err("%s failed, space still left in region\n", __func__); err = -EINVAL; @@ -199,12 +205,18 @@ static int igt_mock_reserve(void *arg) do { u32 size = 
i915_prandom_u32_max_state(cur_avail, &prng); +retry: size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { - if (PTR_ERR(obj) == -ENXIO) + if (PTR_ERR(obj) == -ENXIO) { + if (mem->is_range_manager && + size > mem->chunk_size) { + size >>= 1; + goto retry; + } break; - + } err = PTR_ERR(obj); goto out_close; } @@ -220,7 +232,7 @@ static int igt_mock_reserve(void *arg) out_close: kfree(order); close_objects(mem, &objects); - i915_buddy_free_list(&mem->mm, &mem->reserved); + intel_memory_region_unreserve(mem); return err; } @@ -240,7 +252,7 @@ static int igt_mock_contiguous(void *arg) total = resource_size(&mem->region); /* Min size */ - obj = igt_object_create(mem, &objects, mem->mm.chunk_size, + obj = igt_object_create(mem, &objects, mem->chunk_size, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -321,14 +333,16 @@ static int igt_mock_contiguous(void *arg) min = target; target = total >> 1; - /* Make sure we can still allocate all the fragmented space */ - obj = igt_object_create(mem, &objects, target, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_close_objects; - } + if (!mem->is_range_manager) { + /* Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } - igt_object_release(obj); + igt_object_release(obj); + } /* * Even though we have enough free space, we don't have a big enough @@ -348,7 +362,7 @@ static int igt_mock_contiguous(void *arg) } target >>= 1; - } while (target >= mem->mm.chunk_size); + } while (target >= mem->chunk_size); err_close_objects: list_splice_tail(&holes, &objects); @@ -368,7 +382,7 @@ static int igt_mock_splintered_region(void *arg) /* * Sanity check we can still allocate everything even if the - * mm.max_order != mm.size. i.e our starting address space size is not a + * max_order != mm.size. 
i.e our starting address space size is not a * power-of-two. */ @@ -377,17 +391,10 @@ static int igt_mock_splintered_region(void *arg) if (IS_ERR(mem)) return PTR_ERR(mem); - if (mem->mm.size != size) { - pr_err("%s size mismatch(%llu != %llu)\n", - __func__, mem->mm.size, size); - err = -EINVAL; - goto out_put; - } - expected_order = get_order(rounddown_pow_of_two(size)); - if (mem->mm.max_order != expected_order) { + if (mem->max_order != expected_order) { pr_err("%s order mismatch(%u != %u)\n", - __func__, mem->mm.max_order, expected_order); + __func__, mem->max_order, expected_order); err = -EINVAL; goto out_put; } @@ -408,12 +415,15 @@ static int igt_mock_splintered_region(void *arg) * sure that does indeed hold true. */ - obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); - if (!IS_ERR(obj)) { - pr_err("%s too large contiguous allocation was not rejected\n", - __func__); - err = -EINVAL; - goto out_close; + if (!mem->is_range_manager) { + obj = igt_object_create(mem, &objects, size, + I915_BO_ALLOC_CONTIGUOUS); + if (!IS_ERR(obj)) { + pr_err("%s too large contiguous allocation was not rejected\n", + __func__); + err = -EINVAL; + goto out_close; + } } obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), @@ -432,68 +442,6 @@ out_put: return err; } -#ifndef SZ_8G -#define SZ_8G BIT_ULL(33) -#endif - -static int igt_mock_max_segment(void *arg) -{ - const unsigned int max_segment = i915_sg_segment_size(); - struct intel_memory_region *mem = arg; - struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; - struct i915_buddy_block *block; - struct scatterlist *sg; - LIST_HEAD(objects); - u64 size; - int err = 0; - - /* - * While we may create very large contiguous blocks, we may need - * to break those down for consumption elsewhere. In particular, - * dma-mapping with scatterlist elements have an implicit limit of - * UINT_MAX on each element. 
- */ - - size = SZ_8G; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); - if (IS_ERR(mem)) - return PTR_ERR(mem); - - obj = igt_object_create(mem, &objects, size, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_put; - } - - size = 0; - list_for_each_entry(block, &obj->mm.blocks, link) { - if (i915_buddy_block_size(&mem->mm, block) > size) - size = i915_buddy_block_size(&mem->mm, block); - } - if (size < max_segment) { - pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", - __func__, max_segment, size); - err = -EINVAL; - goto out_close; - } - - for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { - if (sg->length > max_segment) { - pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", - __func__, sg->length, max_segment); - err = -EINVAL; - goto out_close; - } - } - -out_close: - close_objects(mem, &objects); -out_put: - intel_memory_region_put(mem); - return err; -} - static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -513,7 +461,7 @@ static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) if (err) return err; - ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); + ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); if (IS_ERR(ptr)) return PTR_ERR(ptr); @@ -593,7 +541,9 @@ static int igt_gpu_write(struct i915_gem_context *ctx, if (err) break; + i915_gem_object_lock(obj, NULL); err = igt_cpu_check(obj, dword, rng); + i915_gem_object_unlock(obj); if (err) break; } while (!__igt_timeout(end_time, NULL)); @@ -629,6 +579,88 @@ out_put: return err; } +static int igt_lmem_create_cleared_cpu(void *arg) +{ + struct drm_i915_private *i915 = arg; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + u32 size, i; + int err; + + i915_gem_drain_freed_objects(i915); + + size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng)); + size = round_up(size, PAGE_SIZE); + i = 0; + + do { + struct drm_i915_gem_object *obj; + unsigned int flags; + u32 
dword, val; + void *vaddr; + + /* + * Alternate between cleared and uncleared allocations, while + * also dirtying the pages each time to check that the pages are + * always cleared if requested, since we should get some overlap + * of the underlying pages, if not all, since we are the only + * user. + */ + + flags = I915_BO_ALLOC_CPU_CLEAR; + if (i & 1) + flags = 0; + + obj = i915_gem_object_create_lmem(i915, size, flags); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + i915_gem_object_lock(obj, NULL); + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32), + &prng); + + if (flags & I915_BO_ALLOC_CPU_CLEAR) { + err = igt_cpu_check(obj, dword, 0); + if (err) { + pr_err("%s failed with size=%u, flags=%u\n", + __func__, size, flags); + goto out_unpin; + } + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_unpin; + } + + val = prandom_u32_state(&prng); + + memset32(vaddr, val, obj->base.size / sizeof(u32)); + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); +out_unpin: + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj); +out_put: + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + + if (err) + break; + ++i; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s completed (%u) iterations\n", __func__, i); + + return err; +} + static int igt_lmem_write_gpu(void *arg) { struct drm_i915_private *i915 = arg; @@ -1014,7 +1046,6 @@ int intel_memory_region_mock_selftests(void) SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), - SUBTEST(igt_mock_max_segment), }; struct intel_memory_region *mem; struct drm_i915_private *i915; @@ -1043,6 +1074,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_lmem_create), + SUBTEST(igt_lmem_create_cleared_cpu), 
SUBTEST(igt_lmem_write_cpu), SUBTEST(igt_lmem_write_gpu), }; diff --git a/drivers/gpu/drm/i915/selftests/librapl.c b/drivers/gpu/drm/i915/selftests/librapl.c index 58710ac3f979..eb03b5b28bad 100644 --- a/drivers/gpu/drm/i915/selftests/librapl.c +++ b/drivers/gpu/drm/i915/selftests/librapl.c @@ -5,8 +5,18 @@ #include <asm/msr.h> +#include "i915_drv.h" #include "librapl.h" +bool librapl_supported(const struct drm_i915_private *i915) +{ + /* Discrete cards require hwmon integration */ + if (IS_DGFX(i915)) + return false; + + return librapl_energy_uJ(); +} + u64 librapl_energy_uJ(void) { unsigned long long power; diff --git a/drivers/gpu/drm/i915/selftests/librapl.h b/drivers/gpu/drm/i915/selftests/librapl.h index 887f3e91dd05..e3b24fad0a7a 100644 --- a/drivers/gpu/drm/i915/selftests/librapl.h +++ b/drivers/gpu/drm/i915/selftests/librapl.h @@ -8,6 +8,10 @@ #include <linux/types.h> +struct drm_i915_private; + +bool librapl_supported(const struct drm_i915_private *i915); + u64 librapl_energy_uJ(void); #endif /* SELFTEST_LIBRAPL_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index cf40004bc92a..d189c4bd4bef 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -32,6 +32,7 @@ #include "gt/intel_gt_requests.h" #include "gt/mock_engine.h" #include "intel_memory_region.h" +#include "intel_region_ttm.h" #include "mock_request.h" #include "mock_gem_device.h" @@ -70,6 +71,7 @@ static void mock_device_release(struct drm_device *dev) mock_fini_ggtt(&i915->ggtt); destroy_workqueue(i915->wq); + intel_region_ttm_device_fini(i915); intel_gt_driver_late_release(&i915->gt); intel_memory_regions_driver_release(i915); @@ -116,6 +118,7 @@ struct drm_i915_private *mock_gem_device(void) #endif struct drm_i915_private *i915; struct pci_dev *pdev; + int ret; pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); if (!pdev) @@ -178,6 +181,10 @@ struct drm_i915_private 
*mock_gem_device(void) atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ i915->gt.awake = -ENODEV; + ret = intel_region_ttm_device_init(i915); + if (ret) + goto err_ttm; + i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) goto err_drv; @@ -201,6 +208,7 @@ struct drm_i915_private *mock_gem_device(void) intel_engines_driver_register(i915); i915->do_release = true; + ida_init(&i915->selftest.mock_region_instances); return i915; @@ -209,6 +217,8 @@ err_context: err_unlock: destroy_workqueue(i915->wq); err_drv: + intel_region_ttm_device_fini(i915); +err_ttm: intel_gt_driver_late_release(&i915->gt); intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 5d2d010a1e22..eafc5a04975c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -1,17 +1,56 @@ // SPDX-License-Identifier: MIT /* - * Copyright © 2019 Intel Corporation + * Copyright © 2019-2021 Intel Corporation */ +#include <linux/scatterlist.h> + +#include <drm/ttm/ttm_placement.h> + #include "gem/i915_gem_region.h" #include "intel_memory_region.h" +#include "intel_region_ttm.h" #include "mock_region.h" +static void mock_region_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + sg_free_table(pages); + kfree(pages); +} + +static int mock_region_get_pages(struct drm_i915_gem_object *obj) +{ + unsigned int flags; + struct sg_table *pages; + + flags = I915_ALLOC_MIN_PAGE_SIZE; + if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) + flags |= I915_ALLOC_CONTIGUOUS; + + obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, + obj->base.size, + flags); + if (IS_ERR(obj->mm.st_mm_node)) + return PTR_ERR(obj->mm.st_mm_node); + + pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); + if (IS_ERR(pages)) 
{ + intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + return PTR_ERR(pages); + } + + __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); + + return 0; +} + static const struct drm_i915_gem_object_ops mock_region_obj_ops = { .name = "mock-region", - .get_pages = i915_gem_object_get_pages_buddy, - .put_pages = i915_gem_object_put_pages_buddy, + .get_pages = mock_region_get_pages, + .put_pages = mock_region_put_pages, .release = i915_gem_object_release_memory_region, }; @@ -23,7 +62,7 @@ static int mock_object_init(struct intel_memory_region *mem, static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; - if (size > mem->mm.size) + if (size > resource_size(&mem->region)) return -E2BIG; drm_gem_private_object_init(&i915->drm, &obj->base, size); @@ -38,9 +77,18 @@ static int mock_object_init(struct intel_memory_region *mem, return 0; } +static void mock_region_fini(struct intel_memory_region *mem) +{ + struct drm_i915_private *i915 = mem->i915; + int instance = mem->instance; + + intel_region_ttm_fini(mem); + ida_free(&i915->selftest.mock_region_instances, instance); +} + static const struct intel_memory_region_ops mock_region_ops = { - .init = intel_memory_region_init_buddy, - .release = intel_memory_region_release_buddy, + .init = intel_region_ttm_init, + .release = mock_region_fini, .init_object = mock_object_init, }; @@ -51,6 +99,14 @@ mock_region_create(struct drm_i915_private *i915, resource_size_t min_page_size, resource_size_t io_start) { + int instance = ida_alloc_max(&i915->selftest.mock_region_instances, + TTM_NUM_MEM_TYPES - TTM_PL_PRIV - 1, + GFP_KERNEL); + + if (instance < 0) + return ERR_PTR(instance); + return intel_memory_region_create(i915, start, size, min_page_size, - io_start, &mock_region_ops); + io_start, INTEL_MEMORY_MOCK, instance, + &mock_region_ops); } |