summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c164
1 files changed, 102 insertions, 62 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7b676fd1f075..7dcac3bfb771 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,10 +49,10 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ if (obj->cache_dirty)
return false;
- if (!i915_gem_object_is_coherent(obj))
+ if (!obj->cache_coherent)
return true;
return obj->pin_display;
@@ -143,9 +143,9 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct drm_i915_gem_get_aperture *args = data;
struct i915_vma *vma;
- size_t pinned;
+ u64 pinned;
- pinned = 0;
+ pinned = ggtt->base.reserved;
mutex_lock(&dev->struct_mutex);
list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
if (i915_vma_is_pinned(vma))
@@ -233,6 +233,14 @@ err_phys:
return st;
}
+static void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ if (cpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+}
+
static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
@@ -245,11 +253,10 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !i915_gem_object_is_coherent(obj))
+ !obj->cache_coherent)
drm_clflush_sg(pages);
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ __start_cpu_write(obj);
}
static void
@@ -684,6 +691,12 @@ i915_gem_dumb_create(struct drm_file *file,
args->size, &args->handle);
}
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ return !(obj->cache_level == I915_CACHE_NONE ||
+ obj->cache_level == I915_CACHE_WT);
+}
+
/**
* Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
@@ -753,6 +766,11 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
case I915_GEM_DOMAIN_CPU:
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
break;
+
+ case I915_GEM_DOMAIN_RENDER:
+ if (gpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+ break;
}
obj->base.write_domain = 0;
@@ -838,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (i915_gem_object_is_coherent(obj) ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
@@ -854,7 +871,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
* optimizes for the case when the gpu will dirty the data
* anyway again before the next pread happens.
*/
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+ if (!obj->cache_dirty &&
+ !(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
*needs_clflush = CLFLUSH_BEFORE;
out:
@@ -890,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (i915_gem_object_is_coherent(obj) ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
@@ -906,14 +923,16 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
* This optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway.
*/
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+ if (!obj->cache_dirty) {
*needs_clflush |= CLFLUSH_AFTER;
- /* Same trick applies to invalidate partially written cachelines read
- * before writing.
- */
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush |= CLFLUSH_BEFORE;
+ /*
+ * Same trick applies to invalidate partially written
+ * cachelines read before writing.
+ */
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush |= CLFLUSH_BEFORE;
+ }
out:
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
@@ -2337,8 +2356,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
unsigned int max_segment;
+ gfp_t noreclaim;
int ret;
- gfp_t gfp;
/* Assert that the object is not currently in any GPU domain. As it
* wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2367,22 +2386,30 @@ rebuild_st:
* Fail silently without starting the shrinker
*/
mapping = obj->base.filp->f_mapping;
- gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
- gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
+ noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
sg = st->sgl;
st->nents = 0;
for (i = 0; i < page_count; i++) {
- page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- if (unlikely(IS_ERR(page))) {
- i915_gem_shrink(dev_priv,
- page_count,
- I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_PURGEABLE);
+ const unsigned int shrink[] = {
+ I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+ 0,
+ }, *s = shrink;
+ gfp_t gfp = noreclaim;
+
+ do {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- }
- if (unlikely(IS_ERR(page))) {
- gfp_t reclaim;
+ if (likely(!IS_ERR(page)))
+ break;
+
+ if (!*s) {
+ ret = PTR_ERR(page);
+ goto err_sg;
+ }
+
+ i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+ cond_resched();
/* We've tried hard to allocate the memory by reaping
* our own buffer, now let the real VM do its job and
@@ -2392,15 +2419,26 @@ rebuild_st:
* defer the oom here by reporting the ENOMEM back
* to userspace.
*/
- reclaim = mapping_gfp_mask(mapping);
- reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
- page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto err_sg;
+ if (!*s) {
+ /* reclaim and warn, but no oom */
+ gfp = mapping_gfp_mask(mapping);
+
+ /* Our bo are always dirty and so we require
+ * kswapd to reclaim our pages (direct reclaim
+ * does not effectively begin pageout of our
+ * buffers on its own). However, direct reclaim
+ * only waits for kswapd when under allocation
+ * congestion. So as a result __GFP_RECLAIM is
+ * unreliable and fails to actually reclaim our
+ * dirty pages -- unless you try over and over
+ * again with !__GFP_NORETRY. However, we still
+ * want to fail this allocation rather than
+ * trigger the out-of-memory killer and for
+ * this we want the future __GFP_MAYFAIL.
+ */
}
- }
+ } while (1);
+
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
@@ -3223,6 +3261,10 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
if (vma->vm->file == fpriv)
i915_vma_close(vma);
+ vma = obj->vma_hashed;
+ if (vma && vma->ctx->file_priv == fpriv)
+ i915_vma_unlink_ctx(vma);
+
if (i915_gem_object_is_active(obj) &&
!i915_gem_object_has_active_reference(obj)) {
i915_gem_object_set_active_reference(obj);
@@ -3376,10 +3418,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
- return;
-
- i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+ /*
+ * We manually flush the CPU domain so that we can override and
+ * force the flush for the display, and perform it asyncrhonously.
+ */
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ if (obj->cache_dirty)
+ i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
obj->base.write_domain = 0;
}
@@ -3638,13 +3683,11 @@ restart:
}
}
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
- i915_gem_object_is_coherent(obj))
- obj->cache_dirty = true;
-
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
+ obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0;
}
@@ -3866,9 +3909,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
- return 0;
-
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */
@@ -3880,15 +3920,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
- GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+ GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
/* If we're writing through the CPU, then the GPU read domains will
* need to be invalidated at next use.
*/
- if (write) {
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
+ if (write)
+ __start_cpu_write(obj);
return 0;
}
@@ -4220,7 +4258,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->global_link);
INIT_LIST_HEAD(&obj->userfault_link);
- INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4285,6 +4322,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
mapping = obj->base.filp->f_mapping;
mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
i915_gem_object_init(obj, &i915_gem_object_ops);
@@ -4308,6 +4346,9 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
} else
obj->cache_level = I915_CACHE_NONE;
+ obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ obj->cache_dirty = !obj->cache_coherent;
+
trace_i915_gem_object_create(obj);
return obj;
@@ -4356,7 +4397,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
GEM_BUG_ON(i915_gem_object_is_active(obj));
list_for_each_entry_safe(vma, vn,
&obj->vma_list, obj_link) {
- GEM_BUG_ON(!i915_vma_is_ggtt(vma));
GEM_BUG_ON(i915_vma_is_active(vma));
vma->flags &= ~I915_VMA_PIN_MASK;
i915_vma_close(vma);
@@ -4763,7 +4803,9 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
*/
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
- i915_gem_init_userptr(dev_priv);
+ ret = i915_gem_init_userptr(dev_priv);
+ if (ret)
+ goto out_unlock;
ret = i915_gem_init_ggtt(dev_priv);
if (ret)
@@ -4974,10 +5016,8 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
mutex_lock(&dev_priv->drm.struct_mutex);
for (p = phases; *p; p++) {
- list_for_each_entry(obj, *p, global_link) {
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
+ list_for_each_entry(obj, *p, global_link)
+ __start_cpu_write(obj);
}
mutex_unlock(&dev_priv->drm.struct_mutex);