Diffstat
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c301
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h2
3 files changed, 156 insertions, 151 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a193f1b36c67..4df039ef2ce3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -318,8 +318,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
/* Overlap of objects in the same batch? */
if (i915_vma_is_pinned(vma)) {
ret = -ENOSPC;
- if (vma->exec_entry &&
- vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+ if (vma->exec_flags &&
+ *vma->exec_flags & EXEC_OBJECT_PINNED)
ret = -EINVAL;
break;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 044fb1205554..da6cb2fe5f85 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -192,6 +192,8 @@ struct i915_execbuffer {
struct drm_file *file; /** per-file lookup tables and limits */
struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+ struct i915_vma **vma;
+ unsigned int *flags;
struct intel_engine_cs *engine; /** engine to queue the request to */
struct i915_gem_context *ctx; /** context for building the request */
@@ -245,13 +247,7 @@ struct i915_execbuffer {
struct hlist_head *buckets; /** ht for relocation handles */
};
-/*
- * As an alternative to creating a hashtable of handle-to-vma for a batch,
- * we used the last available reserved field in the execobject[] and stash
- * a link from the execobj to its vma.
- */
-#define __exec_to_vma(ee) (ee)->rsvd2
-#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
+#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
/*
* Used to convert any address to canonical form.
@@ -320,85 +316,82 @@ static int eb_create(struct i915_execbuffer *eb)
static bool
eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
- const struct i915_vma *vma)
+ const struct i915_vma *vma,
+ unsigned int flags)
{
- if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
- return true;
-
if (vma->node.size < entry->pad_to_size)
return true;
if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
return true;
- if (entry->flags & EXEC_OBJECT_PINNED &&
+ if (flags & EXEC_OBJECT_PINNED &&
vma->node.start != entry->offset)
return true;
- if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+ if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
vma->node.start < BATCH_OFFSET_BIAS)
return true;
- if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+ if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
(vma->node.start + vma->node.size - 1) >> 32)
return true;
return false;
}
-static inline void
+static inline bool
eb_pin_vma(struct i915_execbuffer *eb,
- struct drm_i915_gem_exec_object2 *entry,
+ const struct drm_i915_gem_exec_object2 *entry,
struct i915_vma *vma)
{
- u64 flags;
+ unsigned int exec_flags = *vma->exec_flags;
+ u64 pin_flags;
if (vma->node.size)
- flags = vma->node.start;
+ pin_flags = vma->node.start;
else
- flags = entry->offset & PIN_OFFSET_MASK;
+ pin_flags = entry->offset & PIN_OFFSET_MASK;
- flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
- if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
- flags |= PIN_GLOBAL;
+ pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+ if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
+ pin_flags |= PIN_GLOBAL;
- if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
- return;
+ if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+ return false;
- if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
if (unlikely(i915_vma_get_fence(vma))) {
i915_vma_unpin(vma);
- return;
+ return false;
}
if (i915_vma_pin_fence(vma))
- entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+ exec_flags |= __EXEC_OBJECT_HAS_FENCE;
}
- entry->flags |= __EXEC_OBJECT_HAS_PIN;
+ *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+ return !eb_vma_misplaced(entry, vma, exec_flags);
}
-static inline void
-__eb_unreserve_vma(struct i915_vma *vma,
- const struct drm_i915_gem_exec_object2 *entry)
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
{
- GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
+ GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
- if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+ if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
i915_vma_unpin_fence(vma);
__i915_vma_unpin(vma);
}
static inline void
-eb_unreserve_vma(struct i915_vma *vma,
- struct drm_i915_gem_exec_object2 *entry)
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
{
- if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+ if (!(*flags & __EXEC_OBJECT_HAS_PIN))
return;
- __eb_unreserve_vma(vma, entry);
- entry->flags &= ~__EXEC_OBJECT_RESERVED;
+ __eb_unreserve_vma(vma, *flags);
+ *flags &= ~__EXEC_OBJECT_RESERVED;
}
static int
@@ -428,7 +421,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
entry->pad_to_size = 0;
}
- if (unlikely(vma->exec_entry)) {
+ if (unlikely(vma->exec_flags)) {
DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
entry->handle, (int)(entry - eb->exec));
return -EINVAL;
@@ -441,14 +434,25 @@ eb_validate_vma(struct i915_execbuffer *eb,
*/
entry->offset = gen8_noncanonical_addr(entry->offset);
+ if (!eb->reloc_cache.has_fence) {
+ entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+ } else {
+ if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+ eb->reloc_cache.needs_unfenced) &&
+ i915_gem_object_is_tiled(vma->obj))
+ entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+ }
+
+ if (!(entry->flags & EXEC_OBJECT_PINNED))
+ entry->flags |= eb->context_flags;
+
return 0;
}
static int
-eb_add_vma(struct i915_execbuffer *eb,
- struct drm_i915_gem_exec_object2 *entry,
- struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
{
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
int err;
GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -469,40 +473,28 @@ eb_add_vma(struct i915_execbuffer *eb,
if (entry->relocation_count)
list_add_tail(&vma->reloc_link, &eb->relocs);
- if (!eb->reloc_cache.has_fence) {
- entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
- } else {
- if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
- eb->reloc_cache.needs_unfenced) &&
- i915_gem_object_is_tiled(vma->obj))
- entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
- }
-
- if (!(entry->flags & EXEC_OBJECT_PINNED))
- entry->flags |= eb->context_flags;
-
/*
* Stash a pointer from the vma to execobj, so we can query its flags,
* size, alignment etc as provided by the user. Also we stash a pointer
* to the vma inside the execobj so that we can use a direct lookup
* to find the right target VMA when doing relocations.
*/
- vma->exec_entry = entry;
- __exec_to_vma(entry) = (uintptr_t)vma;
+ eb->vma[i] = vma;
+ eb->flags[i] = entry->flags;
+ vma->exec_flags = &eb->flags[i];
err = 0;
- eb_pin_vma(eb, entry, vma);
- if (eb_vma_misplaced(entry, vma)) {
- eb_unreserve_vma(vma, entry);
-
- list_add_tail(&vma->exec_link, &eb->unbound);
- if (drm_mm_node_allocated(&vma->node))
- err = i915_vma_unbind(vma);
- } else {
+ if (eb_pin_vma(eb, entry, vma)) {
if (entry->offset != vma->node.start) {
entry->offset = vma->node.start | UPDATE;
eb->args->flags |= __EXEC_HAS_RELOC;
}
+ } else {
+ eb_unreserve_vma(vma, vma->exec_flags);
+
+ list_add_tail(&vma->exec_link, &eb->unbound);
+ if (drm_mm_node_allocated(&vma->node))
+ err = i915_vma_unbind(vma);
}
return err;
}
@@ -527,32 +519,35 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
static int eb_reserve_vma(const struct i915_execbuffer *eb,
struct i915_vma *vma)
{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- u64 flags;
+ struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+ unsigned int exec_flags = *vma->exec_flags;
+ u64 pin_flags;
int err;
- flags = PIN_USER | PIN_NONBLOCK;
- if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
- flags |= PIN_GLOBAL;
+ pin_flags = PIN_USER | PIN_NONBLOCK;
+ if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+ pin_flags |= PIN_GLOBAL;
/*
* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
* limit address to the first 4GBs for unflagged objects.
*/
- if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- flags |= PIN_ZONE_4G;
+ if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ pin_flags |= PIN_ZONE_4G;
- if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
- flags |= PIN_MAPPABLE;
+ if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+ pin_flags |= PIN_MAPPABLE;
- if (entry->flags & EXEC_OBJECT_PINNED) {
- flags |= entry->offset | PIN_OFFSET_FIXED;
- flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */
- } else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) {
- flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+ if (exec_flags & EXEC_OBJECT_PINNED) {
+ pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+ pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+ } else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+ pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
}
- err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
+ err = i915_vma_pin(vma,
+ entry->pad_to_size, entry->alignment,
+ pin_flags);
if (err)
return err;
@@ -561,7 +556,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
eb->args->flags |= __EXEC_HAS_RELOC;
}
- if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_get_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
@@ -569,11 +564,11 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
}
if (i915_vma_pin_fence(vma))
- entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+ exec_flags |= __EXEC_OBJECT_HAS_FENCE;
}
- entry->flags |= __EXEC_OBJECT_HAS_PIN;
- GEM_BUG_ON(eb_vma_misplaced(entry, vma));
+ *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+ GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
return 0;
}
@@ -615,18 +610,18 @@ static int eb_reserve(struct i915_execbuffer *eb)
INIT_LIST_HEAD(&eb->unbound);
INIT_LIST_HEAD(&last);
for (i = 0; i < count; i++) {
- struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ unsigned int flags = eb->flags[i];
+ struct i915_vma *vma = eb->vma[i];
- if (entry->flags & EXEC_OBJECT_PINNED &&
- entry->flags & __EXEC_OBJECT_HAS_PIN)
+ if (flags & EXEC_OBJECT_PINNED &&
+ flags & __EXEC_OBJECT_HAS_PIN)
continue;
- vma = exec_to_vma(entry);
- eb_unreserve_vma(vma, entry);
+ eb_unreserve_vma(vma, &eb->flags[i]);
- if (entry->flags & EXEC_OBJECT_PINNED)
+ if (flags & EXEC_OBJECT_PINNED)
list_add(&vma->exec_link, &eb->unbound);
- else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+ else if (flags & __EXEC_OBJECT_NEEDS_MAP)
list_add_tail(&vma->exec_link, &eb->unbound);
else
list_add_tail(&vma->exec_link, &last);
@@ -714,18 +709,15 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
for (i = 0; i < count; i++) {
- __exec_to_vma(&eb->exec[i]) = 0;
-
hlist_for_each_entry(vma,
ht_head(lut, eb->exec[i].handle),
ctx_node) {
if (vma->ctx_handle != eb->exec[i].handle)
continue;
- err = eb_add_vma(eb, &eb->exec[i], vma);
+ err = eb_add_vma(eb, i, vma);
if (unlikely(err))
return err;
-
goto next_vma;
}
@@ -746,7 +738,7 @@ next_vma: ;
for (i = slow_pass; i < count; i++) {
struct drm_i915_gem_object *obj;
- if (__exec_to_vma(&eb->exec[i]))
+ if (eb->vma[i])
continue;
obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
@@ -758,14 +750,17 @@ next_vma: ;
goto err;
}
- __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
+ eb->vma[i] = (struct i915_vma *)
+ ptr_pack_bits(obj, INTERMEDIATE, 1);
}
spin_unlock(&eb->file->table_lock);
for (i = slow_pass; i < count; i++) {
struct drm_i915_gem_object *obj;
+ unsigned int is_obj;
- if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
+ obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
+ if (!is_obj)
continue;
/*
@@ -776,8 +771,6 @@ next_vma: ;
* from the (obj, vm) we don't run the risk of creating
* duplicated vmas for the same vm.
*/
- obj = u64_to_ptr(typeof(*obj),
- __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
vma = i915_vma_instance(obj, eb->vm, NULL);
if (unlikely(IS_ERR(vma))) {
DRM_DEBUG("Failed to lookup VMA\n");
@@ -801,14 +794,17 @@ next_vma: ;
i915_vma_get(vma);
}
- err = eb_add_vma(eb, &eb->exec[i], vma);
+ err = eb_add_vma(eb, i, vma);
if (unlikely(err))
goto err;
+ GEM_BUG_ON(vma != eb->vma[i]);
+ GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+
/* Only after we validated the user didn't use our bits */
if (vma->ctx != eb->ctx) {
i915_vma_get(vma);
- eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
+ *vma->exec_flags |= __EXEC_OBJECT_HAS_REF;
}
}
@@ -822,7 +818,8 @@ next_vma: ;
out:
/* take note of the batch buffer before we might reorder the lists */
i = eb_batch_index(eb);
- eb->batch = exec_to_vma(&eb->exec[i]);
+ eb->batch = eb->vma[i];
+ GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
/*
* SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -833,18 +830,18 @@ out:
* Note that actual hangs have only been observed on gen7, but for
* paranoia do it everywhere.
*/
- if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED))
- eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+ eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
if (eb->reloc_cache.has_fence)
- eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
+ eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
eb->args->flags |= __EXEC_VALIDATED;
return eb_reserve(eb);
err:
for (i = slow_pass; i < count; i++) {
- if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
- __exec_to_vma(&eb->exec[i]) = 0;
+ if (ptr_unmask_bits(eb->vma[i], 1))
+ eb->vma[i] = NULL;
}
lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
return err;
@@ -857,7 +854,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
if (eb->lut_size < 0) {
if (handle >= -eb->lut_size)
return NULL;
- return exec_to_vma(&eb->exec[handle]);
+ return eb->vma[handle];
} else {
struct hlist_head *head;
struct i915_vma *vma;
@@ -877,24 +874,21 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
unsigned int i;
for (i = 0; i < count; i++) {
- struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
- struct i915_vma *vma = exec_to_vma(entry);
+ struct i915_vma *vma = eb->vma[i];
+ unsigned int flags = eb->flags[i];
if (!vma)
continue;
- GEM_BUG_ON(vma->exec_entry != entry);
- vma->exec_entry = NULL;
- __exec_to_vma(entry) = 0;
+ GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+ vma->exec_flags = NULL;
+ eb->vma[i] = NULL;
- if (entry->flags & __EXEC_OBJECT_HAS_PIN)
- __eb_unreserve_vma(vma, entry);
+ if (flags & __EXEC_OBJECT_HAS_PIN)
+ __eb_unreserve_vma(vma, flags);
- if (entry->flags & __EXEC_OBJECT_HAS_REF)
+ if (flags & __EXEC_OBJECT_HAS_REF)
i915_vma_put(vma);
-
- entry->flags &=
- ~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
}
}
@@ -1383,7 +1377,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
}
if (reloc->write_domain) {
- target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+ *target->exec_flags |= EXEC_OBJECT_WRITE;
/*
* Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1435,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
* do relocations we are already stalling, disable the user's opt
* of our synchronisation.
*/
- vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+ *vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
/* and update the user's relocation entry */
return relocate_entry(vma, reloc, eb, target);
@@ -1446,7 +1440,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
struct drm_i915_gem_relocation_entry __user *urelocs;
- const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
unsigned int remain;
urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1529,7 +1523,7 @@ out:
static int
eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
{
- const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
struct drm_i915_gem_relocation_entry *relocs =
u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
unsigned int i;
@@ -1733,6 +1727,8 @@ repeat:
if (err)
goto err;
+ GEM_BUG_ON(!eb->batch);
+
list_for_each_entry(vma, &eb->relocs, reloc_link) {
if (!have_copy) {
pagefault_disable();
@@ -1826,11 +1822,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
int err;
for (i = 0; i < count; i++) {
- struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
- struct i915_vma *vma = exec_to_vma(entry);
+ unsigned int flags = eb->flags[i];
+ struct i915_vma *vma = eb->vma[i];
struct drm_i915_gem_object *obj = vma->obj;
- if (entry->flags & EXEC_OBJECT_CAPTURE) {
+ if (flags & EXEC_OBJECT_CAPTURE) {
struct i915_gem_capture_list *capture;
capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1838,7 +1834,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
return -ENOMEM;
capture->next = eb->request->capture_list;
- capture->vma = vma;
+ capture->vma = eb->vma[i];
eb->request->capture_list = capture;
}
@@ -1856,29 +1852,29 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
*/
if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
if (i915_gem_clflush_object(obj, 0))
- entry->flags &= ~EXEC_OBJECT_ASYNC;
+ flags &= ~EXEC_OBJECT_ASYNC;
}
- if (entry->flags & EXEC_OBJECT_ASYNC)
- goto skip_flushes;
+ if (flags & EXEC_OBJECT_ASYNC)
+ continue;
err = i915_gem_request_await_object
- (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
+ (eb->request, obj, flags & EXEC_OBJECT_WRITE);
if (err)
return err;
-
-skip_flushes:
- i915_vma_move_to_active(vma, eb->request, entry->flags);
- __eb_unreserve_vma(vma, entry);
- vma->exec_entry = NULL;
}
for (i = 0; i < count; i++) {
- const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
- struct i915_vma *vma = exec_to_vma(entry);
+ unsigned int flags = eb->flags[i];
+ struct i915_vma *vma = eb->vma[i];
+
+ i915_vma_move_to_active(vma, eb->request, flags);
+ eb_export_fence(vma, eb->request, flags);
- eb_export_fence(vma, eb->request, entry->flags);
- if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+ __eb_unreserve_vma(vma, flags);
+ vma->exec_flags = NULL;
+
+ if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
i915_vma_put(vma);
}
eb->exec = NULL;
@@ -2007,11 +2003,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
if (IS_ERR(vma))
goto out;
- vma->exec_entry =
- memset(&eb->exec[eb->buffer_count++],
- 0, sizeof(*vma->exec_entry));
- vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
- __exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
+ eb->vma[eb->buffer_count] = i915_vma_get(vma);
+ eb->flags[eb->buffer_count] =
+ __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+ vma->exec_flags = &eb->flags[eb->buffer_count];
+ eb->buffer_count++;
out:
i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -2270,7 +2266,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.args = args;
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
args->flags |= __EXEC_HAS_RELOC;
+
eb.exec = exec;
+ eb.vma = memset(exec + args->buffer_count + 1, 0,
+ (args->buffer_count + 1) * sizeof(*eb.vma));
+ eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
if (USES_FULL_PPGTT(eb.i915))
eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -2358,7 +2359,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
- if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
+ if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
err = -EINVAL;
goto err_vma;
@@ -2511,7 +2512,9 @@ int
i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file)
{
- const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+ const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+ sizeof(struct i915_vma *) +
+ sizeof(unsigned int));
struct drm_i915_gem_execbuffer *args = data;
struct drm_i915_gem_execbuffer2 exec2;
struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -2602,7 +2605,9 @@ int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
struct drm_file *file)
{
- const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+ const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+ sizeof(struct i915_vma *) +
+ sizeof(unsigned int));
struct drm_i915_gem_execbuffer2 *args = data;
struct drm_i915_gem_exec_object2 *exec2_list;
struct drm_syncobj **fences = NULL;
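
Note on the slow-path lookup above: the first pass stores the raw GEM object pointer in eb->vma[i] with a low tag bit (INTERMEDIATE) via ptr_pack_bits(), so the second pass can tell not-yet-resolved objects apart from real vmas. The following self-contained sketch illustrates that pointer-tagging idea with simplified stand-in helpers; it is not the driver's actual ptr_pack_bits()/ptr_unpack_bits() implementation.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the driver's ptr_pack_bits()/ptr_unpack_bits(). */
static void *pack_bits(void *ptr, unsigned long bits, unsigned int nbits)
{
	const uintptr_t mask = ((uintptr_t)1 << nbits) - 1;

	assert(((uintptr_t)ptr & mask) == 0);	/* alignment must leave the low bits free */
	return (void *)((uintptr_t)ptr | (bits & mask));
}

static void *unpack_bits(void *packed, unsigned long *bits, unsigned int nbits)
{
	const uintptr_t mask = ((uintptr_t)1 << nbits) - 1;

	*bits = (uintptr_t)packed & mask;
	return (void *)((uintptr_t)packed & ~mask);
}

int main(void)
{
	static int object;			/* stand-in for a GEM object */
	unsigned long is_intermediate;
	void *slot;

	/* First pass: remember the object, tagged as "not yet a vma". */
	slot = pack_bits(&object, 1, 1);

	/* Second pass: strip the tag and recover the object pointer. */
	assert(unpack_bits(slot, &is_intermediate, 1) == (void *)&object);
	assert(is_intermediate == 1);
	printf("tag round-trips: is_intermediate=%lu\n", is_intermediate);
	return 0;
}

The tagging only works because the pointed-to storage is sufficiently aligned, leaving the low bit(s) of the pointer free to carry state.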
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 20cf272c97b1..5c49506d14bc 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -112,7 +112,7 @@ struct i915_vma {
/**
* Used for performing relocations during execbuffer insertion.
*/
- struct drm_i915_gem_exec_object2 *exec_entry;
+ unsigned int *exec_flags;
struct hlist_node exec_node;
u32 exec_handle;
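
For context on the bookkeeping change: the execbuffer path now carves a single allocation into three parallel arrays — the user exec objects, the resolved vma pointers (eb->vma) and the per-object flags (eb->flags). That is why the per-entry size sz grows by sizeof(struct i915_vma *) + sizeof(unsigned int), and why setup does eb.vma = exec + count + 1 and eb.flags = (unsigned int *)(eb.vma + count + 1); the extra +1 slot leaves room for the shadow batch appended in eb_parse(). Each vma keeps only a pointer into the flags array, and the exec_entry() macro recovers the matching exec object by pointer arithmetic. Below is a minimal user-space sketch of that layout using simplified stand-in types rather than the driver structures.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for drm_i915_gem_exec_object2 and i915_vma. */
struct exec_object {
	unsigned long long offset;
	unsigned long long flags;
};

struct vma {
	unsigned int *exec_flags;	/* points into the shared flags[] array */
};

int main(void)
{
	const unsigned int count = 4;
	/* One allocation, three parallel arrays of count + 1 entries each. */
	const size_t sz = sizeof(struct exec_object) +
			  sizeof(struct vma *) +
			  sizeof(unsigned int);
	struct exec_object *exec = calloc(count + 1, sz);
	struct vma **vmas;
	unsigned int *flags;
	struct vma v[4];
	unsigned int i;

	if (!exec)
		return 1;

	vmas = (struct vma **)(exec + count + 1);
	flags = (unsigned int *)(vmas + count + 1);

	for (i = 0; i < count; i++) {
		vmas[i] = &v[i];
		v[i].exec_flags = &flags[i];	/* like vma->exec_flags = &eb->flags[i] */
	}

	/* exec_entry(): index back from the flags pointer to the exec object. */
	for (i = 0; i < count; i++) {
		struct exec_object *entry = &exec[v[i].exec_flags - flags];

		assert(entry == &exec[i]);
	}
	printf("recovered all %u exec entries from their flags pointers\n", count);

	free(exec);
	return 0;
}

Keeping the frequently tested flags in their own dense array (rather than inside each exec object or vma) is presumably the point of the struct-of-arrays split: the hot loops above touch only eb->flags[] and eb->vma[].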