summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c221
1 files changed, 143 insertions, 78 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b189b49c7602..c0ae6bbbd9b5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -349,7 +349,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
if (user_pages == NULL)
return -ENOMEM;
@@ -429,7 +429,7 @@ fail_put_user_pages:
SetPageDirty(user_pages[i]);
page_cache_release(user_pages[i]);
}
- kfree(user_pages);
+ drm_free_large(user_pages);
return ret;
}
@@ -649,7 +649,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
if (user_pages == NULL)
return -ENOMEM;
@@ -719,7 +719,7 @@ out_unlock:
out_unpin_pages:
for (i = 0; i < pinned_pages; i++)
page_cache_release(user_pages[i]);
- kfree(user_pages);
+ drm_free_large(user_pages);
return ret;
}
@@ -824,7 +824,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
if (user_pages == NULL)
return -ENOMEM;
@@ -902,7 +902,7 @@ fail_unlock:
fail_put_user_pages:
for (i = 0; i < pinned_pages; i++)
page_cache_release(user_pages[i]);
- kfree(user_pages);
+ drm_free_large(user_pages);
return ret;
}
@@ -989,10 +989,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
return -ENODEV;
/* Only handle setting domains to types used by the CPU. */
- if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+ if (write_domain & I915_GEM_GPU_DOMAINS)
return -EINVAL;
- if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+ if (read_domains & I915_GEM_GPU_DOMAINS)
return -EINVAL;
/* Having something in the write domain implies it's in the read
@@ -1145,7 +1145,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
mutex_unlock(&dev->struct_mutex);
return VM_FAULT_SIGBUS;
}
- list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ if (ret) {
+ mutex_unlock(&dev->struct_mutex);
+ return VM_FAULT_SIGBUS;
+ }
+
+ list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
}
/* Need a new fence register? */
@@ -1375,7 +1382,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&dev->struct_mutex);
return ret;
}
- list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+ list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
}
drm_gem_object_unreference(obj);
@@ -1408,9 +1415,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
}
obj_priv->dirty = 0;
- drm_free(obj_priv->pages,
- page_count * sizeof(struct page *),
- DRM_MEM_DRIVER);
+ drm_free_large(obj_priv->pages);
obj_priv->pages = NULL;
}
@@ -1476,14 +1481,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
* Returned sequence numbers are nonzero on success.
*/
static uint32_t
-i915_add_request(struct drm_device *dev, uint32_t flush_domains)
+i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t flush_domains)
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_file_private *i915_file_priv = NULL;
struct drm_i915_gem_request *request;
uint32_t seqno;
int was_empty;
RING_LOCALS;
+ if (file_priv != NULL)
+ i915_file_priv = file_priv->driver_priv;
+
request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
if (request == NULL)
return 0;
@@ -1510,6 +1520,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains)
request->emitted_jiffies = jiffies;
was_empty = list_empty(&dev_priv->mm.request_list);
list_add_tail(&request->list, &dev_priv->mm.request_list);
+ if (i915_file_priv) {
+ list_add_tail(&request->client_list,
+ &i915_file_priv->mm.request_list);
+ } else {
+ INIT_LIST_HEAD(&request->client_list);
+ }
/* Associate any objects on the flushing list matching the write
* domain we're flushing with our flush.
@@ -1659,6 +1675,7 @@ i915_gem_retire_requests(struct drm_device *dev)
i915_gem_retire_request(dev, request);
list_del(&request->list);
+ list_del(&request->client_list);
drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
} else
break;
@@ -1697,7 +1714,10 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
BUG_ON(seqno == 0);
if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
- ier = I915_READ(IER);
+ if (IS_IGDNG(dev))
+ ier = I915_READ(DEIER) | I915_READ(GTIER);
+ else
+ ier = I915_READ(IER);
if (!ier) {
DRM_ERROR("something (likely vbetool) disabled "
"interrupts, re-enabling\n");
@@ -1749,8 +1769,7 @@ i915_gem_flush(struct drm_device *dev,
if (flush_domains & I915_GEM_DOMAIN_CPU)
drm_agp_chipset_flush(dev);
- if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
- I915_GEM_DOMAIN_GTT)) {
+ if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
/*
* read/write caches:
*
@@ -1972,7 +1991,7 @@ i915_gem_evict_something(struct drm_device *dev)
i915_gem_flush(dev,
obj->write_domain,
obj->write_domain);
- i915_add_request(dev, obj->write_domain);
+ i915_add_request(dev, NULL, obj->write_domain);
obj = NULL;
continue;
@@ -1986,7 +2005,7 @@ i915_gem_evict_something(struct drm_device *dev)
/* If we didn't do any of the above, there's nothing to be done
* and we just can't fit it in.
*/
- return -ENOMEM;
+ return -ENOSPC;
}
return ret;
}
@@ -2001,7 +2020,7 @@ i915_gem_evict_everything(struct drm_device *dev)
if (ret != 0)
break;
}
- if (ret == -ENOMEM)
+ if (ret == -ENOSPC)
return 0;
return ret;
}
@@ -2024,8 +2043,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
*/
page_count = obj->size / PAGE_SIZE;
BUG_ON(obj_priv->pages != NULL);
- obj_priv->pages = drm_calloc(page_count, sizeof(struct page *),
- DRM_MEM_DRIVER);
+ obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
if (obj_priv->pages == NULL) {
DRM_ERROR("Faled to allocate page list\n");
obj_priv->pages_refcount--;
@@ -2131,8 +2149,10 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
return;
}
- pitch_val = (obj_priv->stride / 128) - 1;
- WARN_ON(pitch_val & ~0x0000000f);
+ pitch_val = obj_priv->stride / 128;
+ pitch_val = ffs(pitch_val) - 1;
+ WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
+
val = obj_priv->gtt_offset;
if (obj_priv->tiling_mode == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
@@ -2209,7 +2229,7 @@ try_again:
loff_t offset;
if (avail == 0)
- return -ENOMEM;
+ return -ENOSPC;
for (i = dev_priv->fence_reg_start;
i < dev_priv->num_fence_regs; i++) {
@@ -2242,7 +2262,7 @@ try_again:
i915_gem_flush(dev,
I915_GEM_GPU_DOMAINS,
I915_GEM_GPU_DOMAINS);
- seqno = i915_add_request(dev,
+ seqno = i915_add_request(dev, NULL,
I915_GEM_GPU_DOMAINS);
if (seqno == 0)
return -ENOMEM;
@@ -2254,9 +2274,6 @@ try_again:
goto try_again;
}
- BUG_ON(old_obj_priv->active ||
- (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
-
/*
* Zap this virtual mapping so we can set up a fence again
* for this object next time we need it.
@@ -2361,7 +2378,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
spin_unlock(&dev_priv->mm.active_list_lock);
if (lists_empty) {
DRM_ERROR("GTT full, but LRU list empty\n");
- return -ENOMEM;
+ return -ENOSPC;
}
ret = i915_gem_evict_something(dev);
@@ -2406,8 +2423,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
* wasn't in the GTT, there shouldn't be any way it could have been in
* a GPU cache
*/
- BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
- BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
+ BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+ BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
return 0;
}
@@ -2424,6 +2441,16 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
if (obj_priv->pages == NULL)
return;
+ /* XXX: The 865 in particular appears to be weird in how it handles
+ * cache flushing. We haven't figured it out, but the
+ * clflush+agp_chipset_flush doesn't appear to successfully get the
+ * data visible to the PGU, while wbinvd + agp_chipset_flush does.
+ */
+ if (IS_I865G(obj->dev)) {
+ wbinvd();
+ return;
+ }
+
drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
}
@@ -2439,7 +2466,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
/* Queue the GPU write cache flushing we need. */
i915_gem_flush(dev, 0, obj->write_domain);
- seqno = i915_add_request(dev, obj->write_domain);
+ seqno = i915_add_request(dev, NULL, obj->write_domain);
obj->write_domain = 0;
i915_gem_object_move_to_active(obj, seqno);
}
@@ -3022,20 +3049,12 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
drm_i915_private_t *dev_priv = dev->dev_private;
int nbox = exec->num_cliprects;
int i = 0, count;
- uint32_t exec_start, exec_len;
+ uint32_t exec_start, exec_len;
RING_LOCALS;
exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
exec_len = (uint32_t) exec->batch_len;
- if ((exec_start | exec_len) & 0x7) {
- DRM_ERROR("alignment\n");
- return -EINVAL;
- }
-
- if (!exec_start)
- return -EINVAL;
-
count = nbox ? nbox : 1;
for (i = 0; i < count; i++) {
@@ -3076,6 +3095,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
/* Throttle our rendering by waiting until the ring has completed our requests
* emitted over 20 msec ago.
*
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
* This should get us reasonable parallelism between CPU and GPU but also
* relatively low latency when blocking on a particular request to finish.
*/
@@ -3084,15 +3107,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
{
struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
int ret = 0;
- uint32_t seqno;
+ unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
mutex_lock(&dev->struct_mutex);
- seqno = i915_file_priv->mm.last_gem_throttle_seqno;
- i915_file_priv->mm.last_gem_throttle_seqno =
- i915_file_priv->mm.last_gem_seqno;
- if (seqno)
- ret = i915_wait_request(dev, seqno);
+ while (!list_empty(&i915_file_priv->mm.request_list)) {
+ struct drm_i915_gem_request *request;
+
+ request = list_first_entry(&i915_file_priv->mm.request_list,
+ struct drm_i915_gem_request,
+ client_list);
+
+ if (time_after_eq(request->emitted_jiffies, recent_enough))
+ break;
+
+ ret = i915_wait_request(dev, request->seqno);
+ if (ret != 0)
+ break;
+ }
mutex_unlock(&dev->struct_mutex);
+
return ret;
}
@@ -3111,7 +3144,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
reloc_count += exec_list[i].relocation_count;
}
- *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER);
+ *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
if (*relocs == NULL)
return -ENOMEM;
@@ -3125,8 +3158,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
exec_list[i].relocation_count *
sizeof(**relocs));
if (ret != 0) {
- drm_free(*relocs, reloc_count * sizeof(**relocs),
- DRM_MEM_DRIVER);
+ drm_free_large(*relocs);
*relocs = NULL;
return -EFAULT;
}
@@ -3165,17 +3197,34 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
}
err:
- drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER);
+ drm_free_large(relocs);
return ret;
}
+static int
+i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec,
+ uint64_t exec_offset)
+{
+ uint32_t exec_start, exec_len;
+
+ exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+ exec_len = (uint32_t) exec->batch_len;
+
+ if ((exec_start | exec_len) & 0x7)
+ return -EINVAL;
+
+ if (!exec_start)
+ return -EINVAL;
+
+ return 0;
+}
+
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
struct drm_i915_gem_execbuffer *args = data;
struct drm_i915_gem_exec_object *exec_list = NULL;
struct drm_gem_object **object_list = NULL;
@@ -3198,10 +3247,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
return -EINVAL;
}
/* Copy in the exec list from userland */
- exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
- DRM_MEM_DRIVER);
- object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
- DRM_MEM_DRIVER);
+ exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
+ object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
if (exec_list == NULL || object_list == NULL) {
DRM_ERROR("Failed to allocate exec or object list "
"for %d buffers\n",
@@ -3302,7 +3349,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
break;
/* error other than GTT full, or we've already tried again */
- if (ret != -ENOMEM || pin_tries >= 1) {
+ if (ret != -ENOSPC || pin_tries >= 1) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Failed to pin buffers %d\n", ret);
goto err;
@@ -3321,8 +3368,20 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
/* Set the pending read domains for the batch buffer to COMMAND */
batch_obj = object_list[args->buffer_count-1];
- batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
- batch_obj->pending_write_domain = 0;
+ if (batch_obj->pending_write_domain) {
+ DRM_ERROR("Attempting to use self-modifying batch buffer\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+
+ /* Sanity check the batch buffer, prior to moving objects */
+ exec_offset = exec_list[args->buffer_count - 1].offset;
+ ret = i915_gem_check_execbuffer (args, exec_offset);
+ if (ret != 0) {
+ DRM_ERROR("execbuf with invalid offset/length\n");
+ goto err;
+ }
i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3353,7 +3412,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
dev->invalidate_domains,
dev->flush_domains);
if (dev->flush_domains)
- (void)i915_add_request(dev, dev->flush_domains);
+ (void)i915_add_request(dev, file_priv,
+ dev->flush_domains);
}
for (i = 0; i < args->buffer_count; i++) {
@@ -3371,8 +3431,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
}
#endif
- exec_offset = exec_list[args->buffer_count - 1].offset;
-
#if WATCH_EXEC
i915_gem_dump_object(batch_obj,
args->batch_len,
@@ -3402,9 +3460,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
* *some* interrupts representing completion of buffers that we can
* wait on when trying to clear up gtt space).
*/
- seqno = i915_add_request(dev, flush_domains);
+ seqno = i915_add_request(dev, file_priv, flush_domains);
BUG_ON(seqno == 0);
- i915_file_priv->mm.last_gem_seqno = seqno;
for (i = 0; i < args->buffer_count; i++) {
struct drm_gem_object *obj = object_list[i];
@@ -3462,10 +3519,8 @@ err:
}
pre_mutex_err:
- drm_free(object_list, sizeof(*object_list) * args->buffer_count,
- DRM_MEM_DRIVER);
- drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
- DRM_MEM_DRIVER);
+ drm_free_large(object_list);
+ drm_free_large(exec_list);
drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects,
DRM_MEM_DRIVER);
@@ -3512,8 +3567,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
atomic_inc(&dev->pin_count);
atomic_add(obj->size, &dev->pin_memory);
if (!obj_priv->active &&
- (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
- I915_GEM_DOMAIN_GTT)) == 0 &&
+ (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
!list_empty(&obj_priv->list))
list_del_init(&obj_priv->list);
}
@@ -3540,8 +3594,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj)
*/
if (obj_priv->pin_count == 0) {
if (!obj_priv->active &&
- (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
- I915_GEM_DOMAIN_GTT)) == 0)
+ (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
list_move_tail(&obj_priv->list,
&dev_priv->mm.inactive_list);
atomic_dec(&dev->pin_count);
@@ -3645,15 +3698,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_gem_object *obj;
struct drm_i915_gem_object *obj_priv;
- mutex_lock(&dev->struct_mutex);
obj = drm_gem_object_lookup(dev, file_priv, args->handle);
if (obj == NULL) {
DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
args->handle);
- mutex_unlock(&dev->struct_mutex);
return -EBADF;
}
+ mutex_lock(&dev->struct_mutex);
/* Update the active list for the hardware's current position.
* Otherwise this only updates on a delayed timer or when irqs are
* actually unmasked, and our working set ends up being larger than
@@ -3792,9 +3844,8 @@ i915_gem_idle(struct drm_device *dev)
/* Flush the GPU along with all non-CPU write domains
*/
- i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
- ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
- seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+ i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+ seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
if (seqno == 0) {
mutex_unlock(&dev->struct_mutex);
@@ -4344,3 +4395,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
drm_agp_chipset_flush(dev);
return 0;
}
+
+void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
+{
+ struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+
+ /* Clean up our request list when the client is going away, so that
+ * later retire_requests won't dereference our soon-to-be-gone
+ * file_priv.
+ */
+ mutex_lock(&dev->struct_mutex);
+ while (!list_empty(&i915_file_priv->mm.request_list))
+ list_del_init(i915_file_priv->mm.request_list.next);
+ mutex_unlock(&dev->struct_mutex);
+}