summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-26 23:18:18 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-26 23:18:18 +0200
commitbd22dc17e49973d3d4925970260e9e37f7580a9f (patch)
tree581a7c7527f628aa91eb2e0680b765a9673bc974 /drivers/gpu/drm/i915/i915_gem.c
parentMerge tag 'dlm-3.6' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland... (diff)
parentMerge branch 'drm-intel-fixes' of git://people.freedesktop.org/~danvet/drm-in... (diff)
downloadlinux-bd22dc17e49973d3d4925970260e9e37f7580a9f.tar.xz
linux-bd22dc17e49973d3d4925970260e9e37f7580a9f.zip
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "One of the smaller drm -next pulls in ages! Ben (nouveau) has a rewrite in progress but we decided to leave it stew for another cycle, so just some fixes from him. - radeon: lots of documentation work, fixes, more ring and locking changes, pcie gen2, more dp fixes. - i915: haswell features, gpu reset fixes, /dev/agpgart removal on machines that we never used it on, more VGA/HDP fix., more DP fixes - drm core: cleanups from Daniel, sis 64-bit fixes, range allocator colouring. but yeah fairly quiet merge this time, probably because I missed half of it!" Trivial add-add conflict in include/linux/pci_regs.h * 'drm-next' of git://people.freedesktop.org/~airlied/linux: (255 commits) drm/nouveau: init vblank requests list drm/nv50: extend vblank semaphore to generic dmaobj + offset pair drm/nouveau: mark most of our ioctls as deprecated, move to compat layer drm/nouveau: move current gpuobj code out of nouveau_object.c drm/nouveau/gem: fix object reference leak in a failure path drm/nv50: rename INVALID_QUERY_OR_TEXTURE error to INVALID_OPERATION drm/nv84: decode PCRYPT errors drm/nouveau: dcb table quirk for fdo#50830 nouveau: Fix alignment requirements on src and dst addresses drm/i915: unbreak lastclose for failed driver init drm/i915: Set the context before setting up regs for the context. drm/i915: constify mode in crtc_mode_fixup drm/i915/lvds: ditch ->prepare special case drm/i915: dereferencing an error pointer drm/i915: fix invalid reference handling of the default ctx obj drm/i915: Add -EIO to the list of known errors for __wait_seqno drm/i915: Flush the context object from the CPU caches upon switching drm/radeon: fix dpms on/off on trinity/aruba v2 drm/radeon: on hotplug force link training to happen (v2) drm/radeon: fix hotplug of DP to DVI|HDMI passive adapters (v2) ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c351
1 files changed, 293 insertions, 58 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 288d7b8f49ae..5c4657a54f97 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -96,9 +96,18 @@ i915_gem_wait_for_error(struct drm_device *dev)
if (!atomic_read(&dev_priv->mm.wedged))
return 0;
- ret = wait_for_completion_interruptible(x);
- if (ret)
+ /*
+ * Only wait 10 seconds for the gpu reset to complete to avoid hanging
+ * userspace. If it takes that long something really bad is going on and
+ * we should simply try to bail out and fail as gracefully as possible.
+ */
+ ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
+ if (ret == 0) {
+ DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
+ return -EIO;
+ } else if (ret < 0) {
return ret;
+ }
if (atomic_read(&dev_priv->mm.wedged)) {
/* GPU is hung, bump the completion count to account for
@@ -1122,7 +1131,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
obj->fault_mappable = true;
- pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
+ pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) +
page_offset;
/* Finally, remap it using the new GTT offset */
@@ -1132,6 +1141,11 @@ unlock:
out:
switch (ret) {
case -EIO:
+ /* If this -EIO is due to a gpu hang, give the reset code a
+ * chance to clean up the mess. Otherwise return the proper
+ * SIGBUS. */
+ if (!atomic_read(&dev_priv->mm.wedged))
+ return VM_FAULT_SIGBUS;
case -EAGAIN:
/* Give the error handler a chance to run and move the
* objects off the GPU active list. Next time we service the
@@ -1568,6 +1582,21 @@ i915_add_request(struct intel_ring_buffer *ring,
int was_empty;
int ret;
+ /*
+ * Emit any outstanding flushes - execbuf can fail to emit the flush
+ * after having emitted the batchbuffer command. Hence we need to fix
+ * things up similar to emitting the lazy request. The difference here
+ * is that the flush _must_ happen before the next request, no matter
+ * what.
+ */
+ if (ring->gpu_caches_dirty) {
+ ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS);
+ if (ret)
+ return ret;
+
+ ring->gpu_caches_dirty = false;
+ }
+
BUG_ON(request == NULL);
seqno = i915_gem_next_request_seqno(ring);
@@ -1613,6 +1642,9 @@ i915_add_request(struct intel_ring_buffer *ring,
queue_delayed_work(dev_priv->wq,
&dev_priv->mm.retire_work, HZ);
}
+
+ WARN_ON(!list_empty(&ring->gpu_write_list));
+
return 0;
}
@@ -1827,14 +1859,11 @@ i915_gem_retire_work_handler(struct work_struct *work)
*/
idle = true;
for_each_ring(ring, dev_priv, i) {
- if (!list_empty(&ring->gpu_write_list)) {
+ if (ring->gpu_caches_dirty) {
struct drm_i915_gem_request *request;
- int ret;
- ret = i915_gem_flush_ring(ring,
- 0, I915_GEM_GPU_DOMAINS);
request = kzalloc(sizeof(*request), GFP_KERNEL);
- if (ret || request == NULL ||
+ if (request == NULL ||
i915_add_request(ring, NULL, request))
kfree(request);
}
@@ -1848,11 +1877,10 @@ i915_gem_retire_work_handler(struct work_struct *work)
mutex_unlock(&dev->struct_mutex);
}
-static int
-i915_gem_check_wedge(struct drm_i915_private *dev_priv)
+int
+i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+ bool interruptible)
{
- BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
-
if (atomic_read(&dev_priv->mm.wedged)) {
struct completion *x = &dev_priv->error_completion;
bool recovery_complete;
@@ -1863,7 +1891,16 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv)
recovery_complete = x->done > 0;
spin_unlock_irqrestore(&x->wait.lock, flags);
- return recovery_complete ? -EIO : -EAGAIN;
+ /* Non-interruptible callers can't handle -EAGAIN, hence return
+ * -EIO unconditionally for these. */
+ if (!interruptible)
+ return -EIO;
+
+ /* Recovery complete, but still wedged means reset failure. */
+ if (recovery_complete)
+ return -EIO;
+
+ return -EAGAIN;
}
return 0;
@@ -1899,34 +1936,85 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
return ret;
}
+/**
+ * __wait_seqno - wait until execution of seqno has finished
+ * @ring: the ring expected to report seqno
+ * @seqno: duh!
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout: in - how long to wait (NULL forever); out - how much time remaining
+ *
+ * Returns 0 if the seqno was found within the alloted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
- bool interruptible)
+ bool interruptible, struct timespec *timeout)
{
drm_i915_private_t *dev_priv = ring->dev->dev_private;
- int ret = 0;
+ struct timespec before, now, wait_time={1,0};
+ unsigned long timeout_jiffies;
+ long end;
+ bool wait_forever = true;
+ int ret;
if (i915_seqno_passed(ring->get_seqno(ring), seqno))
return 0;
trace_i915_gem_request_wait_begin(ring, seqno);
+
+ if (timeout != NULL) {
+ wait_time = *timeout;
+ wait_forever = false;
+ }
+
+ timeout_jiffies = timespec_to_jiffies(&wait_time);
+
if (WARN_ON(!ring->irq_get(ring)))
return -ENODEV;
+ /* Record current time in case interrupted by signal, or wedged * */
+ getrawmonotonic(&before);
+
#define EXIT_COND \
(i915_seqno_passed(ring->get_seqno(ring), seqno) || \
atomic_read(&dev_priv->mm.wedged))
+ do {
+ if (interruptible)
+ end = wait_event_interruptible_timeout(ring->irq_queue,
+ EXIT_COND,
+ timeout_jiffies);
+ else
+ end = wait_event_timeout(ring->irq_queue, EXIT_COND,
+ timeout_jiffies);
- if (interruptible)
- ret = wait_event_interruptible(ring->irq_queue,
- EXIT_COND);
- else
- wait_event(ring->irq_queue, EXIT_COND);
+ ret = i915_gem_check_wedge(dev_priv, interruptible);
+ if (ret)
+ end = ret;
+ } while (end == 0 && wait_forever);
+
+ getrawmonotonic(&now);
ring->irq_put(ring);
trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND
- return ret;
+ if (timeout) {
+ struct timespec sleep_time = timespec_sub(now, before);
+ *timeout = timespec_sub(*timeout, sleep_time);
+ }
+
+ switch (end) {
+ case -EIO:
+ case -EAGAIN: /* Wedged */
+ case -ERESTARTSYS: /* Signal */
+ return (int)end;
+ case 0: /* Timeout */
+ if (timeout)
+ set_normalized_timespec(timeout, 0, 0);
+ return -ETIME;
+ default: /* Completed */
+ WARN_ON(end < 0); /* We're not aware of other errors */
+ return 0;
+ }
}
/**
@@ -1934,15 +2022,14 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
* request and object lists appropriately for that event.
*/
int
-i915_wait_request(struct intel_ring_buffer *ring,
- uint32_t seqno)
+i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
drm_i915_private_t *dev_priv = ring->dev->dev_private;
int ret = 0;
BUG_ON(seqno == 0);
- ret = i915_gem_check_wedge(dev_priv);
+ ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
if (ret)
return ret;
@@ -1950,9 +2037,7 @@ i915_wait_request(struct intel_ring_buffer *ring,
if (ret)
return ret;
- ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible);
- if (atomic_read(&dev_priv->mm.wedged))
- ret = -EAGAIN;
+ ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);
return ret;
}
@@ -1975,7 +2060,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
* it.
*/
if (obj->active) {
- ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
+ ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno);
if (ret)
return ret;
i915_gem_retire_requests_ring(obj->ring);
@@ -1985,6 +2070,115 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
}
/**
+ * Ensures that an object will eventually get non-busy by flushing any required
+ * write domains, emitting any outstanding lazy request and retiring and
+ * completed requests.
+ */
+static int
+i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
+{
+ int ret;
+
+ if (obj->active) {
+ ret = i915_gem_object_flush_gpu_write_domain(obj);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_check_olr(obj->ring,
+ obj->last_rendering_seqno);
+ if (ret)
+ return ret;
+ i915_gem_retire_requests_ring(obj->ring);
+ }
+
+ return 0;
+}
+
+/**
+ * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
+ * @DRM_IOCTL_ARGS: standard ioctl arguments
+ *
+ * Returns 0 if successful, else an error is returned with the remaining time in
+ * the timeout parameter.
+ * -ETIME: object is still busy after timeout
+ * -ERESTARTSYS: signal interrupted the wait
+ * -ENONENT: object doesn't exist
+ * Also possible, but rare:
+ * -EAGAIN: GPU wedged
+ * -ENOMEM: damn
+ * -ENODEV: Internal IRQ fail
+ * -E?: The add request failed
+ *
+ * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
+ * non-zero timeout parameter the wait ioctl will wait for the given number of
+ * nanoseconds on an object becoming unbusy. Since the wait itself does so
+ * without holding struct_mutex the object may become re-busied before this
+ * function completes. A similar but shorter * race condition exists in the busy
+ * ioctl
+ */
+int
+i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_i915_gem_wait *args = data;
+ struct drm_i915_gem_object *obj;
+ struct intel_ring_buffer *ring = NULL;
+ struct timespec timeout_stack, *timeout = NULL;
+ u32 seqno = 0;
+ int ret = 0;
+
+ if (args->timeout_ns >= 0) {
+ timeout_stack = ns_to_timespec(args->timeout_ns);
+ timeout = &timeout_stack;
+ }
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ return ret;
+
+ obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
+ if (&obj->base == NULL) {
+ mutex_unlock(&dev->struct_mutex);
+ return -ENOENT;
+ }
+
+ /* Need to make sure the object gets inactive eventually. */
+ ret = i915_gem_object_flush_active(obj);
+ if (ret)
+ goto out;
+
+ if (obj->active) {
+ seqno = obj->last_rendering_seqno;
+ ring = obj->ring;
+ }
+
+ if (seqno == 0)
+ goto out;
+
+ /* Do this after OLR check to make sure we make forward progress polling
+ * on this IOCTL with a 0 timeout (like busy ioctl)
+ */
+ if (!args->timeout_ns) {
+ ret = -ETIME;
+ goto out;
+ }
+
+ drm_gem_object_unreference(&obj->base);
+ mutex_unlock(&dev->struct_mutex);
+
+ ret = __wait_seqno(ring, seqno, true, timeout);
+ if (timeout) {
+ WARN_ON(!timespec_valid(timeout));
+ args->timeout_ns = timespec_to_ns(timeout);
+ }
+ return ret;
+
+out:
+ drm_gem_object_unreference(&obj->base);
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
+}
+
+/**
* i915_gem_object_sync - sync an object to a ring.
*
* @obj: object which may be in use on another ring.
@@ -2160,7 +2354,7 @@ static int i915_ring_idle(struct intel_ring_buffer *ring)
return ret;
}
- return i915_wait_request(ring, i915_gem_next_request_seqno(ring));
+ return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
}
int i915_gpu_idle(struct drm_device *dev)
@@ -2178,6 +2372,10 @@ int i915_gpu_idle(struct drm_device *dev)
/* Is the device fubar? */
if (WARN_ON(!list_empty(&ring->gpu_write_list)))
return -EBUSY;
+
+ ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
+ if (ret)
+ return ret;
}
return 0;
@@ -2364,7 +2562,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
}
if (obj->last_fenced_seqno) {
- ret = i915_wait_request(obj->ring, obj->last_fenced_seqno);
+ ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
if (ret)
return ret;
@@ -2551,8 +2749,8 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
if (map_and_fenceable)
free_space =
drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
- size, alignment, 0,
- dev_priv->mm.gtt_mappable_end,
+ size, alignment,
+ 0, dev_priv->mm.gtt_mappable_end,
0);
else
free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
@@ -2563,7 +2761,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
obj->gtt_space =
drm_mm_get_block_range_generic(free_space,
size, alignment, 0,
- dev_priv->mm.gtt_mappable_end,
+ 0, dev_priv->mm.gtt_mappable_end,
0);
else
obj->gtt_space =
@@ -3030,7 +3228,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
if (seqno == 0)
return 0;
- ret = __wait_seqno(ring, seqno, true);
+ ret = __wait_seqno(ring, seqno, true, NULL);
if (ret == 0)
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
@@ -3199,30 +3397,9 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
* become non-busy without any further actions, therefore emit any
* necessary flushes here.
*/
- args->busy = obj->active;
- if (args->busy) {
- /* Unconditionally flush objects, even when the gpu still uses this
- * object. Userspace calling this function indicates that it wants to
- * use this buffer rather sooner than later, so issuing the required
- * flush earlier is beneficial.
- */
- if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
- ret = i915_gem_flush_ring(obj->ring,
- 0, obj->base.write_domain);
- } else {
- ret = i915_gem_check_olr(obj->ring,
- obj->last_rendering_seqno);
- }
+ ret = i915_gem_object_flush_active(obj);
- /* Update the active list for the hardware's current position.
- * Otherwise this only updates on a delayed timer or when irqs
- * are actually unmasked, and our working set ends up being
- * larger than required.
- */
- i915_gem_retire_requests_ring(obj->ring);
-
- args->busy = obj->active;
- }
+ args->busy = obj->active;
drm_gem_object_unreference(&obj->base);
unlock:
@@ -3435,6 +3612,38 @@ i915_gem_idle(struct drm_device *dev)
return 0;
}
+void i915_gem_l3_remap(struct drm_device *dev)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ u32 misccpctl;
+ int i;
+
+ if (!IS_IVYBRIDGE(dev))
+ return;
+
+ if (!dev_priv->mm.l3_remap_info)
+ return;
+
+ misccpctl = I915_READ(GEN7_MISCCPCTL);
+ I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+ POSTING_READ(GEN7_MISCCPCTL);
+
+ for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
+ u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
+ if (remap && remap != dev_priv->mm.l3_remap_info[i/4])
+ DRM_DEBUG("0x%x was already programmed to %x\n",
+ GEN7_L3LOG_BASE + i, remap);
+ if (remap && !dev_priv->mm.l3_remap_info[i/4])
+ DRM_DEBUG_DRIVER("Clearing remapped register\n");
+ I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->mm.l3_remap_info[i/4]);
+ }
+
+ /* Make sure all the writes land before disabling dop clock gating */
+ POSTING_READ(GEN7_L3LOG_BASE);
+
+ I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+}
+
void i915_gem_init_swizzling(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
@@ -3518,12 +3727,33 @@ void i915_gem_init_ppgtt(struct drm_device *dev)
}
}
+static bool
+intel_enable_blt(struct drm_device *dev)
+{
+ if (!HAS_BLT(dev))
+ return false;
+
+ /* The blitter was dysfunctional on early prototypes */
+ if (IS_GEN6(dev) && dev->pdev->revision < 8) {
+ DRM_INFO("BLT not supported on this pre-production hardware;"
+ " graphics performance will be degraded.\n");
+ return false;
+ }
+
+ return true;
+}
+
int
i915_gem_init_hw(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
int ret;
+ if (!intel_enable_gtt())
+ return -EIO;
+
+ i915_gem_l3_remap(dev);
+
i915_gem_init_swizzling(dev);
ret = intel_init_render_ring_buffer(dev);
@@ -3536,7 +3766,7 @@ i915_gem_init_hw(struct drm_device *dev)
goto cleanup_render_ring;
}
- if (HAS_BLT(dev)) {
+ if (intel_enable_blt(dev)) {
ret = intel_init_blt_ring_buffer(dev);
if (ret)
goto cleanup_bsd_ring;
@@ -3544,6 +3774,11 @@ i915_gem_init_hw(struct drm_device *dev)
dev_priv->next_seqno = 1;
+ /*
+ * XXX: There was some w/a described somewhere suggesting loading
+ * contexts before PPGTT.
+ */
+ i915_gem_context_init(dev);
i915_gem_init_ppgtt(dev);
return 0;