diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-26 23:18:18 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-26 23:18:18 +0200 |
commit | bd22dc17e49973d3d4925970260e9e37f7580a9f (patch) | |
tree | 581a7c7527f628aa91eb2e0680b765a9673bc974 /drivers/gpu/drm/i915/i915_gem.c | |
parent | Merge tag 'dlm-3.6' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland... (diff) | |
parent | Merge branch 'drm-intel-fixes' of git://people.freedesktop.org/~danvet/drm-in... (diff) | |
download | linux-bd22dc17e49973d3d4925970260e9e37f7580a9f.tar.xz linux-bd22dc17e49973d3d4925970260e9e37f7580a9f.zip |
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"One of the smaller drm -next pulls in ages!
Ben (nouveau) has a rewrite in progress but we decided to leave it
stew for another cycle, so just some fixes from him.
- radeon: lots of documentation work, fixes, more ring and locking
changes, pcie gen2, more dp fixes.
- i915: haswell features, gpu reset fixes, /dev/agpgart removal on
machines that we never used it on, more VGA/HDP fix., more DP fixes
- drm core: cleanups from Daniel, sis 64-bit fixes, range allocator
colouring.
but yeah fairly quiet merge this time, probably because I missed half
of it!"
Trivial add-add conflict in include/linux/pci_regs.h
* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (255 commits)
drm/nouveau: init vblank requests list
drm/nv50: extend vblank semaphore to generic dmaobj + offset pair
drm/nouveau: mark most of our ioctls as deprecated, move to compat layer
drm/nouveau: move current gpuobj code out of nouveau_object.c
drm/nouveau/gem: fix object reference leak in a failure path
drm/nv50: rename INVALID_QUERY_OR_TEXTURE error to INVALID_OPERATION
drm/nv84: decode PCRYPT errors
drm/nouveau: dcb table quirk for fdo#50830
nouveau: Fix alignment requirements on src and dst addresses
drm/i915: unbreak lastclose for failed driver init
drm/i915: Set the context before setting up regs for the context.
drm/i915: constify mode in crtc_mode_fixup
drm/i915/lvds: ditch ->prepare special case
drm/i915: dereferencing an error pointer
drm/i915: fix invalid reference handling of the default ctx obj
drm/i915: Add -EIO to the list of known errors for __wait_seqno
drm/i915: Flush the context object from the CPU caches upon switching
drm/radeon: fix dpms on/off on trinity/aruba v2
drm/radeon: on hotplug force link training to happen (v2)
drm/radeon: fix hotplug of DP to DVI|HDMI passive adapters (v2)
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 351 |
1 files changed, 293 insertions, 58 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 288d7b8f49ae..5c4657a54f97 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -96,9 +96,18 @@ i915_gem_wait_for_error(struct drm_device *dev) if (!atomic_read(&dev_priv->mm.wedged)) return 0; - ret = wait_for_completion_interruptible(x); - if (ret) + /* + * Only wait 10 seconds for the gpu reset to complete to avoid hanging + * userspace. If it takes that long something really bad is going on and + * we should simply try to bail out and fail as gracefully as possible. + */ + ret = wait_for_completion_interruptible_timeout(x, 10*HZ); + if (ret == 0) { + DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); + return -EIO; + } else if (ret < 0) { return ret; + } if (atomic_read(&dev_priv->mm.wedged)) { /* GPU is hung, bump the completion count to account for @@ -1122,7 +1131,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) obj->fault_mappable = true; - pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) + + pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) + page_offset; /* Finally, remap it using the new GTT offset */ @@ -1132,6 +1141,11 @@ unlock: out: switch (ret) { case -EIO: + /* If this -EIO is due to a gpu hang, give the reset code a + * chance to clean up the mess. Otherwise return the proper + * SIGBUS. */ + if (!atomic_read(&dev_priv->mm.wedged)) + return VM_FAULT_SIGBUS; case -EAGAIN: /* Give the error handler a chance to run and move the * objects off the GPU active list. Next time we service the @@ -1568,6 +1582,21 @@ i915_add_request(struct intel_ring_buffer *ring, int was_empty; int ret; + /* + * Emit any outstanding flushes - execbuf can fail to emit the flush + * after having emitted the batchbuffer command. Hence we need to fix + * things up similar to emitting the lazy request. The difference here + * is that the flush _must_ happen before the next request, no matter + * what. + */ + if (ring->gpu_caches_dirty) { + ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + ring->gpu_caches_dirty = false; + } + BUG_ON(request == NULL); seqno = i915_gem_next_request_seqno(ring); @@ -1613,6 +1642,9 @@ i915_add_request(struct intel_ring_buffer *ring, queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); } + + WARN_ON(!list_empty(&ring->gpu_write_list)); + return 0; } @@ -1827,14 +1859,11 @@ i915_gem_retire_work_handler(struct work_struct *work) */ idle = true; for_each_ring(ring, dev_priv, i) { - if (!list_empty(&ring->gpu_write_list)) { + if (ring->gpu_caches_dirty) { struct drm_i915_gem_request *request; - int ret; - ret = i915_gem_flush_ring(ring, - 0, I915_GEM_GPU_DOMAINS); request = kzalloc(sizeof(*request), GFP_KERNEL); - if (ret || request == NULL || + if (request == NULL || i915_add_request(ring, NULL, request)) kfree(request); } @@ -1848,11 +1877,10 @@ i915_gem_retire_work_handler(struct work_struct *work) mutex_unlock(&dev->struct_mutex); } -static int -i915_gem_check_wedge(struct drm_i915_private *dev_priv) +int +i915_gem_check_wedge(struct drm_i915_private *dev_priv, + bool interruptible) { - BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); - if (atomic_read(&dev_priv->mm.wedged)) { struct completion *x = &dev_priv->error_completion; bool recovery_complete; @@ -1863,7 +1891,16 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv) recovery_complete = x->done > 0; spin_unlock_irqrestore(&x->wait.lock, flags); - return recovery_complete ? -EIO : -EAGAIN; + /* Non-interruptible callers can't handle -EAGAIN, hence return + * -EIO unconditionally for these. */ + if (!interruptible) + return -EIO; + + /* Recovery complete, but still wedged means reset failure. */ + if (recovery_complete) + return -EIO; + + return -EAGAIN; } return 0; @@ -1899,34 +1936,85 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) return ret; } +/** + * __wait_seqno - wait until execution of seqno has finished + * @ring: the ring expected to report seqno + * @seqno: duh! + * @interruptible: do an interruptible wait (normally yes) + * @timeout: in - how long to wait (NULL forever); out - how much time remaining + * + * Returns 0 if the seqno was found within the alloted time. Else returns the + * errno with remaining time filled in timeout argument. + */ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, - bool interruptible) + bool interruptible, struct timespec *timeout) { drm_i915_private_t *dev_priv = ring->dev->dev_private; - int ret = 0; + struct timespec before, now, wait_time={1,0}; + unsigned long timeout_jiffies; + long end; + bool wait_forever = true; + int ret; if (i915_seqno_passed(ring->get_seqno(ring), seqno)) return 0; trace_i915_gem_request_wait_begin(ring, seqno); + + if (timeout != NULL) { + wait_time = *timeout; + wait_forever = false; + } + + timeout_jiffies = timespec_to_jiffies(&wait_time); + if (WARN_ON(!ring->irq_get(ring))) return -ENODEV; + /* Record current time in case interrupted by signal, or wedged * */ + getrawmonotonic(&before); + #define EXIT_COND \ (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ atomic_read(&dev_priv->mm.wedged)) + do { + if (interruptible) + end = wait_event_interruptible_timeout(ring->irq_queue, + EXIT_COND, + timeout_jiffies); + else + end = wait_event_timeout(ring->irq_queue, EXIT_COND, + timeout_jiffies); - if (interruptible) - ret = wait_event_interruptible(ring->irq_queue, - EXIT_COND); - else - wait_event(ring->irq_queue, EXIT_COND); + ret = i915_gem_check_wedge(dev_priv, interruptible); + if (ret) + end = ret; + } while (end == 0 && wait_forever); + + getrawmonotonic(&now); ring->irq_put(ring); trace_i915_gem_request_wait_end(ring, seqno); #undef EXIT_COND - return ret; + if (timeout) { + struct timespec sleep_time = timespec_sub(now, before); + *timeout = timespec_sub(*timeout, sleep_time); + } + + switch (end) { + case -EIO: + case -EAGAIN: /* Wedged */ + case -ERESTARTSYS: /* Signal */ + return (int)end; + case 0: /* Timeout */ + if (timeout) + set_normalized_timespec(timeout, 0, 0); + return -ETIME; + default: /* Completed */ + WARN_ON(end < 0); /* We're not aware of other errors */ + return 0; + } } /** @@ -1934,15 +2022,14 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, * request and object lists appropriately for that event. */ int -i915_wait_request(struct intel_ring_buffer *ring, - uint32_t seqno) +i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) { drm_i915_private_t *dev_priv = ring->dev->dev_private; int ret = 0; BUG_ON(seqno == 0); - ret = i915_gem_check_wedge(dev_priv); + ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible); if (ret) return ret; @@ -1950,9 +2037,7 @@ i915_wait_request(struct intel_ring_buffer *ring, if (ret) return ret; - ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible); - if (atomic_read(&dev_priv->mm.wedged)) - ret = -EAGAIN; + ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL); return ret; } @@ -1975,7 +2060,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) * it. */ if (obj->active) { - ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); + ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno); if (ret) return ret; i915_gem_retire_requests_ring(obj->ring); @@ -1985,6 +2070,115 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) } /** + * Ensures that an object will eventually get non-busy by flushing any required + * write domains, emitting any outstanding lazy request and retiring and + * completed requests. + */ +static int +i915_gem_object_flush_active(struct drm_i915_gem_object *obj) +{ + int ret; + + if (obj->active) { + ret = i915_gem_object_flush_gpu_write_domain(obj); + if (ret) + return ret; + + ret = i915_gem_check_olr(obj->ring, + obj->last_rendering_seqno); + if (ret) + return ret; + i915_gem_retire_requests_ring(obj->ring); + } + + return 0; +} + +/** + * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT + * @DRM_IOCTL_ARGS: standard ioctl arguments + * + * Returns 0 if successful, else an error is returned with the remaining time in + * the timeout parameter. + * -ETIME: object is still busy after timeout + * -ERESTARTSYS: signal interrupted the wait + * -ENONENT: object doesn't exist + * Also possible, but rare: + * -EAGAIN: GPU wedged + * -ENOMEM: damn + * -ENODEV: Internal IRQ fail + * -E?: The add request failed + * + * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any + * non-zero timeout parameter the wait ioctl will wait for the given number of + * nanoseconds on an object becoming unbusy. Since the wait itself does so + * without holding struct_mutex the object may become re-busied before this + * function completes. A similar but shorter * race condition exists in the busy + * ioctl + */ +int +i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_i915_gem_wait *args = data; + struct drm_i915_gem_object *obj; + struct intel_ring_buffer *ring = NULL; + struct timespec timeout_stack, *timeout = NULL; + u32 seqno = 0; + int ret = 0; + + if (args->timeout_ns >= 0) { + timeout_stack = ns_to_timespec(args->timeout_ns); + timeout = &timeout_stack; + } + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); + if (&obj->base == NULL) { + mutex_unlock(&dev->struct_mutex); + return -ENOENT; + } + + /* Need to make sure the object gets inactive eventually. */ + ret = i915_gem_object_flush_active(obj); + if (ret) + goto out; + + if (obj->active) { + seqno = obj->last_rendering_seqno; + ring = obj->ring; + } + + if (seqno == 0) + goto out; + + /* Do this after OLR check to make sure we make forward progress polling + * on this IOCTL with a 0 timeout (like busy ioctl) + */ + if (!args->timeout_ns) { + ret = -ETIME; + goto out; + } + + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + + ret = __wait_seqno(ring, seqno, true, timeout); + if (timeout) { + WARN_ON(!timespec_valid(timeout)); + args->timeout_ns = timespec_to_ns(timeout); + } + return ret; + +out: + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return ret; +} + +/** * i915_gem_object_sync - sync an object to a ring. * * @obj: object which may be in use on another ring. @@ -2160,7 +2354,7 @@ static int i915_ring_idle(struct intel_ring_buffer *ring) return ret; } - return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); + return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring)); } int i915_gpu_idle(struct drm_device *dev) @@ -2178,6 +2372,10 @@ int i915_gpu_idle(struct drm_device *dev) /* Is the device fubar? */ if (WARN_ON(!list_empty(&ring->gpu_write_list))) return -EBUSY; + + ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); + if (ret) + return ret; } return 0; @@ -2364,7 +2562,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) } if (obj->last_fenced_seqno) { - ret = i915_wait_request(obj->ring, obj->last_fenced_seqno); + ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); if (ret) return ret; @@ -2551,8 +2749,8 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, if (map_and_fenceable) free_space = drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, - size, alignment, 0, - dev_priv->mm.gtt_mappable_end, + size, alignment, + 0, dev_priv->mm.gtt_mappable_end, 0); else free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, @@ -2563,7 +2761,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->gtt_space = drm_mm_get_block_range_generic(free_space, size, alignment, 0, - dev_priv->mm.gtt_mappable_end, + 0, dev_priv->mm.gtt_mappable_end, 0); else obj->gtt_space = @@ -3030,7 +3228,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (seqno == 0) return 0; - ret = __wait_seqno(ring, seqno, true); + ret = __wait_seqno(ring, seqno, true, NULL); if (ret == 0) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); @@ -3199,30 +3397,9 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * become non-busy without any further actions, therefore emit any * necessary flushes here. */ - args->busy = obj->active; - if (args->busy) { - /* Unconditionally flush objects, even when the gpu still uses this - * object. Userspace calling this function indicates that it wants to - * use this buffer rather sooner than later, so issuing the required - * flush earlier is beneficial. - */ - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, - 0, obj->base.write_domain); - } else { - ret = i915_gem_check_olr(obj->ring, - obj->last_rendering_seqno); - } + ret = i915_gem_object_flush_active(obj); - /* Update the active list for the hardware's current position. - * Otherwise this only updates on a delayed timer or when irqs - * are actually unmasked, and our working set ends up being - * larger than required. - */ - i915_gem_retire_requests_ring(obj->ring); - - args->busy = obj->active; - } + args->busy = obj->active; drm_gem_object_unreference(&obj->base); unlock: @@ -3435,6 +3612,38 @@ i915_gem_idle(struct drm_device *dev) return 0; } +void i915_gem_l3_remap(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + u32 misccpctl; + int i; + + if (!IS_IVYBRIDGE(dev)) + return; + + if (!dev_priv->mm.l3_remap_info) + return; + + misccpctl = I915_READ(GEN7_MISCCPCTL); + I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); + POSTING_READ(GEN7_MISCCPCTL); + + for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { + u32 remap = I915_READ(GEN7_L3LOG_BASE + i); + if (remap && remap != dev_priv->mm.l3_remap_info[i/4]) + DRM_DEBUG("0x%x was already programmed to %x\n", + GEN7_L3LOG_BASE + i, remap); + if (remap && !dev_priv->mm.l3_remap_info[i/4]) + DRM_DEBUG_DRIVER("Clearing remapped register\n"); + I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->mm.l3_remap_info[i/4]); + } + + /* Make sure all the writes land before disabling dop clock gating */ + POSTING_READ(GEN7_L3LOG_BASE); + + I915_WRITE(GEN7_MISCCPCTL, misccpctl); +} + void i915_gem_init_swizzling(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; @@ -3518,12 +3727,33 @@ void i915_gem_init_ppgtt(struct drm_device *dev) } } +static bool +intel_enable_blt(struct drm_device *dev) +{ + if (!HAS_BLT(dev)) + return false; + + /* The blitter was dysfunctional on early prototypes */ + if (IS_GEN6(dev) && dev->pdev->revision < 8) { + DRM_INFO("BLT not supported on this pre-production hardware;" + " graphics performance will be degraded.\n"); + return false; + } + + return true; +} + int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; int ret; + if (!intel_enable_gtt()) + return -EIO; + + i915_gem_l3_remap(dev); + i915_gem_init_swizzling(dev); ret = intel_init_render_ring_buffer(dev); @@ -3536,7 +3766,7 @@ i915_gem_init_hw(struct drm_device *dev) goto cleanup_render_ring; } - if (HAS_BLT(dev)) { + if (intel_enable_blt(dev)) { ret = intel_init_blt_ring_buffer(dev); if (ret) goto cleanup_bsd_ring; @@ -3544,6 +3774,11 @@ i915_gem_init_hw(struct drm_device *dev) dev_priv->next_seqno = 1; + /* + * XXX: There was some w/a described somewhere suggesting loading + * contexts before PPGTT. + */ + i915_gem_context_init(dev); i915_gem_init_ppgtt(dev); return 0; |