summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c824
1 files changed, 325 insertions, 499 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c65c639f772..b851bd34ca18 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -39,12 +39,6 @@
static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
- bool write);
-static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
- uint64_t offset,
- uint64_t size);
-static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
unsigned alignment,
bool map_and_fenceable);
@@ -125,25 +119,6 @@ i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
return obj->gtt_space && !obj->active && obj->pin_count == 0;
}
-void i915_gem_do_init(struct drm_device *dev,
- unsigned long start,
- unsigned long mappable_end,
- unsigned long end)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
-
- drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
-
- dev_priv->mm.gtt_start = start;
- dev_priv->mm.gtt_mappable_end = mappable_end;
- dev_priv->mm.gtt_end = end;
- dev_priv->mm.gtt_total = end - start;
- dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
-
- /* Take over this portion of the GTT */
- intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
-}
-
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
@@ -154,8 +129,13 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
(args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
return -EINVAL;
+ /* GEM with user mode setting was never supported on ilk and later. */
+ if (INTEL_INFO(dev)->gen >= 5)
+ return -ENODEV;
+
mutex_lock(&dev->struct_mutex);
- i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
+ i915_gem_init_global_gtt(dev, args->gtt_start,
+ args->gtt_end, args->gtt_end);
mutex_unlock(&dev->struct_mutex);
return 0;
@@ -259,66 +239,6 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
obj->tiling_mode != I915_TILING_NONE;
}
-/**
- * This is the fast shmem pread path, which attempts to copy_from_user directly
- * from the backing pages of the object to the user's address space. On a
- * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
- */
-static int
-i915_gem_shmem_pread_fast(struct drm_device *dev,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pread *args,
- struct drm_file *file)
-{
- struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
- ssize_t remain;
- loff_t offset;
- char __user *user_data;
- int page_offset, page_length;
-
- user_data = (char __user *) (uintptr_t) args->data_ptr;
- remain = args->size;
-
- offset = args->offset;
-
- while (remain > 0) {
- struct page *page;
- char *vaddr;
- int ret;
-
- /* Operation in this page
- *
- * page_offset = offset within page
- * page_length = bytes to copy for this page
- */
- page_offset = offset_in_page(offset);
- page_length = remain;
- if ((page_offset + remain) > PAGE_SIZE)
- page_length = PAGE_SIZE - page_offset;
-
- page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
- if (IS_ERR(page))
- return PTR_ERR(page);
-
- vaddr = kmap_atomic(page);
- ret = __copy_to_user_inatomic(user_data,
- vaddr + page_offset,
- page_length);
- kunmap_atomic(vaddr);
-
- mark_page_accessed(page);
- page_cache_release(page);
- if (ret)
- return -EFAULT;
-
- remain -= page_length;
- user_data += page_length;
- offset += page_length;
- }
-
- return 0;
-}
-
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
const char *gpu_vaddr, int gpu_offset,
@@ -371,37 +291,121 @@ __copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset,
return 0;
}
-/**
- * This is the fallback shmem pread path, which allocates temporary storage
- * in kernel space to copy_to_user into outside of the struct_mutex, so we
- * can copy out of the object's backing pages while holding the struct mutex
- * and not take page faults.
- */
+/* Per-page copy function for the shmem pread fastpath.
+ * Flushes invalid cachelines before reading the target if
+ * needs_clflush is set. */
static int
-i915_gem_shmem_pread_slow(struct drm_device *dev,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pread *args,
- struct drm_file *file)
+shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
+ char __user *user_data,
+ bool page_do_bit17_swizzling, bool needs_clflush)
+{
+ char *vaddr;
+ int ret;
+
+ if (unlikely(page_do_bit17_swizzling))
+ return -EINVAL;
+
+ vaddr = kmap_atomic(page);
+ if (needs_clflush)
+ drm_clflush_virt_range(vaddr + shmem_page_offset,
+ page_length);
+ ret = __copy_to_user_inatomic(user_data,
+ vaddr + shmem_page_offset,
+ page_length);
+ kunmap_atomic(vaddr);
+
+ return ret;
+}
+
+static void
+shmem_clflush_swizzled_range(char *addr, unsigned long length,
+ bool swizzled)
+{
+ if (unlikely(swizzled)) {
+ unsigned long start = (unsigned long) addr;
+ unsigned long end = (unsigned long) addr + length;
+
+ /* For swizzling simply ensure that we always flush both
+ * channels. Lame, but simple and it works. Swizzled
+ * pwrite/pread is far from a hotpath - current userspace
+ * doesn't use it at all. */
+ start = round_down(start, 128);
+ end = round_up(end, 128);
+
+ drm_clflush_virt_range((void *)start, end - start);
+ } else {
+ drm_clflush_virt_range(addr, length);
+ }
+
+}
+
+/* Only difference to the fast-path function is that this can handle bit17
+ * and uses non-atomic copy and kmap functions. */
+static int
+shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+ char __user *user_data,
+ bool page_do_bit17_swizzling, bool needs_clflush)
+{
+ char *vaddr;
+ int ret;
+
+ vaddr = kmap(page);
+ if (needs_clflush)
+ shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+ page_length,
+ page_do_bit17_swizzling);
+
+ if (page_do_bit17_swizzling)
+ ret = __copy_to_user_swizzled(user_data,
+ vaddr, shmem_page_offset,
+ page_length);
+ else
+ ret = __copy_to_user(user_data,
+ vaddr + shmem_page_offset,
+ page_length);
+ kunmap(page);
+
+ return ret;
+}
+
+static int
+i915_gem_shmem_pread(struct drm_device *dev,
+ struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_pread *args,
+ struct drm_file *file)
{
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
char __user *user_data;
ssize_t remain;
loff_t offset;
- int shmem_page_offset, page_length, ret;
+ int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
+ int hit_slowpath = 0;
+ int prefaulted = 0;
+ int needs_clflush = 0;
+ int release_page;
user_data = (char __user *) (uintptr_t) args->data_ptr;
remain = args->size;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
- offset = args->offset;
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
+ /* If we're not in the cpu read domain, set ourself into the gtt
+ * read domain and manually flush cachelines (if required). This
+ * optimizes for the case when the gpu will dirty the data
+ * anyway again before the next pread happens. */
+ if (obj->cache_level == I915_CACHE_NONE)
+ needs_clflush = 1;
+ ret = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (ret)
+ return ret;
+ }
- mutex_unlock(&dev->struct_mutex);
+ offset = args->offset;
while (remain > 0) {
struct page *page;
- char *vaddr;
/* Operation in this page
*
@@ -413,28 +417,51 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
if ((shmem_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - shmem_page_offset;
- page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto out;
+ if (obj->pages) {
+ page = obj->pages[offset >> PAGE_SHIFT];
+ release_page = 0;
+ } else {
+ page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ goto out;
+ }
+ release_page = 1;
}
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
- vaddr = kmap(page);
- if (page_do_bit17_swizzling)
- ret = __copy_to_user_swizzled(user_data,
- vaddr, shmem_page_offset,
- page_length);
- else
- ret = __copy_to_user(user_data,
- vaddr + shmem_page_offset,
- page_length);
- kunmap(page);
+ ret = shmem_pread_fast(page, shmem_page_offset, page_length,
+ user_data, page_do_bit17_swizzling,
+ needs_clflush);
+ if (ret == 0)
+ goto next_page;
- mark_page_accessed(page);
+ hit_slowpath = 1;
+ page_cache_get(page);
+ mutex_unlock(&dev->struct_mutex);
+
+ if (!prefaulted) {
+ ret = fault_in_multipages_writeable(user_data, remain);
+ /* Userspace is tricking us, but we've already clobbered
+ * its pages with the prefault and promised to write the
+ * data up to the first fault. Hence ignore any errors
+ * and just continue. */
+ (void)ret;
+ prefaulted = 1;
+ }
+
+ ret = shmem_pread_slow(page, shmem_page_offset, page_length,
+ user_data, page_do_bit17_swizzling,
+ needs_clflush);
+
+ mutex_lock(&dev->struct_mutex);
page_cache_release(page);
+next_page:
+ mark_page_accessed(page);
+ if (release_page)
+ page_cache_release(page);
if (ret) {
ret = -EFAULT;
@@ -447,10 +474,11 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
}
out:
- mutex_lock(&dev->struct_mutex);
- /* Fixup: Kill any reinstated backing storage pages */
- if (obj->madv == __I915_MADV_PURGED)
- i915_gem_object_truncate(obj);
+ if (hit_slowpath) {
+ /* Fixup: Kill any reinstated backing storage pages */
+ if (obj->madv == __I915_MADV_PURGED)
+ i915_gem_object_truncate(obj);
+ }
return ret;
}
@@ -476,11 +504,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
args->size))
return -EFAULT;
- ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
- args->size);
- if (ret)
- return -EFAULT;
-
ret = i915_mutex_lock_interruptible(dev);
if (ret)
return ret;
@@ -500,17 +523,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = i915_gem_object_set_cpu_read_domain_range(obj,
- args->offset,
- args->size);
- if (ret)
- goto out;
-
- ret = -EFAULT;
- if (!i915_gem_object_needs_bit17_swizzle(obj))
- ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
- if (ret == -EFAULT)
- ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
+ ret = i915_gem_shmem_pread(dev, obj, args, file);
out:
drm_gem_object_unreference(&obj->base);
@@ -539,30 +552,6 @@ fast_user_write(struct io_mapping *mapping,
return unwritten;
}
-/* Here's the write path which can sleep for
- * page faults
- */
-
-static inline void
-slow_kernel_write(struct io_mapping *mapping,
- loff_t gtt_base, int gtt_offset,
- struct page *user_page, int user_offset,
- int length)
-{
- char __iomem *dst_vaddr;
- char *src_vaddr;
-
- dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
- src_vaddr = kmap(user_page);
-
- memcpy_toio(dst_vaddr + gtt_offset,
- src_vaddr + user_offset,
- length);
-
- kunmap(user_page);
- io_mapping_unmap(dst_vaddr);
-}
-
/**
* This is the fast pwrite path, where we copy the data directly from the
* user into the GTT, uncached.
@@ -577,7 +566,19 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
ssize_t remain;
loff_t offset, page_base;
char __user *user_data;
- int page_offset, page_length;
+ int page_offset, page_length, ret;
+
+ ret = i915_gem_object_pin(obj, 0, true);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (ret)
+ goto out_unpin;
+
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto out_unpin;
user_data = (char __user *) (uintptr_t) args->data_ptr;
remain = args->size;
@@ -602,214 +603,133 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
* retry in the slow path.
*/
if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
- page_offset, user_data, page_length))
- return -EFAULT;
+ page_offset, user_data, page_length)) {
+ ret = -EFAULT;
+ goto out_unpin;
+ }
remain -= page_length;
user_data += page_length;
offset += page_length;
}
- return 0;
+out_unpin:
+ i915_gem_object_unpin(obj);
+out:
+ return ret;
}
-/**
- * This is the fallback GTT pwrite path, which uses get_user_pages to pin
- * the memory and maps it using kmap_atomic for copying.
- *
- * This code resulted in x11perf -rgb10text consuming about 10% more CPU
- * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
- */
+/* Per-page copy function for the shmem pwrite fastpath.
+ * Flushes invalid cachelines before writing to the target if
+ * needs_clflush_before is set and flushes out any written cachelines after
+ * writing if needs_clflush is set. */
static int
-i915_gem_gtt_pwrite_slow(struct drm_device *dev,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file)
+shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
+ char __user *user_data,
+ bool page_do_bit17_swizzling,
+ bool needs_clflush_before,
+ bool needs_clflush_after)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
- ssize_t remain;
- loff_t gtt_page_base, offset;
- loff_t first_data_page, last_data_page, num_pages;
- loff_t pinned_pages, i;
- struct page **user_pages;
- struct mm_struct *mm = current->mm;
- int gtt_page_offset, data_page_offset, data_page_index, page_length;
+ char *vaddr;
int ret;
- uint64_t data_ptr = args->data_ptr;
-
- remain = args->size;
-
- /* Pin the user pages containing the data. We can't fault while
- * holding the struct mutex, and all of the pwrite implementations
- * want to hold it while dereferencing the user data.
- */
- first_data_page = data_ptr / PAGE_SIZE;
- last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
- num_pages = last_data_page - first_data_page + 1;
-
- user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
- if (user_pages == NULL)
- return -ENOMEM;
-
- mutex_unlock(&dev->struct_mutex);
- down_read(&mm->mmap_sem);
- pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
- num_pages, 0, 0, user_pages, NULL);
- up_read(&mm->mmap_sem);
- mutex_lock(&dev->struct_mutex);
- if (pinned_pages < num_pages) {
- ret = -EFAULT;
- goto out_unpin_pages;
- }
-
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret)
- goto out_unpin_pages;
-
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- goto out_unpin_pages;
-
- offset = obj->gtt_offset + args->offset;
- while (remain > 0) {
- /* Operation in this page
- *
- * gtt_page_base = page offset within aperture
- * gtt_page_offset = offset within page in aperture
- * data_page_index = page number in get_user_pages return
- * data_page_offset = offset with data_page_index page.
- * page_length = bytes to copy for this page
- */
- gtt_page_base = offset & PAGE_MASK;
- gtt_page_offset = offset_in_page(offset);
- data_page_index = data_ptr / PAGE_SIZE - first_data_page;
- data_page_offset = offset_in_page(data_ptr);
-
- page_length = remain;
- if ((gtt_page_offset + page_length) > PAGE_SIZE)
- page_length = PAGE_SIZE - gtt_page_offset;
- if ((data_page_offset + page_length) > PAGE_SIZE)
- page_length = PAGE_SIZE - data_page_offset;
-
- slow_kernel_write(dev_priv->mm.gtt_mapping,
- gtt_page_base, gtt_page_offset,
- user_pages[data_page_index],
- data_page_offset,
- page_length);
-
- remain -= page_length;
- offset += page_length;
- data_ptr += page_length;
- }
+ if (unlikely(page_do_bit17_swizzling))
+ return -EINVAL;
-out_unpin_pages:
- for (i = 0; i < pinned_pages; i++)
- page_cache_release(user_pages[i]);
- drm_free_large(user_pages);
+ vaddr = kmap_atomic(page);
+ if (needs_clflush_before)
+ drm_clflush_virt_range(vaddr + shmem_page_offset,
+ page_length);
+ ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
+ user_data,
+ page_length);
+ if (needs_clflush_after)
+ drm_clflush_virt_range(vaddr + shmem_page_offset,
+ page_length);
+ kunmap_atomic(vaddr);
return ret;
}
-/**
- * This is the fast shmem pwrite path, which attempts to directly
- * copy_from_user into the kmapped pages backing the object.
- */
+/* Only difference to the fast-path function is that this can handle bit17
+ * and uses non-atomic copy and kmap functions. */
static int
-i915_gem_shmem_pwrite_fast(struct drm_device *dev,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file)
+shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+ char __user *user_data,
+ bool page_do_bit17_swizzling,
+ bool needs_clflush_before,
+ bool needs_clflush_after)
{
- struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
- ssize_t remain;
- loff_t offset;
- char __user *user_data;
- int page_offset, page_length;
-
- user_data = (char __user *) (uintptr_t) args->data_ptr;
- remain = args->size;
-
- offset = args->offset;
- obj->dirty = 1;
-
- while (remain > 0) {
- struct page *page;
- char *vaddr;
- int ret;
-
- /* Operation in this page
- *
- * page_offset = offset within page
- * page_length = bytes to copy for this page
- */
- page_offset = offset_in_page(offset);
- page_length = remain;
- if ((page_offset + remain) > PAGE_SIZE)
- page_length = PAGE_SIZE - page_offset;
-
- page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ char *vaddr;
+ int ret;
- vaddr = kmap_atomic(page);
- ret = __copy_from_user_inatomic(vaddr + page_offset,
+ vaddr = kmap(page);
+ if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
+ shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+ page_length,
+ page_do_bit17_swizzling);
+ if (page_do_bit17_swizzling)
+ ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
user_data,
page_length);
- kunmap_atomic(vaddr);
-
- set_page_dirty(page);
- mark_page_accessed(page);
- page_cache_release(page);
-
- /* If we get a fault while copying data, then (presumably) our
- * source page isn't available. Return the error and we'll
- * retry in the slow path.
- */
- if (ret)
- return -EFAULT;
-
- remain -= page_length;
- user_data += page_length;
- offset += page_length;
- }
+ else
+ ret = __copy_from_user(vaddr + shmem_page_offset,
+ user_data,
+ page_length);
+ if (needs_clflush_after)
+ shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+ page_length,
+ page_do_bit17_swizzling);
+ kunmap(page);
- return 0;
+ return ret;
}
-/**
- * This is the fallback shmem pwrite path, which uses get_user_pages to pin
- * the memory and maps it using kmap_atomic for copying.
- *
- * This avoids taking mmap_sem for faulting on the user's address while the
- * struct_mutex is held.
- */
static int
-i915_gem_shmem_pwrite_slow(struct drm_device *dev,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file)
+i915_gem_shmem_pwrite(struct drm_device *dev,
+ struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_pwrite *args,
+ struct drm_file *file)
{
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
ssize_t remain;
loff_t offset;
char __user *user_data;
- int shmem_page_offset, page_length, ret;
+ int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
+ int hit_slowpath = 0;
+ int needs_clflush_after = 0;
+ int needs_clflush_before = 0;
+ int release_page;
user_data = (char __user *) (uintptr_t) args->data_ptr;
remain = args->size;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+ /* If we're not in the cpu write domain, set ourself into the gtt
+ * write domain and manually flush cachelines (if required). This
+ * optimizes for the case when the gpu will use the data
+ * right away and we therefore have to clflush anyway. */
+ if (obj->cache_level == I915_CACHE_NONE)
+ needs_clflush_after = 1;
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (ret)
+ return ret;
+ }
+ /* Same trick applies for invalidate partially written cachelines before
+ * writing. */
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
+ && obj->cache_level == I915_CACHE_NONE)
+ needs_clflush_before = 1;
+
offset = args->offset;
obj->dirty = 1;
- mutex_unlock(&dev->struct_mutex);
-
while (remain > 0) {
struct page *page;
- char *vaddr;
+ int partial_cacheline_write;
/* Operation in this page
*
@@ -822,29 +742,51 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
if ((shmem_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - shmem_page_offset;
- page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto out;
+ /* If we don't overwrite a cacheline completely we need to be
+ * careful to have up-to-date data by first clflushing. Don't
+ * overcomplicate things and flush the entire patch. */
+ partial_cacheline_write = needs_clflush_before &&
+ ((shmem_page_offset | page_length)
+ & (boot_cpu_data.x86_clflush_size - 1));
+
+ if (obj->pages) {
+ page = obj->pages[offset >> PAGE_SHIFT];
+ release_page = 0;
+ } else {
+ page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ goto out;
+ }
+ release_page = 1;
}
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
- vaddr = kmap(page);
- if (page_do_bit17_swizzling)
- ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
- user_data,
- page_length);
- else
- ret = __copy_from_user(vaddr + shmem_page_offset,
- user_data,
- page_length);
- kunmap(page);
+ ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
+ user_data, page_do_bit17_swizzling,
+ partial_cacheline_write,
+ needs_clflush_after);
+ if (ret == 0)
+ goto next_page;
+ hit_slowpath = 1;
+ page_cache_get(page);
+ mutex_unlock(&dev->struct_mutex);
+
+ ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
+ user_data, page_do_bit17_swizzling,
+ partial_cacheline_write,
+ needs_clflush_after);
+
+ mutex_lock(&dev->struct_mutex);
+ page_cache_release(page);
+next_page:
set_page_dirty(page);
mark_page_accessed(page);
- page_cache_release(page);
+ if (release_page)
+ page_cache_release(page);
if (ret) {
ret = -EFAULT;
@@ -857,17 +799,21 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
}
out:
- mutex_lock(&dev->struct_mutex);
- /* Fixup: Kill any reinstated backing storage pages */
- if (obj->madv == __I915_MADV_PURGED)
- i915_gem_object_truncate(obj);
- /* and flush dirty cachelines in case the object isn't in the cpu write
- * domain anymore. */
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- i915_gem_clflush_object(obj);
- intel_gtt_chipset_flush();
+ if (hit_slowpath) {
+ /* Fixup: Kill any reinstated backing storage pages */
+ if (obj->madv == __I915_MADV_PURGED)
+ i915_gem_object_truncate(obj);
+ /* and flush dirty cachelines in case the object isn't in the cpu write
+ * domain anymore. */
+ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+ i915_gem_clflush_object(obj);
+ intel_gtt_chipset_flush();
+ }
}
+ if (needs_clflush_after)
+ intel_gtt_chipset_flush();
+
return ret;
}
@@ -892,8 +838,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
args->size))
return -EFAULT;
- ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
- args->size);
+ ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
+ args->size);
if (ret)
return -EFAULT;
@@ -916,6 +862,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
+ ret = -EFAULT;
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
* different detiling behavior between reading and writing.
@@ -928,42 +875,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
}
if (obj->gtt_space &&
+ obj->cache_level == I915_CACHE_NONE &&
+ obj->map_and_fenceable &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- ret = i915_gem_object_pin(obj, 0, true);
- if (ret)
- goto out;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret)
- goto out_unpin;
-
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- goto out_unpin;
-
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
- if (ret == -EFAULT)
- ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
-
-out_unpin:
- i915_gem_object_unpin(obj);
-
- if (ret != -EFAULT)
- goto out;
- /* Fall through to the shmfs paths because the gtt paths might
- * fail with non-page-backed user pointers (e.g. gtt mappings
- * when moving data between textures). */
+ /* Note that the gtt paths might fail with non-page-backed user
+ * pointers (e.g. gtt mappings when moving data between
+ * textures). Fallback to the shmem path in that case. */
}
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
- if (ret)
- goto out;
-
- ret = -EFAULT;
- if (!i915_gem_object_needs_bit17_swizzle(obj))
- ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
if (ret == -EFAULT)
- ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
+ ret = i915_gem_shmem_pwrite(dev, obj, args, file);
out:
drm_gem_object_unreference(&obj->base);
@@ -1153,6 +1075,9 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
goto unlock;
}
+ if (!obj->has_global_gtt_mapping)
+ i915_gem_gtt_bind_object(obj, obj->cache_level);
+
if (obj->tiling_mode == I915_TILING_NONE)
ret = i915_gem_object_put_fence(obj);
else
@@ -1546,6 +1471,9 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
inode = obj->base.filp->f_path.dentry->d_inode;
shmem_truncate_range(inode, 0, (loff_t)-1);
+ if (obj->base.map_list.map)
+ drm_gem_free_mmap_offset(&obj->base);
+
obj->madv = __I915_MADV_PURGED;
}
@@ -1954,6 +1882,8 @@ i915_wait_request(struct intel_ring_buffer *ring,
if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
if (HAS_PCH_SPLIT(ring->dev))
ier = I915_READ(DEIER) | I915_READ(GTIER);
+ else if (IS_VALLEYVIEW(ring->dev))
+ ier = I915_READ(GTIER) | I915_READ(VLV_IER);
else
ier = I915_READ(IER);
if (!ier) {
@@ -2100,11 +2030,13 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
trace_i915_gem_object_unbind(obj);
- i915_gem_gtt_unbind_object(obj);
+ if (obj->has_global_gtt_mapping)
+ i915_gem_gtt_unbind_object(obj);
if (obj->has_aliasing_ppgtt_mapping) {
i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
obj->has_aliasing_ppgtt_mapping = 0;
}
+ i915_gem_gtt_finish_object(obj);
i915_gem_object_put_pages_gtt(obj);
@@ -2749,7 +2681,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
return ret;
}
- ret = i915_gem_gtt_bind_object(obj);
+ ret = i915_gem_gtt_prepare_object(obj);
if (ret) {
i915_gem_object_put_pages_gtt(obj);
drm_mm_put_block(obj->gtt_space);
@@ -2761,6 +2693,9 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
goto search_free;
}
+ if (!dev_priv->mm.aliasing_ppgtt)
+ i915_gem_gtt_bind_object(obj, obj->cache_level);
+
list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
@@ -2953,7 +2888,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
return ret;
}
- i915_gem_gtt_rebind_object(obj, cache_level);
+ if (obj->has_global_gtt_mapping)
+ i915_gem_gtt_bind_object(obj, cache_level);
if (obj->has_aliasing_ppgtt_mapping)
i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
obj, cache_level);
@@ -3082,7 +3018,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
* This function returns when the move is complete, including waiting on
* flushes to occur.
*/
-static int
+int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
uint32_t old_write_domain, old_read_domains;
@@ -3101,11 +3037,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
i915_gem_object_flush_gtt_write_domain(obj);
- /* If we have a partially-valid cache of the object in the CPU,
- * finish invalidating it and free the per-page flags.
- */
- i915_gem_object_set_to_full_cpu_read_domain(obj);
-
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
@@ -3136,113 +3067,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
return 0;
}
-/**
- * Moves the object from a partially CPU read to a full one.
- *
- * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
- * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
- */
-static void
-i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
-{
- if (!obj->page_cpu_valid)
- return;
-
- /* If we're partially in the CPU read domain, finish moving it in.
- */
- if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
- int i;
-
- for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
- if (obj->page_cpu_valid[i])
- continue;
- drm_clflush_pages(obj->pages + i, 1);
- }
- }
-
- /* Free the page_cpu_valid mappings which are now stale, whether
- * or not we've got I915_GEM_DOMAIN_CPU.
- */
- kfree(obj->page_cpu_valid);
- obj->page_cpu_valid = NULL;
-}
-
-/**
- * Set the CPU read domain on a range of the object.
- *
- * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
- * not entirely valid. The page_cpu_valid member of the object flags which
- * pages have been flushed, and will be respected by
- * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
- * of the whole object.
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-static int
-i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
- uint64_t offset, uint64_t size)
-{
- uint32_t old_read_domains;
- int i, ret;
-
- if (offset == 0 && size == obj->base.size)
- return i915_gem_object_set_to_cpu_domain(obj, 0);
-
- ret = i915_gem_object_flush_gpu_write_domain(obj);
- if (ret)
- return ret;
-
- ret = i915_gem_object_wait_rendering(obj);
- if (ret)
- return ret;
-
- i915_gem_object_flush_gtt_write_domain(obj);
-
- /* If we're already fully in the CPU read domain, we're done. */
- if (obj->page_cpu_valid == NULL &&
- (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
- return 0;
-
- /* Otherwise, create/clear the per-page CPU read domain flag if we're
- * newly adding I915_GEM_DOMAIN_CPU
- */
- if (obj->page_cpu_valid == NULL) {
- obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
- GFP_KERNEL);
- if (obj->page_cpu_valid == NULL)
- return -ENOMEM;
- } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
- memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
-
- /* Flush the cache on any pages that are still invalid from the CPU's
- * perspective.
- */
- for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
- i++) {
- if (obj->page_cpu_valid[i])
- continue;
-
- drm_clflush_pages(obj->pages + i, 1);
-
- obj->page_cpu_valid[i] = 1;
- }
-
- /* It should now be out of any other write domains, and we can update
- * the domain values for our changes.
- */
- BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
-
- old_read_domains = obj->base.read_domains;
- obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
-
- trace_i915_gem_object_change_domain(obj,
- old_read_domains,
- obj->base.write_domain);
-
- return 0;
-}
-
/* Throttle our rendering by waiting until the ring has completed our requests
* emitted over 20 msec ago.
*
@@ -3343,6 +3167,9 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
return ret;
}
+ if (!obj->has_global_gtt_mapping && map_and_fenceable)
+ i915_gem_gtt_bind_object(obj, obj->cache_level);
+
if (obj->pin_count++ == 0) {
if (!obj->active)
list_move_tail(&obj->mm_list,
@@ -3664,7 +3491,6 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
drm_gem_object_release(&obj->base);
i915_gem_info_remove_obj(dev_priv, obj->base.size);
- kfree(obj->page_cpu_valid);
kfree(obj->bit_17);
kfree(obj);
}