summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/ttm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/ttm')
-rw-r--r-- drivers/gpu/drm/ttm/Makefile 6
-rw-r--r-- drivers/gpu/drm/ttm/ttm_bo.c 46
-rw-r--r-- drivers/gpu/drm/ttm/ttm_bo_util.c 30
-rw-r--r-- drivers/gpu/drm/ttm/ttm_bo_vm.c 92
-rw-r--r-- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c 3
5 files changed, 119 insertions, 58 deletions
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index b2b33dde2afb..b433b9f040c9 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -5,10 +5,6 @@ ccflags-y := -Iinclude/drm
ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o \
- ttm_bo_manager.o
-
-ifeq ($(CONFIG_SWIOTLB),y)
-ttm-y += ttm_page_alloc_dma.o
-endif
+ ttm_bo_manager.o ttm_page_alloc_dma.o
obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f1a857ec1021..8d5a646ebe6a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -429,8 +429,20 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
sync_obj = driver->sync_obj_ref(bo->sync_obj);
spin_unlock(&bdev->fence_lock);
- if (!ret)
+ if (!ret) {
+
+ /*
+ * Make NO_EVICT bos immediately available to
+ * shrinkers, now that they are queued for
+ * destruction.
+ */
+ if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
+ bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
+ ttm_bo_add_to_lru(bo);
+ }
+
ww_mutex_unlock(&bo->resv->lock);
+ }
kref_get(&bo->list_kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
@@ -986,24 +998,32 @@ out_unlock:
return ret;
}
-static int ttm_bo_mem_compat(struct ttm_placement *placement,
- struct ttm_mem_reg *mem)
+static bool ttm_bo_mem_compat(struct ttm_placement *placement,
+ struct ttm_mem_reg *mem,
+ uint32_t *new_flags)
{
int i;
if (mem->mm_node && placement->lpfn != 0 &&
(mem->start < placement->fpfn ||
mem->start + mem->num_pages > placement->lpfn))
- return -1;
+ return false;
for (i = 0; i < placement->num_placement; i++) {
- if ((placement->placement[i] & mem->placement &
- TTM_PL_MASK_CACHING) &&
- (placement->placement[i] & mem->placement &
- TTM_PL_MASK_MEM))
- return i;
+ *new_flags = placement->placement[i];
+ if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
+ (*new_flags & mem->placement & TTM_PL_MASK_MEM))
+ return true;
}
- return -1;
+
+ for (i = 0; i < placement->num_busy_placement; i++) {
+ *new_flags = placement->busy_placement[i];
+ if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
+ (*new_flags & mem->placement & TTM_PL_MASK_MEM))
+ return true;
+ }
+
+ return false;
}
int ttm_bo_validate(struct ttm_buffer_object *bo,
@@ -1012,6 +1032,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
bool no_wait_gpu)
{
int ret;
+ uint32_t new_flags;
lockdep_assert_held(&bo->resv->lock.base);
/* Check that range is valid */
@@ -1022,8 +1043,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
/*
* Check whether we need to move buffer.
*/
- ret = ttm_bo_mem_compat(placement, &bo->mem);
- if (ret < 0) {
+ if (!ttm_bo_mem_compat(placement, &bo->mem, &new_flags)) {
ret = ttm_bo_move_buffer(bo, placement, interruptible,
no_wait_gpu);
if (ret)
@@ -1033,7 +1053,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
* Use the access and other non-mapping-related flag bits from
* the compatible memory placement flags to the active flags
*/
- ttm_flag_masked(&bo->mem.placement, placement->placement[ret],
+ ttm_flag_masked(&bo->mem.placement, new_flags,
~TTM_PL_MASK_MEMTYPE);
}
/*
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 7cc904d3a4d1..4834c463c38b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -343,19 +343,25 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
if (ret)
goto out;
+ /*
+ * Single TTM move. NOP.
+ */
if (old_iomap == NULL && new_iomap == NULL)
goto out2;
+
+ /*
+ * Move nonexistent data. NOP.
+ */
if (old_iomap == NULL && ttm == NULL)
goto out2;
- if (ttm->state == tt_unpopulated) {
+ /*
+ * TTM might be null for moves within the same region.
+ */
+ if (ttm && ttm->state == tt_unpopulated) {
ret = ttm->bdev->driver->ttm_tt_populate(ttm);
- if (ret) {
- /* if we fail here don't nuke the mm node
- * as the bo still owns it */
- old_copy.mm_node = NULL;
+ if (ret)
goto out1;
- }
}
add = 0;
@@ -381,11 +387,8 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
prot);
} else
ret = ttm_copy_io_page(new_iomap, old_iomap, page);
- if (ret) {
- /* failing here, means keep old copy as-is */
- old_copy.mm_node = NULL;
+ if (ret)
goto out1;
- }
}
mb();
out2:
@@ -403,7 +406,12 @@ out1:
ttm_mem_reg_iounmap(bdev, old_mem, new_iomap);
out:
ttm_mem_reg_iounmap(bdev, &old_copy, old_iomap);
- ttm_bo_mem_put(bo, &old_copy);
+
+ /*
+ * On error, keep the mm node!
+ */
+ if (!ret)
+ ttm_bo_mem_put(bo, &old_copy);
return ret;
}
EXPORT_SYMBOL(ttm_bo_move_memcpy);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 1006c15445e9..ac617f3ecd0c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -41,6 +41,51 @@
#define TTM_BO_VM_NUM_PREFAULT 16
+static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
+ struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct ttm_bo_device *bdev = bo->bdev;
+ int ret = 0;
+
+ spin_lock(&bdev->fence_lock);
+ if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)))
+ goto out_unlock;
+
+ /*
+ * Quick non-stalling check for idle.
+ */
+ ret = ttm_bo_wait(bo, false, false, true);
+ if (likely(ret == 0))
+ goto out_unlock;
+
+ /*
+ * If possible, avoid waiting for GPU with mmap_sem
+ * held.
+ */
+ if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
+ ret = VM_FAULT_RETRY;
+ if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+ goto out_unlock;
+
+ up_read(&vma->vm_mm->mmap_sem);
+ (void) ttm_bo_wait(bo, false, true, false);
+ goto out_unlock;
+ }
+
+ /*
+ * Ordinary wait.
+ */
+ ret = ttm_bo_wait(bo, false, true, false);
+ if (unlikely(ret != 0))
+ ret = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
+ VM_FAULT_NOPAGE;
+
+out_unlock:
+ spin_unlock(&bdev->fence_lock);
+ return ret;
+}
+
static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
@@ -57,6 +102,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
int retval = VM_FAULT_NOPAGE;
struct ttm_mem_type_manager *man =
&bdev->man[bo->mem.mem_type];
+ struct vm_area_struct cvma;
/*
* Work around locking order reversal in fault / nopfn
@@ -91,18 +137,11 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
* Wait for buffer data in transit, due to a pipelined
* move.
*/
-
- spin_lock(&bdev->fence_lock);
- if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
- ret = ttm_bo_wait(bo, false, true, false);
- spin_unlock(&bdev->fence_lock);
- if (unlikely(ret != 0)) {
- retval = (ret != -ERESTARTSYS) ?
- VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
- goto out_unlock;
- }
- } else
- spin_unlock(&bdev->fence_lock);
+ ret = ttm_bo_vm_fault_idle(bo, vma, vmf);
+ if (unlikely(ret != 0)) {
+ retval = ret;
+ goto out_unlock;
+ }
ret = ttm_mem_io_lock(man, true);
if (unlikely(ret != 0)) {
@@ -126,26 +165,21 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
/*
- * Strictly, we're not allowed to modify vma->vm_page_prot here,
- * since the mmap_sem is only held in read mode. However, we
- * modify only the caching bits of vma->vm_page_prot and
- * consider those bits protected by
- * the bo->mutex, as we should be the only writers.
- * There shouldn't really be any readers of these bits except
- * within vm_insert_mixed()? fork?
- *
- * TODO: Add a list of vmas to the bo, and change the
- * vma->vm_page_prot when the object changes caching policy, with
- * the correct locks held.
+ * Make a local vma copy to modify the page_prot member
+ * and vm_flags if necessary. The vma parameter is protected
+ * by mmap_sem in write mode.
*/
+ cvma = *vma;
+ cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
+
if (bo->mem.bus.is_iomem) {
- vma->vm_page_prot = ttm_io_prot(bo->mem.placement,
- vma->vm_page_prot);
+ cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
+ cvma.vm_page_prot);
} else {
ttm = bo->ttm;
- vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
- vm_get_page_prot(vma->vm_flags) :
- ttm_io_prot(bo->mem.placement, vma->vm_page_prot);
+ if (!(bo->mem.placement & TTM_PL_FLAG_CACHED))
+ cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
+ cvma.vm_page_prot);
/* Allocate all page at once, most common usage */
if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
@@ -172,7 +206,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
pfn = page_to_pfn(page);
}
- ret = vm_insert_mixed(vma, address, pfn);
+ ret = vm_insert_mixed(&cvma, address, pfn);
/*
* Somebody beat us to this PTE or prefaulting to
* an already populated PTE, or prefaulting error.
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 7957beeeaf73..fb8259f69839 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -33,6 +33,7 @@
* when freed).
*/
+#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU)
#define pr_fmt(fmt) "[TTM] " fmt
#include <linux/dma-mapping.h>
@@ -1142,3 +1143,5 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data)
return 0;
}
EXPORT_SYMBOL_GPL(ttm_dma_page_alloc_debugfs);
+
+#endif