summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/v3d
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/v3d')
-rw-r--r--drivers/gpu/drm/v3d/Kconfig9
-rw-r--r--drivers/gpu/drm/v3d/Makefile18
-rw-r--r--drivers/gpu/drm/v3d/v3d_bo.c389
-rw-r--r--drivers/gpu/drm/v3d/v3d_debugfs.c191
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.c371
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h294
-rw-r--r--drivers/gpu/drm/v3d/v3d_fence.c58
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c668
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c206
-rw-r--r--drivers/gpu/drm/v3d/v3d_mmu.c122
-rw-r--r--drivers/gpu/drm/v3d/v3d_regs.h295
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c228
-rw-r--r--drivers/gpu/drm/v3d/v3d_trace.h82
-rw-r--r--drivers/gpu/drm/v3d/v3d_trace_points.c9
14 files changed, 2940 insertions, 0 deletions
diff --git a/drivers/gpu/drm/v3d/Kconfig b/drivers/gpu/drm/v3d/Kconfig
new file mode 100644
index 000000000000..a0c0259355bd
--- /dev/null
+++ b/drivers/gpu/drm/v3d/Kconfig
@@ -0,0 +1,9 @@
+config DRM_V3D
+ tristate "Broadcom V3D 3.x and newer"
+ depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST
+ depends on DRM
+ depends on COMMON_CLK
+ select DRM_SCHED
+ help
+ Choose this option if you have a system that has a Broadcom
+ V3D 3.x or newer GPU, such as BCM7268.
diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile
new file mode 100644
index 000000000000..34446e1de64f
--- /dev/null
+++ b/drivers/gpu/drm/v3d/Makefile
@@ -0,0 +1,18 @@
+# Please keep these build lists sorted!
+
+# core driver code
+v3d-y := \
+ v3d_bo.o \
+ v3d_drv.o \
+ v3d_fence.o \
+ v3d_gem.o \
+ v3d_irq.o \
+ v3d_mmu.o \
+ v3d_trace_points.o \
+ v3d_sched.o
+
+v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
+
+obj-$(CONFIG_DRM_V3D) += v3d.o
+
+CFLAGS_v3d_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
new file mode 100644
index 000000000000..7b1e2a549a71
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015-2018 Broadcom */
+
+/**
+ * DOC: V3D GEM BO management support
+ *
+ * Compared to VC4 (V3D 2.x), V3D 3.3 introduces an MMU between the
+ * GPU and the bus, allowing us to use shmem objects for our storage
+ * instead of CMA.
+ *
+ * Physically contiguous objects may still be imported to V3D, but the
+ * driver doesn't allocate physically contiguous objects on its own.
+ * Display engines requiring physically contiguous allocations should
+ * look into Mesa's "renderonly" support (as used by the Mesa pl111
+ * driver) for an example of how to integrate with V3D.
+ *
+ * Long term, we should support evicting pages from the MMU when under
+ * memory pressure (thus the v3d_bo_get_pages() refcounting), but
+ * that's not a high priority since our systems tend to not have swap.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/pfn_t.h>
+
+#include "v3d_drv.h"
+#include "uapi/drm/v3d_drm.h"
+
+/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and maps
+ * it for DMA.
+ */
+static int
+v3d_bo_get_pages(struct v3d_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->base;
+ struct drm_device *dev = obj->dev;
+ int npages = obj->size >> PAGE_SHIFT;
+ int ret = 0;
+
+ mutex_lock(&bo->lock);
+ if (bo->pages_refcount++ != 0)
+ goto unlock;
+
+ if (!obj->import_attach) {
+ bo->pages = drm_gem_get_pages(obj);
+ if (IS_ERR(bo->pages)) {
+ ret = PTR_ERR(bo->pages);
+ goto unlock;
+ }
+
+ bo->sgt = drm_prime_pages_to_sg(bo->pages, npages);
+ if (IS_ERR(bo->sgt)) {
+ ret = PTR_ERR(bo->sgt);
+ goto put_pages;
+ }
+
+ /* Map the pages for use by the GPU. */
+ dma_map_sg(dev->dev, bo->sgt->sgl,
+ bo->sgt->nents, DMA_BIDIRECTIONAL);
+ } else {
+ bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
+ if (!bo->pages)
+ goto put_pages;
+
+ drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages,
+ NULL, npages);
+
+ /* Note that dma-bufs come in mapped. */
+ }
+
+ mutex_unlock(&bo->lock);
+
+ return 0;
+
+put_pages:
+ drm_gem_put_pages(obj, bo->pages, true, true);
+ bo->pages = NULL;
+unlock:
+ bo->pages_refcount--;
+ mutex_unlock(&bo->lock);
+ return ret;
+}
+
+static void
+v3d_bo_put_pages(struct v3d_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->base;
+
+ mutex_lock(&bo->lock);
+ if (--bo->pages_refcount == 0) {
+ if (!obj->import_attach) {
+ dma_unmap_sg(obj->dev->dev, bo->sgt->sgl,
+ bo->sgt->nents, DMA_BIDIRECTIONAL);
+ sg_free_table(bo->sgt);
+ kfree(bo->sgt);
+ drm_gem_put_pages(obj, bo->pages, true, true);
+ } else {
+ kfree(bo->pages);
+ }
+ }
+ mutex_unlock(&bo->lock);
+}
+
+static struct v3d_bo *v3d_bo_create_struct(struct drm_device *dev,
+ size_t unaligned_size)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct drm_gem_object *obj;
+ struct v3d_bo *bo;
+ size_t size = roundup(unaligned_size, PAGE_SIZE);
+ int ret;
+
+ if (size == 0)
+ return ERR_PTR(-EINVAL);
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo)
+ return ERR_PTR(-ENOMEM);
+ obj = &bo->base;
+
+ INIT_LIST_HEAD(&bo->vmas);
+ INIT_LIST_HEAD(&bo->unref_head);
+ mutex_init(&bo->lock);
+
+ ret = drm_gem_object_init(dev, obj, size);
+ if (ret)
+ goto free_bo;
+
+ spin_lock(&v3d->mm_lock);
+ ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node,
+ obj->size >> PAGE_SHIFT,
+ GMP_GRANULARITY >> PAGE_SHIFT, 0, 0);
+ spin_unlock(&v3d->mm_lock);
+ if (ret)
+ goto free_obj;
+
+ return bo;
+
+free_obj:
+ drm_gem_object_release(obj);
+free_bo:
+ kfree(bo);
+ return ERR_PTR(ret);
+}
+
+struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
+ size_t unaligned_size)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct drm_gem_object *obj;
+ struct v3d_bo *bo;
+ int ret;
+
+ bo = v3d_bo_create_struct(dev, unaligned_size);
+ if (IS_ERR(bo))
+ return bo;
+ obj = &bo->base;
+
+ bo->resv = &bo->_resv;
+ reservation_object_init(bo->resv);
+
+ ret = v3d_bo_get_pages(bo);
+ if (ret)
+ goto free_mm;
+
+ v3d_mmu_insert_ptes(bo);
+
+ mutex_lock(&v3d->bo_lock);
+ v3d->bo_stats.num_allocated++;
+ v3d->bo_stats.pages_allocated += obj->size >> PAGE_SHIFT;
+ mutex_unlock(&v3d->bo_lock);
+
+ return bo;
+
+free_mm:
+ spin_lock(&v3d->mm_lock);
+ drm_mm_remove_node(&bo->node);
+ spin_unlock(&v3d->mm_lock);
+
+ drm_gem_object_release(obj);
+ kfree(bo);
+ return ERR_PTR(ret);
+}
+
+/* Called DRM core on the last userspace/kernel unreference of the
+ * BO.
+ */
+void v3d_free_object(struct drm_gem_object *obj)
+{
+ struct v3d_dev *v3d = to_v3d_dev(obj->dev);
+ struct v3d_bo *bo = to_v3d_bo(obj);
+
+ mutex_lock(&v3d->bo_lock);
+ v3d->bo_stats.num_allocated--;
+ v3d->bo_stats.pages_allocated -= obj->size >> PAGE_SHIFT;
+ mutex_unlock(&v3d->bo_lock);
+
+ reservation_object_fini(&bo->_resv);
+
+ v3d_bo_put_pages(bo);
+
+ if (obj->import_attach)
+ drm_prime_gem_destroy(obj, bo->sgt);
+
+ v3d_mmu_remove_ptes(bo);
+ spin_lock(&v3d->mm_lock);
+ drm_mm_remove_node(&bo->node);
+ spin_unlock(&v3d->mm_lock);
+
+ mutex_destroy(&bo->lock);
+
+ drm_gem_object_release(obj);
+ kfree(bo);
+}
+
+struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj)
+{
+ struct v3d_bo *bo = to_v3d_bo(obj);
+
+ return bo->resv;
+}
+
+static void
+v3d_set_mmap_vma_flags(struct vm_area_struct *vma)
+{
+ vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_flags |= VM_MIXEDMAP;
+ vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+}
+
+int v3d_gem_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct v3d_bo *bo = to_v3d_bo(obj);
+ unsigned long pfn;
+ pgoff_t pgoff;
+ int ret;
+
+ /* We don't use vmf->pgoff since that has the fake offset: */
+ pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+ pfn = page_to_pfn(bo->pages[pgoff]);
+
+ ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
+
+ switch (ret) {
+ case -EAGAIN:
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ /*
+ * EBUSY is ok: this just means that another thread
+ * already did the job.
+ */
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ int ret;
+
+ ret = drm_gem_mmap(filp, vma);
+ if (ret)
+ return ret;
+
+ v3d_set_mmap_vma_flags(vma);
+
+ return ret;
+}
+
+int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ int ret;
+
+ ret = drm_gem_mmap_obj(obj, obj->size, vma);
+ if (ret < 0)
+ return ret;
+
+ v3d_set_mmap_vma_flags(vma);
+
+ return 0;
+}
+
+struct sg_table *
+v3d_prime_get_sg_table(struct drm_gem_object *obj)
+{
+ struct v3d_bo *bo = to_v3d_bo(obj);
+ int npages = obj->size >> PAGE_SHIFT;
+
+ return drm_prime_pages_to_sg(bo->pages, npages);
+}
+
+struct drm_gem_object *
+v3d_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt)
+{
+ struct drm_gem_object *obj;
+ struct v3d_bo *bo;
+
+ bo = v3d_bo_create_struct(dev, attach->dmabuf->size);
+ if (IS_ERR(bo))
+ return ERR_CAST(bo);
+ obj = &bo->base;
+
+ bo->resv = attach->dmabuf->resv;
+
+ bo->sgt = sgt;
+ v3d_bo_get_pages(bo);
+
+ v3d_mmu_insert_ptes(bo);
+
+ return obj;
+}
+
+int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_v3d_create_bo *args = data;
+ struct v3d_bo *bo = NULL;
+ int ret;
+
+ if (args->flags != 0) {
+ DRM_INFO("unknown create_bo flags: %d\n", args->flags);
+ return -EINVAL;
+ }
+
+ bo = v3d_bo_create(dev, file_priv, PAGE_ALIGN(args->size));
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ args->offset = bo->node.start << PAGE_SHIFT;
+
+ ret = drm_gem_handle_create(file_priv, &bo->base, &args->handle);
+ drm_gem_object_put_unlocked(&bo->base);
+
+ return ret;
+}
+
+int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_v3d_mmap_bo *args = data;
+ struct drm_gem_object *gem_obj;
+ int ret;
+
+ if (args->flags != 0) {
+ DRM_INFO("unknown mmap_bo flags: %d\n", args->flags);
+ return -EINVAL;
+ }
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+
+ ret = drm_gem_create_mmap_offset(gem_obj);
+ if (ret == 0)
+ args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return ret;
+}
+
+int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_v3d_get_bo_offset *args = data;
+ struct drm_gem_object *gem_obj;
+ struct v3d_bo *bo;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+ bo = to_v3d_bo(gem_obj);
+
+ args->offset = bo->node.start << PAGE_SHIFT;
+
+ drm_gem_object_put_unlocked(gem_obj);
+ return 0;
+}
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
new file mode 100644
index 000000000000..4db62c545748
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+#include <linux/circ_buf.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <drm/drmP.h>
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define REGDEF(reg) { reg, #reg }
+struct v3d_reg_def {
+ u32 reg;
+ const char *name;
+};
+
+static const struct v3d_reg_def v3d_hub_reg_defs[] = {
+ REGDEF(V3D_HUB_AXICFG),
+ REGDEF(V3D_HUB_UIFCFG),
+ REGDEF(V3D_HUB_IDENT0),
+ REGDEF(V3D_HUB_IDENT1),
+ REGDEF(V3D_HUB_IDENT2),
+ REGDEF(V3D_HUB_IDENT3),
+ REGDEF(V3D_HUB_INT_STS),
+ REGDEF(V3D_HUB_INT_MSK_STS),
+};
+
+static const struct v3d_reg_def v3d_gca_reg_defs[] = {
+ REGDEF(V3D_GCA_SAFE_SHUTDOWN),
+ REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
+};
+
+static const struct v3d_reg_def v3d_core_reg_defs[] = {
+ REGDEF(V3D_CTL_IDENT0),
+ REGDEF(V3D_CTL_IDENT1),
+ REGDEF(V3D_CTL_IDENT2),
+ REGDEF(V3D_CTL_MISCCFG),
+ REGDEF(V3D_CTL_INT_STS),
+ REGDEF(V3D_CTL_INT_MSK_STS),
+ REGDEF(V3D_CLE_CT0CS),
+ REGDEF(V3D_CLE_CT0CA),
+ REGDEF(V3D_CLE_CT0EA),
+ REGDEF(V3D_CLE_CT1CS),
+ REGDEF(V3D_CLE_CT1CA),
+ REGDEF(V3D_CLE_CT1EA),
+
+ REGDEF(V3D_PTB_BPCA),
+ REGDEF(V3D_PTB_BPCS),
+
+ REGDEF(V3D_MMU_CTL),
+ REGDEF(V3D_MMU_VIO_ADDR),
+
+ REGDEF(V3D_GMP_STATUS),
+ REGDEF(V3D_GMP_CFG),
+ REGDEF(V3D_GMP_VIO_ADDR),
+};
+
+static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ int i, core;
+
+ for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg,
+ V3D_READ(v3d_hub_reg_defs[i].reg));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg,
+ V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+ }
+
+ for (core = 0; core < v3d->cores; core++) {
+ for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) {
+ seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
+ core,
+ v3d_core_reg_defs[i].name,
+ v3d_core_reg_defs[i].reg,
+ V3D_CORE_READ(core,
+ v3d_core_reg_defs[i].reg));
+ }
+ }
+
+ return 0;
+}
+
+static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ u32 ident0, ident1, ident2, ident3, cores;
+ int ret, core;
+
+ ret = pm_runtime_get_sync(v3d->dev);
+ if (ret < 0)
+ return ret;
+
+ ident0 = V3D_READ(V3D_HUB_IDENT0);
+ ident1 = V3D_READ(V3D_HUB_IDENT1);
+ ident2 = V3D_READ(V3D_HUB_IDENT2);
+ ident3 = V3D_READ(V3D_HUB_IDENT3);
+ cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
+
+ seq_printf(m, "Revision: %d.%d.%d.%d\n",
+ V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER),
+ V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV),
+ V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV),
+ V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPIDX));
+ seq_printf(m, "MMU: %s\n",
+ (ident2 & V3D_HUB_IDENT2_WITH_MMU) ? "yes" : "no");
+ seq_printf(m, "TFU: %s\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_TFU) ? "yes" : "no");
+ seq_printf(m, "TSY: %s\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_TSY) ? "yes" : "no");
+ seq_printf(m, "MSO: %s\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_MSO) ? "yes" : "no");
+ seq_printf(m, "L3C: %s (%dkb)\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_L3C) ? "yes" : "no",
+ V3D_GET_FIELD(ident2, V3D_HUB_IDENT2_L3C_NKB));
+
+ for (core = 0; core < cores; core++) {
+ u32 misccfg;
+ u32 nslc, ntmu, qups;
+
+ ident0 = V3D_CORE_READ(core, V3D_CTL_IDENT0);
+ ident1 = V3D_CORE_READ(core, V3D_CTL_IDENT1);
+ ident2 = V3D_CORE_READ(core, V3D_CTL_IDENT2);
+ misccfg = V3D_CORE_READ(core, V3D_CTL_MISCCFG);
+
+ nslc = V3D_GET_FIELD(ident1, V3D_IDENT1_NSLC);
+ ntmu = V3D_GET_FIELD(ident1, V3D_IDENT1_NTMU);
+ qups = V3D_GET_FIELD(ident1, V3D_IDENT1_QUPS);
+
+ seq_printf(m, "Core %d:\n", core);
+ seq_printf(m, " Revision: %d.%d\n",
+ V3D_GET_FIELD(ident0, V3D_IDENT0_VER),
+ V3D_GET_FIELD(ident1, V3D_IDENT1_REV));
+ seq_printf(m, " Slices: %d\n", nslc);
+ seq_printf(m, " TMUs: %d\n", nslc * ntmu);
+ seq_printf(m, " QPUs: %d\n", nslc * qups);
+ seq_printf(m, " Semaphores: %d\n",
+ V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+ seq_printf(m, " BCG int: %d\n",
+ (ident2 & V3D_IDENT2_BCG_INT) != 0);
+ seq_printf(m, " Override TMU: %d\n",
+ (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
+ }
+
+ pm_runtime_mark_last_busy(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->dev);
+
+ return 0;
+}
+
+static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+
+ mutex_lock(&v3d->bo_lock);
+ seq_printf(m, "allocated bos: %d\n",
+ v3d->bo_stats.num_allocated);
+ seq_printf(m, "allocated bo size (kb): %ld\n",
+ (long)v3d->bo_stats.pages_allocated << (PAGE_SHIFT - 10));
+ mutex_unlock(&v3d->bo_lock);
+
+ return 0;
+}
+
+static const struct drm_info_list v3d_debugfs_list[] = {
+ {"v3d_ident", v3d_v3d_debugfs_ident, 0},
+ {"v3d_regs", v3d_v3d_debugfs_regs, 0},
+ {"bo_stats", v3d_debugfs_bo_stats, 0},
+};
+
+int
+v3d_debugfs_init(struct drm_minor *minor)
+{
+ return drm_debugfs_create_files(v3d_debugfs_list,
+ ARRAY_SIZE(v3d_debugfs_list),
+ minor->debugfs_root, minor);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
new file mode 100644
index 000000000000..cdb582043b4f
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D Graphics Driver
+ *
+ * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs.
+ * For V3D 2.x support, see the VC4 driver.
+ *
+ * Currently only single-core rendering using the binner and renderer
+ * is supported. The TFU (texture formatting unit) and V3D 4.x's CSD
+ * (compute shader dispatch) are not yet supported.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+
+#include "uapi/drm/v3d_drm.h"
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define DRIVER_NAME "v3d"
+#define DRIVER_DESC "Broadcom V3D graphics"
+#define DRIVER_DATE "20180419"
+#define DRIVER_MAJOR 1
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 0
+
+#ifdef CONFIG_PM
+static int v3d_runtime_suspend(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct v3d_dev *v3d = to_v3d_dev(drm);
+
+ v3d_irq_disable(v3d);
+
+ clk_disable_unprepare(v3d->clk);
+
+ return 0;
+}
+
+static int v3d_runtime_resume(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct v3d_dev *v3d = to_v3d_dev(drm);
+ int ret;
+
+ ret = clk_prepare_enable(v3d->clk);
+ if (ret != 0)
+ return ret;
+
+ /* XXX: VPM base */
+
+ v3d_mmu_set_page_table(v3d);
+ v3d_irq_enable(v3d);
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops v3d_v3d_pm_ops = {
+ SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL)
+};
+
+static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct drm_v3d_get_param *args = data;
+ int ret;
+ static const u32 reg_map[] = {
+ [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2,
+ };
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need
+ * to explicitly allow it in the "the register in our
+ * parameter map" check.
+ */
+ if (args->param < ARRAY_SIZE(reg_map) &&
+ (reg_map[args->param] ||
+ args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) {
+ u32 offset = reg_map[args->param];
+
+ if (args->value != 0)
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(v3d->dev);
+ if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 &&
+ args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) {
+ args->value = V3D_CORE_READ(0, offset);
+ } else {
+ args->value = V3D_READ(offset);
+ }
+ pm_runtime_mark_last_busy(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->dev);
+ return 0;
+ }
+
+ /* Any params that aren't just register reads would go here. */
+
+ DRM_DEBUG("Unknown parameter %d\n", args->param);
+ return -EINVAL;
+}
+
+static int
+v3d_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv;
+ int i;
+
+ v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
+ if (!v3d_priv)
+ return -ENOMEM;
+
+ v3d_priv->v3d = v3d;
+
+ for (i = 0; i < V3D_MAX_QUEUES; i++) {
+ drm_sched_entity_init(&v3d->queue[i].sched,
+ &v3d_priv->sched_entity[i],
+ &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
+ NULL);
+ }
+
+ file->driver_priv = v3d_priv;
+
+ return 0;
+}
+
+static void
+v3d_postclose(struct drm_device *dev, struct drm_file *file)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv = file->driver_priv;
+ enum v3d_queue q;
+
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ drm_sched_entity_fini(&v3d->queue[q].sched,
+ &v3d_priv->sched_entity[q]);
+ }
+
+ kfree(v3d_priv);
+}
+
+static const struct file_operations v3d_drm_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = v3d_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = drm_compat_ioctl,
+ .llseek = noop_llseek,
+};
+
+/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
+ * protection between clients. Note that render nodes would be be
+ * able to submit CLs that could access BOs from clients authenticated
+ * with the master node.
+ */
+static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+ DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
+};
+
+static const struct vm_operations_struct v3d_vm_ops = {
+ .fault = v3d_gem_fault,
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+};
+
+static struct drm_driver v3d_drm_driver = {
+ .driver_features = (DRIVER_GEM |
+ DRIVER_RENDER |
+ DRIVER_PRIME |
+ DRIVER_SYNCOBJ),
+
+ .open = v3d_open,
+ .postclose = v3d_postclose,
+
+#if defined(CONFIG_DEBUG_FS)
+ .debugfs_init = v3d_debugfs_init,
+#endif
+
+ .gem_free_object_unlocked = v3d_free_object,
+ .gem_vm_ops = &v3d_vm_ops,
+
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_import = drm_gem_prime_import,
+ .gem_prime_export = drm_gem_prime_export,
+ .gem_prime_res_obj = v3d_prime_res_obj,
+ .gem_prime_get_sg_table = v3d_prime_get_sg_table,
+ .gem_prime_import_sg_table = v3d_prime_import_sg_table,
+ .gem_prime_mmap = v3d_prime_mmap,
+
+ .ioctls = v3d_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
+ .fops = &v3d_drm_fops,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static const struct of_device_id v3d_of_match[] = {
+ { .compatible = "brcm,7268-v3d" },
+ { .compatible = "brcm,7278-v3d" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, v3d_of_match);
+
+static int
+map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
+{
+ struct resource *res =
+ platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name);
+
+ *regs = devm_ioremap_resource(v3d->dev, res);
+ return PTR_ERR_OR_ZERO(*regs);
+}
+
+static int v3d_platform_drm_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct drm_device *drm;
+ struct v3d_dev *v3d;
+ int ret;
+ u32 ident1;
+
+ dev->coherent_dma_mask = DMA_BIT_MASK(36);
+
+ v3d = kzalloc(sizeof(*v3d), GFP_KERNEL);
+ if (!v3d)
+ return -ENOMEM;
+ v3d->dev = dev;
+ v3d->pdev = pdev;
+ drm = &v3d->drm;
+
+ ret = map_regs(v3d, &v3d->bridge_regs, "bridge");
+ if (ret)
+ goto dev_free;
+
+ ret = map_regs(v3d, &v3d->hub_regs, "hub");
+ if (ret)
+ goto dev_free;
+
+ ret = map_regs(v3d, &v3d->core_regs[0], "core0");
+ if (ret)
+ goto dev_free;
+
+ ident1 = V3D_READ(V3D_HUB_IDENT1);
+ v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 +
+ V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV));
+ v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
+ WARN_ON(v3d->cores > 1); /* multicore not yet implemented */
+
+ if (v3d->ver < 41) {
+ ret = map_regs(v3d, &v3d->gca_regs, "gca");
+ if (ret)
+ goto dev_free;
+ }
+
+ v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+ if (!v3d->mmu_scratch) {
+ dev_err(dev, "Failed to allocate MMU scratch page\n");
+ ret = -ENOMEM;
+ goto dev_free;
+ }
+
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, 50);
+ pm_runtime_enable(dev);
+
+ ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev);
+ if (ret)
+ goto dma_free;
+
+ platform_set_drvdata(pdev, drm);
+ drm->dev_private = v3d;
+
+ ret = v3d_gem_init(drm);
+ if (ret)
+ goto dev_destroy;
+
+ v3d_irq_init(v3d);
+
+ ret = drm_dev_register(drm, 0);
+ if (ret)
+ goto gem_destroy;
+
+ return 0;
+
+gem_destroy:
+ v3d_gem_destroy(drm);
+dev_destroy:
+ drm_dev_put(drm);
+dma_free:
+ dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
+dev_free:
+ kfree(v3d);
+ return ret;
+}
+
+static int v3d_platform_drm_remove(struct platform_device *pdev)
+{
+ struct drm_device *drm = platform_get_drvdata(pdev);
+ struct v3d_dev *v3d = to_v3d_dev(drm);
+
+ drm_dev_unregister(drm);
+
+ v3d_gem_destroy(drm);
+
+ drm_dev_put(drm);
+
+ dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
+
+ return 0;
+}
+
+static struct platform_driver v3d_platform_driver = {
+ .probe = v3d_platform_drm_probe,
+ .remove = v3d_platform_drm_remove,
+ .driver = {
+ .name = "v3d",
+ .of_match_table = v3d_of_match,
+ },
+};
+
+static int __init v3d_drm_register(void)
+{
+ return platform_driver_register(&v3d_platform_driver);
+}
+
+static void __exit v3d_drm_unregister(void)
+{
+ platform_driver_unregister(&v3d_platform_driver);
+}
+
+module_init(v3d_drm_register);
+module_exit(v3d_drm_unregister);
+
+MODULE_ALIAS("platform:v3d-drm");
+MODULE_DESCRIPTION("Broadcom V3D DRM Driver");
+MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
new file mode 100644
index 000000000000..a043ac3aae98
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015-2018 Broadcom */
+
+#include <linux/reservation.h>
+#include <drm/drmP.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_gem.h>
+#include <drm/gpu_scheduler.h>
+
+#define GMP_GRANULARITY (128 * 1024)
+
+/* Enum for each of the V3D queues. We maintain various queue
+ * tracking as an array because at some point we'll want to support
+ * the TFU (texture formatting unit) as another queue.
+ */
+enum v3d_queue {
+ V3D_BIN,
+ V3D_RENDER,
+};
+
+#define V3D_MAX_QUEUES (V3D_RENDER + 1)
+
+struct v3d_queue_state {
+ struct drm_gpu_scheduler sched;
+
+ u64 fence_context;
+ u64 emit_seqno;
+ u64 finished_seqno;
+};
+
+struct v3d_dev {
+ struct drm_device drm;
+
+ /* Short representation (e.g. 33, 41) of the V3D tech version
+ * and revision.
+ */
+ int ver;
+
+ struct device *dev;
+ struct platform_device *pdev;
+ void __iomem *hub_regs;
+ void __iomem *core_regs[3];
+ void __iomem *bridge_regs;
+ void __iomem *gca_regs;
+ struct clk *clk;
+
+ /* Virtual and DMA addresses of the single shared page table. */
+ volatile u32 *pt;
+ dma_addr_t pt_paddr;
+
+ /* Virtual and DMA addresses of the MMU's scratch page. When
+ * a read or write is invalid in the MMU, it will be
+ * redirected here.
+ */
+ void *mmu_scratch;
+ dma_addr_t mmu_scratch_paddr;
+
+ /* Number of V3D cores. */
+ u32 cores;
+
+ /* Allocator managing the address space. All units are in
+ * number of pages.
+ */
+ struct drm_mm mm;
+ spinlock_t mm_lock;
+
+ struct work_struct overflow_mem_work;
+
+ struct v3d_exec_info *bin_job;
+ struct v3d_exec_info *render_job;
+
+ struct v3d_queue_state queue[V3D_MAX_QUEUES];
+
+ /* Spinlock used to synchronize the overflow memory
+ * management against bin job submission.
+ */
+ spinlock_t job_lock;
+
+ /* Protects bo_stats */
+ struct mutex bo_lock;
+
+ /* Lock taken when resetting the GPU, to keep multiple
+ * processes from trying to park the scheduler threads and
+ * reset at once.
+ */
+ struct mutex reset_lock;
+
+ struct {
+ u32 num_allocated;
+ u32 pages_allocated;
+ } bo_stats;
+};
+
+static inline struct v3d_dev *
+to_v3d_dev(struct drm_device *dev)
+{
+ return (struct v3d_dev *)dev->dev_private;
+}
+
+/* The per-fd struct, which tracks the MMU mappings. */
+struct v3d_file_priv {
+ struct v3d_dev *v3d;
+
+ struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
+};
+
+/* Tracks a mapping of a BO into a per-fd address space */
+struct v3d_vma {
+ struct v3d_page_table *pt;
+ struct list_head list; /* entry in v3d_bo.vmas */
+};
+
+struct v3d_bo {
+ struct drm_gem_object base;
+
+ struct mutex lock;
+
+ struct drm_mm_node node;
+
+ u32 pages_refcount;
+ struct page **pages;
+ struct sg_table *sgt;
+ void *vaddr;
+
+ struct list_head vmas; /* list of v3d_vma */
+
+ /* List entry for the BO's position in
+ * v3d_exec_info->unref_list
+ */
+ struct list_head unref_head;
+
+ /* normally (resv == &_resv) except for imported bo's */
+ struct reservation_object *resv;
+ struct reservation_object _resv;
+};
+
+static inline struct v3d_bo *
+to_v3d_bo(struct drm_gem_object *bo)
+{
+ return (struct v3d_bo *)bo;
+}
+
+struct v3d_fence {
+ struct dma_fence base;
+ struct drm_device *dev;
+ /* v3d seqno for signaled() test */
+ u64 seqno;
+ enum v3d_queue queue;
+};
+
+static inline struct v3d_fence *
+to_v3d_fence(struct dma_fence *fence)
+{
+ return (struct v3d_fence *)fence;
+}
+
+#define V3D_READ(offset) readl(v3d->hub_regs + offset)
+#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset)
+
+#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset)
+#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset)
+
+#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset)
+#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset)
+
+#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset)
+#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset)
+
+struct v3d_job {
+ struct drm_sched_job base;
+
+ struct v3d_exec_info *exec;
+
+ /* An optional fence userspace can pass in for the job to depend on. */
+ struct dma_fence *in_fence;
+
+ /* v3d fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *done_fence;
+
+ /* GPU virtual addresses of the start/end of the CL job. */
+ u32 start, end;
+};
+
+struct v3d_exec_info {
+ struct v3d_dev *v3d;
+
+ struct v3d_job bin, render;
+
+ /* Fence for when the scheduler considers the binner to be
+ * done, for render to depend on.
+ */
+ struct dma_fence *bin_done_fence;
+
+ struct kref refcount;
+
+ /* This is the array of BOs that were looked up at the start of exec. */
+ struct v3d_bo **bo;
+ u32 bo_count;
+
+ /* List of overflow BOs used in the job that need to be
+ * released once the job is complete.
+ */
+ struct list_head unref_list;
+
+ /* Submitted tile memory allocation start/size, tile state. */
+ u32 qma, qms, qts;
+};
+
+/**
+ * _wait_for - magic (register) wait macro
+ *
+ * Does the right thing for modeset paths when run under kdgb or similar atomic
+ * contexts. Note that it's important that we check the condition again after
+ * having timed out, since the timeout could be due to preemption or similar and
+ * we've never had a chance to check the condition before the timeout.
+ */
+#define wait_for(COND, MS) ({ \
+ unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \
+ int ret__ = 0; \
+ while (!(COND)) { \
+ if (time_after(jiffies, timeout__)) { \
+ if (!(COND)) \
+ ret__ = -ETIMEDOUT; \
+ break; \
+ } \
+ msleep(1); \
+ } \
+ ret__; \
+})
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+ /* nsecs_to_jiffies64() does not guard against overflow */
+ if (NSEC_PER_SEC % HZ &&
+ div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+ return MAX_JIFFY_OFFSET;
+
+ return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
+
+/* v3d_bo.c */
+void v3d_free_object(struct drm_gem_object *gem_obj);
+struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
+ size_t size);
+int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_gem_fault(struct vm_fault *vmf);
+int v3d_mmap(struct file *filp, struct vm_area_struct *vma);
+struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj);
+int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt);
+
+/* v3d_debugfs.c */
+int v3d_debugfs_init(struct drm_minor *minor);
+
+/* v3d_fence.c */
+extern const struct dma_fence_ops v3d_fence_ops;
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue);
+
+/* v3d_gem.c */
+int v3d_gem_init(struct drm_device *dev);
+void v3d_gem_destroy(struct drm_device *dev);
+int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+void v3d_exec_put(struct v3d_exec_info *exec);
+void v3d_reset(struct v3d_dev *v3d);
+void v3d_invalidate_caches(struct v3d_dev *v3d);
+void v3d_flush_caches(struct v3d_dev *v3d);
+
+/* v3d_irq.c */
+void v3d_irq_init(struct v3d_dev *v3d);
+void v3d_irq_enable(struct v3d_dev *v3d);
+void v3d_irq_disable(struct v3d_dev *v3d);
+void v3d_irq_reset(struct v3d_dev *v3d);
+
+/* v3d_mmu.c */
+int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo,
+ u32 *offset);
+int v3d_mmu_set_page_table(struct v3d_dev *v3d);
+void v3d_mmu_insert_ptes(struct v3d_bo *bo);
+void v3d_mmu_remove_ptes(struct v3d_bo *bo);
+
+/* v3d_sched.c */
+int v3d_sched_init(struct v3d_dev *v3d);
+void v3d_sched_fini(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
new file mode 100644
index 000000000000..087d49c8cb12
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+#include "v3d_drv.h"
+
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue)
+{
+ struct v3d_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return ERR_PTR(-ENOMEM);
+
+ fence->dev = &v3d->drm;
+ fence->queue = queue;
+ fence->seqno = ++v3d->queue[queue].emit_seqno;
+ dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock,
+ v3d->queue[queue].fence_context, fence->seqno);
+
+ return &fence->base;
+}
+
+static const char *v3d_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "v3d";
+}
+
+static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
+{
+ struct v3d_fence *f = to_v3d_fence(fence);
+
+ if (f->queue == V3D_BIN)
+ return "v3d-bin";
+ else
+ return "v3d-render";
+}
+
+static bool v3d_fence_enable_signaling(struct dma_fence *fence)
+{
+ return true;
+}
+
+static bool v3d_fence_signaled(struct dma_fence *fence)
+{
+ struct v3d_fence *f = to_v3d_fence(fence);
+ struct v3d_dev *v3d = to_v3d_dev(f->dev);
+
+ return v3d->queue[f->queue].finished_seqno >= f->seqno;
+}
+
+const struct dma_fence_ops v3d_fence_ops = {
+ .get_driver_name = v3d_fence_get_driver_name,
+ .get_timeline_name = v3d_fence_get_timeline_name,
+ .enable_signaling = v3d_fence_enable_signaling,
+ .signaled = v3d_fence_signaled,
+ .wait = dma_fence_default_wait,
+ .release = dma_fence_free,
+};
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
new file mode 100644
index 000000000000..b513f9189caf
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -0,0 +1,668 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+#include <drm/drmP.h>
+#include <drm/drm_syncobj.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/sched/signal.h>
+
+#include "uapi/drm/v3d_drm.h"
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+#include "v3d_trace.h"
+
+static void
+v3d_init_core(struct v3d_dev *v3d, int core)
+{
+ /* Set OVRTMUOUT, which means that the texture sampler uniform
+ * configuration's tmu output type field is used, instead of
+ * using the hardware default behavior based on the texture
+ * type. If you want the default behavior, you can still put
+ * "2" in the indirect texture state's output_type field.
+ */
+ V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);
+
+ /* Whenever we flush the L2T cache, we always want to flush
+ * the whole thing.
+ */
+ V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
+ V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
+}
+
+/* Sets invariant state for the HW. */
+static void
+v3d_init_hw_state(struct v3d_dev *v3d)
+{
+ v3d_init_core(v3d, 0);
+}
+
+static void
+v3d_idle_axi(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);
+
+ if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
+ (V3D_GMP_STATUS_RD_COUNT_MASK |
+ V3D_GMP_STATUS_WR_COUNT_MASK |
+ V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
+ DRM_ERROR("Failed to wait for safe GMP shutdown\n");
+ }
+}
+
+static void
+v3d_idle_gca(struct v3d_dev *v3d)
+{
+ if (v3d->ver >= 41)
+ return;
+
+ V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);
+
+ if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
+ V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
+ V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
+ DRM_ERROR("Failed to wait for safe GCA shutdown\n");
+ }
+}
+
+static void
+v3d_reset_v3d(struct v3d_dev *v3d)
+{
+ int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);
+
+ if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
+ V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);
+
+ /* GFXH-1383: The SW_INIT may cause a stray write to address 0
+ * of the unit, so reset it to its power-on value here.
+ */
+ V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
+ } else {
+ WARN_ON_ONCE(V3D_GET_FIELD(version,
+ V3D_TOP_GR_BRIDGE_MAJOR) != 7);
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
+ V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
+ }
+
+ v3d_init_hw_state(v3d);
+}
+
+void
+v3d_reset(struct v3d_dev *v3d)
+{
+ struct drm_device *dev = &v3d->drm;
+
+ DRM_ERROR("Resetting GPU.\n");
+ trace_v3d_reset_begin(dev);
+
+ /* XXX: only needed for safe powerdown, not reset. */
+ if (false)
+ v3d_idle_axi(v3d, 0);
+
+ v3d_idle_gca(v3d);
+ v3d_reset_v3d(v3d);
+
+ v3d_mmu_set_page_table(v3d);
+ v3d_irq_reset(v3d);
+
+ trace_v3d_reset_end(dev);
+}
+
+static void
+v3d_flush_l3(struct v3d_dev *v3d)
+{
+ if (v3d->ver < 41) {
+ u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);
+
+ V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
+ gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);
+
+ if (v3d->ver < 33) {
+ V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
+ gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
+ }
+ }
+}
+
+/* Invalidates the (read-only) L2 cache. */
+static void
+v3d_invalidate_l2(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
+ V3D_L2CACTL_L2CCLR |
+ V3D_L2CACTL_L2CENA);
+}
+
+static void
+v3d_invalidate_l1td(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
+ if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+ V3D_L2TCACTL_L2TFLS), 100)) {
+ DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
+ }
+}
+
+/* Invalidates texture L2 cachelines */
+static void
+v3d_flush_l2t(struct v3d_dev *v3d, int core)
+{
+ v3d_invalidate_l1td(v3d, core);
+
+ V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
+ V3D_L2TCACTL_L2TFLS |
+ V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
+ if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+ V3D_L2TCACTL_L2TFLS), 100)) {
+ DRM_ERROR("Timeout waiting for L2T flush\n");
+ }
+}
+
+/* Invalidates the slice caches. These are read-only caches. */
+static void
+v3d_invalidate_slices(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+/* Invalidates texture L2 cachelines */
+static void
+v3d_invalidate_l2t(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core,
+ V3D_CTL_L2TCACTL,
+ V3D_L2TCACTL_L2TFLS |
+ V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM));
+ if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+ V3D_L2TCACTL_L2TFLS), 100)) {
+ DRM_ERROR("Timeout waiting for L2T invalidate\n");
+ }
+}
+
+void
+v3d_invalidate_caches(struct v3d_dev *v3d)
+{
+ v3d_flush_l3(v3d);
+
+ v3d_invalidate_l2(v3d, 0);
+ v3d_invalidate_slices(v3d, 0);
+ v3d_flush_l2t(v3d, 0);
+}
+
+void
+v3d_flush_caches(struct v3d_dev *v3d)
+{
+ v3d_invalidate_l1td(v3d, 0);
+ v3d_invalidate_l2t(v3d, 0);
+}
+
+static void
+v3d_attach_object_fences(struct v3d_exec_info *exec)
+{
+ struct dma_fence *out_fence = &exec->render.base.s_fence->finished;
+ struct v3d_bo *bo;
+ int i;
+
+ for (i = 0; i < exec->bo_count; i++) {
+ bo = to_v3d_bo(&exec->bo[i]->base);
+
+ /* XXX: Use shared fences for read-only objects. */
+ reservation_object_add_excl_fence(bo->resv, out_fence);
+ }
+}
+
+static void
+v3d_unlock_bo_reservations(struct drm_device *dev,
+ struct v3d_exec_info *exec,
+ struct ww_acquire_ctx *acquire_ctx)
+{
+ int i;
+
+ for (i = 0; i < exec->bo_count; i++) {
+ struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base);
+
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list). They're entirely private
+ * to v3d, so we don't attach dma-buf fences to them.
+ */
+static int
+v3d_lock_bo_reservations(struct drm_device *dev,
+ struct v3d_exec_info *exec,
+ struct ww_acquire_ctx *acquire_ctx)
+{
+ int contended_lock = -1;
+ int i, ret;
+ struct v3d_bo *bo;
+
+ ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+ if (contended_lock != -1) {
+ bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+ ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+ acquire_ctx);
+ if (ret) {
+ ww_acquire_done(acquire_ctx);
+ return ret;
+ }
+ }
+
+ for (i = 0; i < exec->bo_count; i++) {
+ if (i == contended_lock)
+ continue;
+
+ bo = to_v3d_bo(&exec->bo[i]->base);
+
+ ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; j++) {
+ bo = to_v3d_bo(&exec->bo[j]->base);
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ if (contended_lock != -1 && contended_lock >= i) {
+ bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ if (ret == -EDEADLK) {
+ contended_lock = i;
+ goto retry;
+ }
+
+ ww_acquire_done(acquire_ctx);
+ return ret;
+ }
+ }
+
+ ww_acquire_done(acquire_ctx);
+
+ /* Reserve space for our shared (read-only) fence references,
+ * before we commit the CL to the hardware.
+ */
+ for (i = 0; i < exec->bo_count; i++) {
+ bo = to_v3d_bo(&exec->bo[i]->base);
+
+ ret = reservation_object_reserve_shared(bo->resv);
+ if (ret) {
+ v3d_unlock_bo_reservations(dev, exec, acquire_ctx);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @exec: V3D job being set up
+ *
+ * The command validator needs to reference BOs by their index within
+ * the submitted job's BO list. This does the validation of the job's
+ * BO list and reference counting for the lifetime of the job.
+ *
+ * Note that this function doesn't need to unreference the BOs on
+ * failure, because that will happen at v3d_exec_cleanup() time.
+ */
+static int
+v3d_cl_lookup_bos(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_v3d_submit_cl *args,
+ struct v3d_exec_info *exec)
+{
+ u32 *handles;
+ int ret = 0;
+ int i;
+
+ exec->bo_count = args->bo_handle_count;
+
+ if (!exec->bo_count) {
+ /* See comment on bo_index for why we have to check
+ * this.
+ */
+ DRM_DEBUG("Rendering requires BOs\n");
+ return -EINVAL;
+ }
+
+ exec->bo = kvmalloc_array(exec->bo_count,
+ sizeof(struct drm_gem_cma_object *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!exec->bo) {
+ DRM_DEBUG("Failed to allocate validated BO pointers\n");
+ return -ENOMEM;
+ }
+
+ handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
+ if (!handles) {
+ ret = -ENOMEM;
+ DRM_DEBUG("Failed to allocate incoming GEM handles\n");
+ goto fail;
+ }
+
+ if (copy_from_user(handles,
+ (void __user *)(uintptr_t)args->bo_handles,
+ exec->bo_count * sizeof(u32))) {
+ ret = -EFAULT;
+ DRM_DEBUG("Failed to copy in GEM handles\n");
+ goto fail;
+ }
+
+ spin_lock(&file_priv->table_lock);
+ for (i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+ handles[i]);
+ if (!bo) {
+ DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+ i, handles[i]);
+ ret = -ENOENT;
+ spin_unlock(&file_priv->table_lock);
+ goto fail;
+ }
+ drm_gem_object_get(bo);
+ exec->bo[i] = to_v3d_bo(bo);
+ }
+ spin_unlock(&file_priv->table_lock);
+
+fail:
+ kvfree(handles);
+ return ret;
+}
+
+static void
+v3d_exec_cleanup(struct kref *ref)
+{
+ struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
+ refcount);
+ struct v3d_dev *v3d = exec->v3d;
+ unsigned int i;
+ struct v3d_bo *bo, *save;
+
+ dma_fence_put(exec->bin.in_fence);
+ dma_fence_put(exec->render.in_fence);
+
+ dma_fence_put(exec->bin.done_fence);
+ dma_fence_put(exec->render.done_fence);
+
+ dma_fence_put(exec->bin_done_fence);
+
+ for (i = 0; i < exec->bo_count; i++)
+ drm_gem_object_put_unlocked(&exec->bo[i]->base);
+ kvfree(exec->bo);
+
+ list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
+ drm_gem_object_put_unlocked(&bo->base);
+ }
+
+ pm_runtime_mark_last_busy(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->dev);
+
+ kfree(exec);
+}
+
+void v3d_exec_put(struct v3d_exec_info *exec)
+{
+ kref_put(&exec->refcount, v3d_exec_cleanup);
+}
+
+int
+v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ int ret;
+ struct drm_v3d_wait_bo *args = data;
+ struct drm_gem_object *gem_obj;
+ struct v3d_bo *bo;
+ ktime_t start = ktime_get();
+ u64 delta_ns;
+ unsigned long timeout_jiffies =
+ nsecs_to_jiffies_timeout(args->timeout_ns);
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -EINVAL;
+ }
+ bo = to_v3d_bo(gem_obj);
+
+ ret = reservation_object_wait_timeout_rcu(bo->resv,
+ true, true,
+ timeout_jiffies);
+
+ if (ret == 0)
+ ret = -ETIME;
+ else if (ret > 0)
+ ret = 0;
+
+ /* Decrement the user's timeout, in case we got interrupted
+ * such that the ioctl will be restarted.
+ */
+ delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
+ if (delta_ns < args->timeout_ns)
+ args->timeout_ns -= delta_ns;
+ else
+ args->timeout_ns = 0;
+
+ /* Asked to wait beyond the jiffie/scheduler precision? */
+ if (ret == -ETIME && args->timeout_ns)
+ ret = -EAGAIN;
+
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return ret;
+}
+
+/**
+ * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * This is the main entrypoint for userspace to submit a 3D frame to
+ * the GPU. Userspace provides the binner command list (if
+ * applicable), and the kernel sets up the render command list to draw
+ * to the framebuffer described in the ioctl, using the command lists
+ * that the 3D engine's binner will produce.
+ */
+int
+v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+ struct drm_v3d_submit_cl *args = data;
+ struct v3d_exec_info *exec;
+ struct ww_acquire_ctx acquire_ctx;
+ struct drm_syncobj *sync_out;
+ int ret = 0;
+
+ if (args->pad != 0) {
+ DRM_INFO("pad must be zero: %d\n", args->pad);
+ return -EINVAL;
+ }
+
+ exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
+ if (!exec)
+ return -ENOMEM;
+
+ ret = pm_runtime_get_sync(v3d->dev);
+ if (ret < 0) {
+ kfree(exec);
+ return ret;
+ }
+
+ kref_init(&exec->refcount);
+
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
+ &exec->bin.in_fence);
+ if (ret == -EINVAL)
+ goto fail;
+
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
+ &exec->render.in_fence);
+ if (ret == -EINVAL)
+ goto fail;
+
+ exec->qma = args->qma;
+ exec->qms = args->qms;
+ exec->qts = args->qts;
+ exec->bin.exec = exec;
+ exec->bin.start = args->bcl_start;
+ exec->bin.end = args->bcl_end;
+ exec->render.exec = exec;
+ exec->render.start = args->rcl_start;
+ exec->render.end = args->rcl_end;
+ exec->v3d = v3d;
+ INIT_LIST_HEAD(&exec->unref_list);
+
+ ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
+ if (ret)
+ goto fail;
+
+ ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx);
+ if (ret)
+ goto fail;
+
+ if (exec->bin.start != exec->bin.end) {
+ ret = drm_sched_job_init(&exec->bin.base,
+ &v3d->queue[V3D_BIN].sched,
+ &v3d_priv->sched_entity[V3D_BIN],
+ v3d_priv);
+ if (ret)
+ goto fail_unreserve;
+
+ exec->bin_done_fence =
+ dma_fence_get(&exec->bin.base.s_fence->finished);
+
+ kref_get(&exec->refcount); /* put by scheduler job completion */
+ drm_sched_entity_push_job(&exec->bin.base,
+ &v3d_priv->sched_entity[V3D_BIN]);
+ }
+
+ ret = drm_sched_job_init(&exec->render.base,
+ &v3d->queue[V3D_RENDER].sched,
+ &v3d_priv->sched_entity[V3D_RENDER],
+ v3d_priv);
+ if (ret)
+ goto fail_unreserve;
+
+ kref_get(&exec->refcount); /* put by scheduler job completion */
+ drm_sched_entity_push_job(&exec->render.base,
+ &v3d_priv->sched_entity[V3D_RENDER]);
+
+ v3d_attach_object_fences(exec);
+
+ v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+
+ /* Update the return sync object for the */
+ sync_out = drm_syncobj_find(file_priv, args->out_sync);
+ if (sync_out) {
+ drm_syncobj_replace_fence(sync_out,
+ &exec->render.base.s_fence->finished);
+ drm_syncobj_put(sync_out);
+ }
+
+ v3d_exec_put(exec);
+
+ return 0;
+
+fail_unreserve:
+ v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+fail:
+ v3d_exec_put(exec);
+
+ return ret;
+}
+
+int
+v3d_gem_init(struct drm_device *dev)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ u32 pt_size = 4096 * 1024;
+ int ret, i;
+
+ for (i = 0; i < V3D_MAX_QUEUES; i++)
+ v3d->queue[i].fence_context = dma_fence_context_alloc(1);
+
+ spin_lock_init(&v3d->mm_lock);
+ spin_lock_init(&v3d->job_lock);
+ mutex_init(&v3d->bo_lock);
+ mutex_init(&v3d->reset_lock);
+
+ /* Note: We don't allocate address 0. Various bits of HW
+ * treat 0 as special, such as the occlusion query counters
+ * where 0 means "disabled".
+ */
+ drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);
+
+ v3d->pt = dma_alloc_wc(v3d->dev, pt_size,
+ &v3d->pt_paddr,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+ if (!v3d->pt) {
+ drm_mm_takedown(&v3d->mm);
+ dev_err(v3d->dev,
+ "Failed to allocate page tables. "
+ "Please ensure you have CMA enabled.\n");
+ return -ENOMEM;
+ }
+
+ v3d_init_hw_state(v3d);
+ v3d_mmu_set_page_table(v3d);
+
+ ret = v3d_sched_init(v3d);
+ if (ret) {
+ drm_mm_takedown(&v3d->mm);
+ dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt,
+ v3d->pt_paddr);
+ }
+
+ return 0;
+}
+
+void
+v3d_gem_destroy(struct drm_device *dev)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ enum v3d_queue q;
+
+ v3d_sched_fini(v3d);
+
+ /* Waiting for exec to finish would need to be done before
+ * unregistering V3D.
+ */
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ WARN_ON(v3d->queue[q].emit_seqno !=
+ v3d->queue[q].finished_seqno);
+ }
+
+ drm_mm_takedown(&v3d->mm);
+
+ dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
new file mode 100644
index 000000000000..77e1fa046c10
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+/**
+ * DOC: Interrupt management for the V3D engine
+ *
+ * When we take a binning or rendering flush done interrupt, we need
+ * to signal the fence for that job so that the scheduler can queue up
+ * the next one and unblock any waiters.
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
+ V3D_INT_FLDONE | \
+ V3D_INT_FRDONE | \
+ V3D_INT_GMPV))
+
+#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \
+ V3D_HUB_INT_MMU_PTI | \
+ V3D_HUB_INT_MMU_CAP))
+
+static void
+v3d_overflow_mem_work(struct work_struct *work)
+{
+ struct v3d_dev *v3d =
+ container_of(work, struct v3d_dev, overflow_mem_work);
+ struct drm_device *dev = &v3d->drm;
+ struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024);
+ unsigned long irqflags;
+
+ if (IS_ERR(bo)) {
+ DRM_ERROR("Couldn't allocate binner overflow mem\n");
+ return;
+ }
+
+ /* We lost a race, and our work task came in after the bin job
+ * completed and exited. This can happen because the HW
+ * signals OOM before it's fully OOM, so the binner might just
+ * barely complete.
+ *
+ * If we lose the race and our work task comes in after a new
+ * bin job got scheduled, that's fine. We'll just give them
+ * some binner pool anyway.
+ */
+ spin_lock_irqsave(&v3d->job_lock, irqflags);
+ if (!v3d->bin_job) {
+ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+ goto out;
+ }
+
+ drm_gem_object_get(&bo->base);
+ list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
+ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+
+ V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
+ V3D_CORE_WRITE(0, V3D_PTB_BPOS, bo->base.size);
+
+out:
+ drm_gem_object_put_unlocked(&bo->base);
+}
+
+static irqreturn_t
+v3d_irq(int irq, void *arg)
+{
+ struct v3d_dev *v3d = arg;
+ u32 intsts;
+ irqreturn_t status = IRQ_NONE;
+
+ intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS);
+
+ /* Acknowledge the interrupts we're handling here. */
+ V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts);
+
+ if (intsts & V3D_INT_OUTOMEM) {
+ /* Note that the OOM status is edge signaled, so the
+ * interrupt won't happen again until the we actually
+ * add more memory.
+ */
+ schedule_work(&v3d->overflow_mem_work);
+ status = IRQ_HANDLED;
+ }
+
+ if (intsts & V3D_INT_FLDONE) {
+ v3d->queue[V3D_BIN].finished_seqno++;
+ dma_fence_signal(v3d->bin_job->bin.done_fence);
+ status = IRQ_HANDLED;
+ }
+
+ if (intsts & V3D_INT_FRDONE) {
+ v3d->queue[V3D_RENDER].finished_seqno++;
+ dma_fence_signal(v3d->render_job->render.done_fence);
+
+ status = IRQ_HANDLED;
+ }
+
+ /* We shouldn't be triggering these if we have GMP in
+ * always-allowed mode.
+ */
+ if (intsts & V3D_INT_GMPV)
+ dev_err(v3d->dev, "GMP violation\n");
+
+ return status;
+}
+
+static irqreturn_t
+v3d_hub_irq(int irq, void *arg)
+{
+ struct v3d_dev *v3d = arg;
+ u32 intsts;
+ irqreturn_t status = IRQ_NONE;
+
+ intsts = V3D_READ(V3D_HUB_INT_STS);
+
+ /* Acknowledge the interrupts we're handling here. */
+ V3D_WRITE(V3D_HUB_INT_CLR, intsts);
+
+ if (intsts & (V3D_HUB_INT_MMU_WRV |
+ V3D_HUB_INT_MMU_PTI |
+ V3D_HUB_INT_MMU_CAP)) {
+ u32 axi_id = V3D_READ(V3D_MMU_VIO_ID);
+ u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8;
+
+ dev_err(v3d->dev, "MMU error from client %d at 0x%08llx%s%s%s\n",
+ axi_id, (long long)vio_addr,
+ ((intsts & V3D_HUB_INT_MMU_WRV) ?
+ ", write violation" : ""),
+ ((intsts & V3D_HUB_INT_MMU_PTI) ?
+ ", pte invalid" : ""),
+ ((intsts & V3D_HUB_INT_MMU_CAP) ?
+ ", cap exceeded" : ""));
+ status = IRQ_HANDLED;
+ }
+
+ return status;
+}
+
+void
+v3d_irq_init(struct v3d_dev *v3d)
+{
+ int ret, core;
+
+ INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work);
+
+ /* Clear any pending interrupts someone might have left around
+ * for us.
+ */
+ for (core = 0; core < v3d->cores; core++)
+ V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
+ V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+
+ ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0),
+ v3d_hub_irq, IRQF_SHARED,
+ "v3d_hub", v3d);
+ ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1),
+ v3d_irq, IRQF_SHARED,
+ "v3d_core0", v3d);
+ if (ret)
+ dev_err(v3d->dev, "IRQ setup failed: %d\n", ret);
+
+ v3d_irq_enable(v3d);
+}
+
+void
+v3d_irq_enable(struct v3d_dev *v3d)
+{
+ int core;
+
+ /* Enable our set of interrupts, masking out any others. */
+ for (core = 0; core < v3d->cores; core++) {
+ V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS);
+ V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS);
+ }
+
+ V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS);
+ V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS);
+}
+
+void
+v3d_irq_disable(struct v3d_dev *v3d)
+{
+ int core;
+
+ /* Disable all interrupts. */
+ for (core = 0; core < v3d->cores; core++)
+ V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0);
+ V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0);
+
+ /* Clear any pending interrupts we might have left. */
+ for (core = 0; core < v3d->cores; core++)
+ V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
+ V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+
+ cancel_work_sync(&v3d->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void v3d_irq_reset(struct v3d_dev *v3d)
+{
+ v3d_irq_enable(v3d);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c
new file mode 100644
index 000000000000..b00f97c31b70
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_mmu.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D MMU
+ *
+ * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has
+ * a single level of page tables for the V3D's 4GB address space to
+ * map to AXI bus addresses, thus it could need up to 4MB of
+ * physically contiguous memory to store the PTEs.
+ *
+ * Because the 4MB of contiguous memory for page tables is precious,
+ * and switching between them is expensive, we load all BOs into the
+ * same 4GB address space.
+ *
+ * To protect clients from each other, we should use the GMP to
+ * quickly mask out (at 128kb granularity) what pages are available to
+ * each client. This is not yet implemented.
+ */
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define V3D_MMU_PAGE_SHIFT 12
+
+/* Note: All PTEs for the 1MB superpage must be filled with the
+ * superpage bit set.
+ */
+#define V3D_PTE_SUPERPAGE BIT(31)
+#define V3D_PTE_WRITEABLE BIT(29)
+#define V3D_PTE_VALID BIT(28)
+
+static int v3d_mmu_flush_all(struct v3d_dev *v3d)
+{
+ int ret;
+
+ /* Make sure that another flush isn't already running when we
+ * start this one.
+ */
+ ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
+ V3D_MMU_CTL_TLB_CLEARING), 100);
+ if (ret)
+ dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n");
+
+ V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) |
+ V3D_MMU_CTL_TLB_CLEAR);
+
+ V3D_WRITE(V3D_MMUC_CONTROL,
+ V3D_MMUC_CONTROL_FLUSH |
+ V3D_MMUC_CONTROL_ENABLE);
+
+ ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
+ V3D_MMU_CTL_TLB_CLEARING), 100);
+ if (ret) {
+ dev_err(v3d->dev, "TLB clear wait idle failed\n");
+ return ret;
+ }
+
+ ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) &
+ V3D_MMUC_CONTROL_FLUSHING), 100);
+ if (ret)
+ dev_err(v3d->dev, "MMUC flush wait idle failed\n");
+
+ return ret;
+}
+
+int v3d_mmu_set_page_table(struct v3d_dev *v3d)
+{
+ V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT);
+ V3D_WRITE(V3D_MMU_CTL,
+ V3D_MMU_CTL_ENABLE |
+ V3D_MMU_CTL_PT_INVALID |
+ V3D_MMU_CTL_PT_INVALID_ABORT |
+ V3D_MMU_CTL_WRITE_VIOLATION_ABORT |
+ V3D_MMU_CTL_CAP_EXCEEDED_ABORT);
+ V3D_WRITE(V3D_MMU_ILLEGAL_ADDR,
+ (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) |
+ V3D_MMU_ILLEGAL_ADDR_ENABLE);
+ V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE);
+
+ return v3d_mmu_flush_all(v3d);
+}
+
+void v3d_mmu_insert_ptes(struct v3d_bo *bo)
+{
+ struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
+ u32 page = bo->node.start;
+ u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID;
+ unsigned int count;
+ struct scatterlist *sgl;
+
+ for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) {
+ u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT;
+ u32 pte = page_prot | page_address;
+ u32 i;
+
+ BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >=
+ BIT(24));
+
+ for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++)
+ v3d->pt[page++] = pte + i;
+ }
+
+ WARN_ON_ONCE(page - bo->node.start !=
+ bo->base.size >> V3D_MMU_PAGE_SHIFT);
+
+ if (v3d_mmu_flush_all(v3d))
+ dev_err(v3d->dev, "MMU flush timeout\n");
+}
+
+void v3d_mmu_remove_ptes(struct v3d_bo *bo)
+{
+ struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
+ u32 npages = bo->base.size >> V3D_MMU_PAGE_SHIFT;
+ u32 page;
+
+ for (page = bo->node.start; page < bo->node.start + npages; page++)
+ v3d->pt[page] = 0;
+
+ if (v3d_mmu_flush_all(v3d))
+ dev_err(v3d->dev, "MMU flush timeout\n");
+}
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
new file mode 100644
index 000000000000..fc13282dfc2f
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+#ifndef V3D_REGS_H
+#define V3D_REGS_H
+
+#include <linux/bitops.h>
+
+#define V3D_MASK(high, low) ((u32)GENMASK(high, low))
+/* Using the GNU statement expression extension */
+#define V3D_SET_FIELD(value, field) \
+ ({ \
+ u32 fieldval = (value) << field##_SHIFT; \
+ WARN_ON((fieldval & ~field##_MASK) != 0); \
+ fieldval & field##_MASK; \
+ })
+
+#define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \
+ field##_SHIFT)
+
+/* Hub registers for shared hardware between V3D cores. */
+
+#define V3D_HUB_AXICFG 0x00000
+# define V3D_HUB_AXICFG_MAX_LEN_MASK V3D_MASK(3, 0)
+# define V3D_HUB_AXICFG_MAX_LEN_SHIFT 0
+#define V3D_HUB_UIFCFG 0x00004
+#define V3D_HUB_IDENT0 0x00008
+
+#define V3D_HUB_IDENT1 0x0000c
+# define V3D_HUB_IDENT1_WITH_MSO BIT(19)
+# define V3D_HUB_IDENT1_WITH_TSY BIT(18)
+# define V3D_HUB_IDENT1_WITH_TFU BIT(17)
+# define V3D_HUB_IDENT1_WITH_L3C BIT(16)
+# define V3D_HUB_IDENT1_NHOSTS_MASK V3D_MASK(15, 12)
+# define V3D_HUB_IDENT1_NHOSTS_SHIFT 12
+# define V3D_HUB_IDENT1_NCORES_MASK V3D_MASK(11, 8)
+# define V3D_HUB_IDENT1_NCORES_SHIFT 8
+# define V3D_HUB_IDENT1_REV_MASK V3D_MASK(7, 4)
+# define V3D_HUB_IDENT1_REV_SHIFT 4
+# define V3D_HUB_IDENT1_TVER_MASK V3D_MASK(3, 0)
+# define V3D_HUB_IDENT1_TVER_SHIFT 0
+
+#define V3D_HUB_IDENT2 0x00010
+# define V3D_HUB_IDENT2_WITH_MMU BIT(8)
+# define V3D_HUB_IDENT2_L3C_NKB_MASK V3D_MASK(7, 0)
+# define V3D_HUB_IDENT2_L3C_NKB_SHIFT 0
+
+#define V3D_HUB_IDENT3 0x00014
+# define V3D_HUB_IDENT3_IPREV_MASK V3D_MASK(15, 8)
+# define V3D_HUB_IDENT3_IPREV_SHIFT 8
+# define V3D_HUB_IDENT3_IPIDX_MASK V3D_MASK(7, 0)
+# define V3D_HUB_IDENT3_IPIDX_SHIFT 0
+
+#define V3D_HUB_INT_STS 0x00050
+#define V3D_HUB_INT_SET 0x00054
+#define V3D_HUB_INT_CLR 0x00058
+#define V3D_HUB_INT_MSK_STS 0x0005c
+#define V3D_HUB_INT_MSK_SET 0x00060
+#define V3D_HUB_INT_MSK_CLR 0x00064
+# define V3D_HUB_INT_MMU_WRV BIT(5)
+# define V3D_HUB_INT_MMU_PTI BIT(4)
+# define V3D_HUB_INT_MMU_CAP BIT(3)
+# define V3D_HUB_INT_MSO BIT(2)
+# define V3D_HUB_INT_TFUC BIT(1)
+# define V3D_HUB_INT_TFUF BIT(0)
+
+#define V3D_GCA_CACHE_CTRL 0x0000c
+# define V3D_GCA_CACHE_CTRL_FLUSH BIT(0)
+
+#define V3D_GCA_SAFE_SHUTDOWN 0x000b0
+# define V3D_GCA_SAFE_SHUTDOWN_EN BIT(0)
+
+#define V3D_GCA_SAFE_SHUTDOWN_ACK 0x000b4
+# define V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED 3
+
+# define V3D_TOP_GR_BRIDGE_REVISION 0x00000
+# define V3D_TOP_GR_BRIDGE_MAJOR_MASK V3D_MASK(15, 8)
+# define V3D_TOP_GR_BRIDGE_MAJOR_SHIFT 8
+# define V3D_TOP_GR_BRIDGE_MINOR_MASK V3D_MASK(7, 0)
+# define V3D_TOP_GR_BRIDGE_MINOR_SHIFT 0
+
+/* 7268 reset reg */
+# define V3D_TOP_GR_BRIDGE_SW_INIT_0 0x00008
+# define V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT BIT(0)
+/* 7278 reset reg */
+# define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c
+# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
+
+/* Per-MMU registers. */
+
+#define V3D_MMUC_CONTROL 0x01000
+# define V3D_MMUC_CONTROL_CLEAR BIT(3)
+# define V3D_MMUC_CONTROL_FLUSHING BIT(2)
+# define V3D_MMUC_CONTROL_FLUSH BIT(1)
+# define V3D_MMUC_CONTROL_ENABLE BIT(0)
+
+#define V3D_MMU_CTL 0x01200
+# define V3D_MMU_CTL_CAP_EXCEEDED BIT(27)
+# define V3D_MMU_CTL_CAP_EXCEEDED_ABORT BIT(26)
+# define V3D_MMU_CTL_CAP_EXCEEDED_INT BIT(25)
+# define V3D_MMU_CTL_CAP_EXCEEDED_EXCEPTION BIT(24)
+# define V3D_MMU_CTL_PT_INVALID BIT(20)
+# define V3D_MMU_CTL_PT_INVALID_ABORT BIT(19)
+# define V3D_MMU_CTL_PT_INVALID_INT BIT(18)
+# define V3D_MMU_CTL_PT_INVALID_EXCEPTION BIT(17)
+# define V3D_MMU_CTL_WRITE_VIOLATION BIT(16)
+# define V3D_MMU_CTL_WRITE_VIOLATION_ABORT BIT(11)
+# define V3D_MMU_CTL_WRITE_VIOLATION_INT BIT(10)
+# define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION BIT(9)
+# define V3D_MMU_CTL_TLB_CLEARING BIT(7)
+# define V3D_MMU_CTL_TLB_STATS_CLEAR BIT(3)
+# define V3D_MMU_CTL_TLB_CLEAR BIT(2)
+# define V3D_MMU_CTL_TLB_STATS_ENABLE BIT(1)
+# define V3D_MMU_CTL_ENABLE BIT(0)
+
+#define V3D_MMU_PT_PA_BASE 0x01204
+#define V3D_MMU_HIT 0x01208
+#define V3D_MMU_MISSES 0x0120c
+#define V3D_MMU_STALLS 0x01210
+
+#define V3D_MMU_ADDR_CAP 0x01214
+# define V3D_MMU_ADDR_CAP_ENABLE BIT(31)
+# define V3D_MMU_ADDR_CAP_MPAGE_MASK V3D_MASK(11, 0)
+# define V3D_MMU_ADDR_CAP_MPAGE_SHIFT 0
+
+#define V3D_MMU_SHOOT_DOWN 0x01218
+# define V3D_MMU_SHOOT_DOWN_SHOOTING BIT(29)
+# define V3D_MMU_SHOOT_DOWN_SHOOT BIT(28)
+# define V3D_MMU_SHOOT_DOWN_PAGE_MASK V3D_MASK(27, 0)
+# define V3D_MMU_SHOOT_DOWN_PAGE_SHIFT 0
+
+#define V3D_MMU_BYPASS_START 0x0121c
+#define V3D_MMU_BYPASS_END 0x01220
+
+/* AXI ID of the access that faulted */
+#define V3D_MMU_VIO_ID 0x0122c
+
+/* Address for illegal PTEs to return */
+#define V3D_MMU_ILLEGAL_ADDR 0x01230
+# define V3D_MMU_ILLEGAL_ADDR_ENABLE BIT(31)
+
+/* Address that faulted */
+#define V3D_MMU_VIO_ADDR 0x01234
+
+/* Per-V3D-core registers */
+
+#define V3D_CTL_IDENT0 0x00000
+# define V3D_IDENT0_VER_MASK V3D_MASK(31, 24)
+# define V3D_IDENT0_VER_SHIFT 24
+
+#define V3D_CTL_IDENT1 0x00004
+/* Multiples of 1kb */
+# define V3D_IDENT1_VPM_SIZE_MASK V3D_MASK(31, 28)
+# define V3D_IDENT1_VPM_SIZE_SHIFT 28
+# define V3D_IDENT1_NSEM_MASK V3D_MASK(23, 16)
+# define V3D_IDENT1_NSEM_SHIFT 16
+# define V3D_IDENT1_NTMU_MASK V3D_MASK(15, 12)
+# define V3D_IDENT1_NTMU_SHIFT 12
+# define V3D_IDENT1_QUPS_MASK V3D_MASK(11, 8)
+# define V3D_IDENT1_QUPS_SHIFT 8
+# define V3D_IDENT1_NSLC_MASK V3D_MASK(7, 4)
+# define V3D_IDENT1_NSLC_SHIFT 4
+# define V3D_IDENT1_REV_MASK V3D_MASK(3, 0)
+# define V3D_IDENT1_REV_SHIFT 0
+
+#define V3D_CTL_IDENT2 0x00008
+# define V3D_IDENT2_BCG_INT BIT(28)
+
+#define V3D_CTL_MISCCFG 0x00018
+# define V3D_MISCCFG_OVRTMUOUT BIT(0)
+
+#define V3D_CTL_L2CACTL 0x00020
+# define V3D_L2CACTL_L2CCLR BIT(2)
+# define V3D_L2CACTL_L2CDIS BIT(1)
+# define V3D_L2CACTL_L2CENA BIT(0)
+
+#define V3D_CTL_SLCACTL 0x00024
+# define V3D_SLCACTL_TVCCS_MASK V3D_MASK(27, 24)
+# define V3D_SLCACTL_TVCCS_SHIFT 24
+# define V3D_SLCACTL_TDCCS_MASK V3D_MASK(19, 16)
+# define V3D_SLCACTL_TDCCS_SHIFT 16
+# define V3D_SLCACTL_UCC_MASK V3D_MASK(11, 8)
+# define V3D_SLCACTL_UCC_SHIFT 8
+# define V3D_SLCACTL_ICC_MASK V3D_MASK(3, 0)
+# define V3D_SLCACTL_ICC_SHIFT 0
+
+#define V3D_CTL_L2TCACTL 0x00030
+# define V3D_L2TCACTL_TMUWCF BIT(8)
+# define V3D_L2TCACTL_L2T_NO_WM BIT(4)
+# define V3D_L2TCACTL_FLM_FLUSH 0
+# define V3D_L2TCACTL_FLM_CLEAR 1
+# define V3D_L2TCACTL_FLM_CLEAN 2
+# define V3D_L2TCACTL_FLM_MASK V3D_MASK(2, 1)
+# define V3D_L2TCACTL_FLM_SHIFT 1
+# define V3D_L2TCACTL_L2TFLS BIT(0)
+#define V3D_CTL_L2TFLSTA 0x00034
+#define V3D_CTL_L2TFLEND 0x00038
+
+#define V3D_CTL_INT_STS 0x00050
+#define V3D_CTL_INT_SET 0x00054
+#define V3D_CTL_INT_CLR 0x00058
+#define V3D_CTL_INT_MSK_STS 0x0005c
+#define V3D_CTL_INT_MSK_SET 0x00060
+#define V3D_CTL_INT_MSK_CLR 0x00064
+# define V3D_INT_QPU_MASK V3D_MASK(27, 16)
+# define V3D_INT_QPU_SHIFT 16
+# define V3D_INT_GMPV BIT(5)
+# define V3D_INT_TRFB BIT(4)
+# define V3D_INT_SPILLUSE BIT(3)
+# define V3D_INT_OUTOMEM BIT(2)
+# define V3D_INT_FLDONE BIT(1)
+# define V3D_INT_FRDONE BIT(0)
+
+#define V3D_CLE_CT0CS 0x00100
+#define V3D_CLE_CT1CS 0x00104
+#define V3D_CLE_CTNCS(n) (V3D_CLE_CT0CS + 4 * n)
+#define V3D_CLE_CT0EA 0x00108
+#define V3D_CLE_CT1EA 0x0010c
+#define V3D_CLE_CTNEA(n) (V3D_CLE_CT0EA + 4 * n)
+#define V3D_CLE_CT0CA 0x00110
+#define V3D_CLE_CT1CA 0x00114
+#define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n)
+#define V3D_CLE_CT0RA 0x00118
+#define V3D_CLE_CT1RA 0x0011c
+#define V3D_CLE_CT0LC 0x00120
+#define V3D_CLE_CT1LC 0x00124
+#define V3D_CLE_CT0PC 0x00128
+#define V3D_CLE_CT1PC 0x0012c
+#define V3D_CLE_PCS 0x00130
+#define V3D_CLE_BFC 0x00134
+#define V3D_CLE_RFC 0x00138
+#define V3D_CLE_TFBC 0x0013c
+#define V3D_CLE_TFIT 0x00140
+#define V3D_CLE_CT1CFG 0x00144
+#define V3D_CLE_CT1TILECT 0x00148
+#define V3D_CLE_CT1TSKIP 0x0014c
+#define V3D_CLE_CT1PTCT 0x00150
+#define V3D_CLE_CT0SYNC 0x00154
+#define V3D_CLE_CT1SYNC 0x00158
+#define V3D_CLE_CT0QTS 0x0015c
+# define V3D_CLE_CT0QTS_ENABLE BIT(1)
+#define V3D_CLE_CT0QBA 0x00160
+#define V3D_CLE_CT1QBA 0x00164
+#define V3D_CLE_CTNQBA(n) (V3D_CLE_CT0QBA + 4 * n)
+#define V3D_CLE_CT0QEA 0x00168
+#define V3D_CLE_CT1QEA 0x0016c
+#define V3D_CLE_CTNQEA(n) (V3D_CLE_CT0QEA + 4 * n)
+#define V3D_CLE_CT0QMA 0x00170
+#define V3D_CLE_CT0QMS 0x00174
+#define V3D_CLE_CT1QCFG 0x00178
+/* If set without ETPROC, entirely skip tiles with no primitives. */
+# define V3D_CLE_QCFG_ETFILT BIT(7)
+/* If set with ETFILT, just write the clear color to tiles with no
+ * primitives.
+ */
+# define V3D_CLE_QCFG_ETPROC BIT(6)
+# define V3D_CLE_QCFG_ETSFLUSH BIT(1)
+# define V3D_CLE_QCFG_MCDIS BIT(0)
+
+#define V3D_PTB_BPCA 0x00300
+#define V3D_PTB_BPCS 0x00304
+#define V3D_PTB_BPOA 0x00308
+#define V3D_PTB_BPOS 0x0030c
+
+#define V3D_PTB_BXCF 0x00310
+# define V3D_PTB_BXCF_RWORDERDISA BIT(1)
+# define V3D_PTB_BXCF_CLIPDISA BIT(0)
+
+#define V3D_GMP_STATUS 0x00800
+# define V3D_GMP_STATUS_GMPRST BIT(31)
+# define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24)
+# define V3D_GMP_STATUS_WR_COUNT_SHIFT 24
+# define V3D_GMP_STATUS_RD_COUNT_MASK V3D_MASK(22, 16)
+# define V3D_GMP_STATUS_RD_COUNT_SHIFT 16
+# define V3D_GMP_STATUS_WR_ACTIVE BIT(5)
+# define V3D_GMP_STATUS_RD_ACTIVE BIT(4)
+# define V3D_GMP_STATUS_CFG_BUSY BIT(3)
+# define V3D_GMP_STATUS_CNTOVF BIT(2)
+# define V3D_GMP_STATUS_INVPROT BIT(1)
+# define V3D_GMP_STATUS_VIO BIT(0)
+
+#define V3D_GMP_CFG 0x00804
+# define V3D_GMP_CFG_LBURSTEN BIT(3)
+# define V3D_GMP_CFG_PGCRSEN BIT()
+# define V3D_GMP_CFG_STOP_REQ BIT(1)
+# define V3D_GMP_CFG_PROT_ENABLE BIT(0)
+
+#define V3D_GMP_VIO_ADDR 0x00808
+#define V3D_GMP_VIO_TYPE 0x0080c
+#define V3D_GMP_TABLE_ADDR 0x00810
+#define V3D_GMP_CLEAR_LOAD 0x00814
+#define V3D_GMP_PRESERVE_LOAD 0x00818
+#define V3D_GMP_VALID_LINES 0x00820
+
+#endif /* V3D_REGS_H */
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
new file mode 100644
index 000000000000..b07bece9417d
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D scheduling
+ *
+ * The shared DRM GPU scheduler is used to coordinate submitting jobs
+ * to the hardware. Each DRM fd (roughly a client process) gets its
+ * own scheduler entity, which will process jobs in order. The GPU
+ * scheduler will round-robin between clients to submit the next job.
+ *
+ * For simplicity, and in order to keep latency low for interactive
+ * jobs when bulk background jobs are queued up, we submit a new job
+ * to the HW only when it has completed the last one, instead of
+ * filling up the CT[01]Q FIFOs with jobs. Similarly, we use
+ * v3d_job_dependency() to manage the dependency between bin and
+ * render, instead of having the clients submit jobs with using the
+ * HW's semaphores to interlock between them.
+ */
+
+#include <linux/kthread.h>
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+#include "v3d_trace.h"
+
+static struct v3d_job *
+to_v3d_job(struct drm_sched_job *sched_job)
+{
+ return container_of(sched_job, struct v3d_job, base);
+}
+
+static void
+v3d_job_free(struct drm_sched_job *sched_job)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+
+ v3d_exec_put(job->exec);
+}
+
+/**
+ * Returns the fences that the bin job depends on, one by one.
+ * v3d_job_run() won't be called until all of them have been signaled.
+ */
+static struct dma_fence *
+v3d_job_dependency(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_exec_info *exec = job->exec;
+ enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+ struct dma_fence *fence;
+
+ fence = job->in_fence;
+ if (fence) {
+ job->in_fence = NULL;
+ return fence;
+ }
+
+ if (q == V3D_RENDER) {
+ /* If we had a bin job, the render job definitely depends on
+ * it. We first have to wait for bin to be scheduled, so that
+ * its done_fence is created.
+ */
+ fence = exec->bin_done_fence;
+ if (fence) {
+ exec->bin_done_fence = NULL;
+ return fence;
+ }
+ }
+
+ /* XXX: Wait on a fence for switching the GMP if necessary,
+ * and then do so.
+ */
+
+ return fence;
+}
+
+static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_exec_info *exec = job->exec;
+ enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+ struct v3d_dev *v3d = exec->v3d;
+ struct drm_device *dev = &v3d->drm;
+ struct dma_fence *fence;
+ unsigned long irqflags;
+
+ if (unlikely(job->base.s_fence->finished.error))
+ return NULL;
+
+ /* Lock required around bin_job update vs
+ * v3d_overflow_mem_work().
+ */
+ spin_lock_irqsave(&v3d->job_lock, irqflags);
+ if (q == V3D_BIN) {
+ v3d->bin_job = job->exec;
+
+ /* Clear out the overflow allocation, so we don't
+ * reuse the overflow attached to a previous job.
+ */
+ V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
+ } else {
+ v3d->render_job = job->exec;
+ }
+ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+
+ /* Can we avoid this flush when q==RENDER? We need to be
+ * careful of scheduling, though -- imagine job0 rendering to
+ * texture and job1 reading, and them being executed as bin0,
+ * bin1, render0, render1, so that render1's flush at bin time
+ * wasn't enough.
+ */
+ v3d_invalidate_caches(v3d);
+
+ fence = v3d_fence_create(v3d, q);
+ if (!fence)
+ return fence;
+
+ if (job->done_fence)
+ dma_fence_put(job->done_fence);
+ job->done_fence = dma_fence_get(fence);
+
+ trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
+ job->start, job->end);
+
+ if (q == V3D_BIN) {
+ if (exec->qma) {
+ V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
+ V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
+ }
+ if (exec->qts) {
+ V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+ V3D_CLE_CT0QTS_ENABLE |
+ exec->qts);
+ }
+ } else {
+ /* XXX: Set the QCFG */
+ }
+
+ /* Set the current and end address of the control list.
+ * Writing the end register is what starts the job.
+ */
+ V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
+ V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
+
+ return fence;
+}
+
+static void
+v3d_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_exec_info *exec = job->exec;
+ struct v3d_dev *v3d = exec->v3d;
+ enum v3d_queue q;
+
+ mutex_lock(&v3d->reset_lock);
+
+ /* block scheduler */
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
+
+ kthread_park(sched->thread);
+ drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
+ sched_job : NULL));
+ }
+
+ /* get the GPU back into the init state */
+ v3d_reset(v3d);
+
+ /* Unblock schedulers and restart their jobs. */
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ drm_sched_job_recovery(&v3d->queue[q].sched);
+ kthread_unpark(v3d->queue[q].sched.thread);
+ }
+
+ mutex_unlock(&v3d->reset_lock);
+}
+
+static const struct drm_sched_backend_ops v3d_sched_ops = {
+ .dependency = v3d_job_dependency,
+ .run_job = v3d_job_run,
+ .timedout_job = v3d_job_timedout,
+ .free_job = v3d_job_free
+};
+
+int
+v3d_sched_init(struct v3d_dev *v3d)
+{
+ int hw_jobs_limit = 1;
+ int job_hang_limit = 0;
+ int hang_limit_ms = 500;
+ int ret;
+
+ ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
+ &v3d_sched_ops,
+ hw_jobs_limit, job_hang_limit,
+ msecs_to_jiffies(hang_limit_ms),
+ "v3d_bin");
+ if (ret) {
+ dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
+ return ret;
+ }
+
+ ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
+ &v3d_sched_ops,
+ hw_jobs_limit, job_hang_limit,
+ msecs_to_jiffies(hang_limit_ms),
+ "v3d_render");
+ if (ret) {
+ dev_err(v3d->dev, "Failed to create render scheduler: %d.",
+ ret);
+ drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+ return ret;
+ }
+
+ return 0;
+}
+
+void
+v3d_sched_fini(struct v3d_dev *v3d)
+{
+ enum v3d_queue q;
+
+ for (q = 0; q < V3D_MAX_QUEUES; q++)
+ drm_sched_fini(&v3d->queue[q].sched);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
new file mode 100644
index 000000000000..85dd351e1e09
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015-2018 Broadcom */
+
+#if !defined(_V3D_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _V3D_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM v3d
+#define TRACE_INCLUDE_FILE v3d_trace
+
+TRACE_EVENT(v3d_submit_cl,
+ TP_PROTO(struct drm_device *dev, bool is_render,
+ uint64_t seqno,
+ u32 ctnqba, u32 ctnqea),
+ TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(bool, is_render)
+ __field(u64, seqno)
+ __field(u32, ctnqba)
+ __field(u32, ctnqea)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->is_render = is_render;
+ __entry->seqno = seqno;
+ __entry->ctnqba = ctnqba;
+ __entry->ctnqea = ctnqea;
+ ),
+
+ TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x",
+ __entry->dev,
+ __entry->is_render ? "RCL" : "BCL",
+ __entry->seqno,
+ __entry->ctnqba,
+ __entry->ctnqea)
+);
+
+TRACE_EVENT(v3d_reset_begin,
+ TP_PROTO(struct drm_device *dev),
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ ),
+
+ TP_printk("dev=%u",
+ __entry->dev)
+);
+
+TRACE_EVENT(v3d_reset_end,
+ TP_PROTO(struct drm_device *dev),
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ ),
+
+ TP_printk("dev=%u",
+ __entry->dev)
+);
+
+#endif /* _V3D_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/v3d/v3d_trace_points.c b/drivers/gpu/drm/v3d/v3d_trace_points.c
new file mode 100644
index 000000000000..482922d7c7e1
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_trace_points.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015 Broadcom */
+
+#include "v3d_drv.h"
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "v3d_trace.h"
+#endif