66 files changed, 4506 insertions, 681 deletions
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt b/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt
index 3f903af93949..5ff4f64ef8e8 100644
--- a/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt
+++ b/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt
@@ -18,7 +18,14 @@ This device has two video ports. Their connections are modeled using the OF
 graph bindings specified in [1]. Each port node shall have a single endpoint.
 
 - Port 0 is the DPI input port. Its endpoint subnode shall contain a
-  pclk-sample property and a remote-endpoint property as specified in [1].
+  pclk-sample property, a bus-width property and a remote-endpoint property as
+  specified in [1].
+  - If pclk-sample is not defined, pclk-sample = 0 should be assumed for
+    backward compatibility.
+  - If bus-width is not defined then bus-width = 24 should be assumed for
+    backward compatibility.
+    bus-width = 24: 24 data lines are connected and single-edge mode is used
+    bus-width = 12: 12 data lines are connected and dual-edge mode is used
 
 - Port 1 is the DVI output port. Its endpoint subnode shall contain a
   remote-endpoint property is specified in [1].
@@ -43,6 +50,7 @@ tfp410: encoder@0 {
 
 			tfp410_in: endpoint@0 {
 				pclk-sample = <1>;
+				bus-width = <24>;
 				remote-endpoint = <&dpi_out>;
 			};
 		};
diff --git a/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt b/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt
new file mode 100644
index 000000000000..fc1e1b325e49
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt
@@ -0,0 +1,7 @@
+LG ACX467AKM-7 4.95" 1080×1920 LCD Panel
+
+Required properties:
+- compatible: must be "lg,acx467akm-7"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/ste,mcde.txt b/Documentation/devicetree/bindings/display/ste,mcde.txt
new file mode 100644
index 000000000000..4c33c692bd5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/ste,mcde.txt
@@ -0,0 +1,104 @@
+ST-Ericsson Multi Channel Display Engine MCDE
+
+The ST-Ericsson MCDE is a display controller with support for compositing
+and displaying several channels of memory-resident graphics data on DSI or
+LCD displays or bridges. It is used in the ST-Ericsson U8500 platform.
+
+Required properties:
+
+- compatible: must be:
+  "ste,mcde"
+- reg: register base for the main MCDE control registers, should be
+  0x1000 in size
+- interrupts: the interrupt line for the MCDE
+- epod-supply: a phandle to the EPOD regulator
+- vana-supply: a phandle to the analog voltage regulator
+- clocks: an array of the MCDE clocks in this strict order:
+  MCDECLK (main MCDE clock), LCDCLK (LCD clock), PLLDSI
+  (HDMI clock). The DSI energy save clocks (DSI0ESCLK,
+  DSI1ESCLK, DSI2ESCLK) are listed as the "lp" clock of the
+  respective DSI host subnode, see below.
+- clock-names: must be the following array:
+  "mcde", "lcd", "hdmi"
+  to match the required clock inputs above.
+- #address-cells: should be <1> (for the DSI hosts that will be children)
+- #size-cells: should be <1> (for the DSI hosts that will be children)
+- ranges: this should always be stated
+
+Required subnodes:
+
+The devicetree must specify subnodes for the DSI host adapters.
+These must have the following characteristics:
+
+- compatible: must be:
+  "ste,mcde-dsi"
+- reg: must specify the register range for the DSI host
+- vana-supply: phandle to the VANA voltage regulator
+- clocks: phandles to the high speed and low power (energy save) clocks.
+  The high speed clock is not present on the third (dsi2) block, so it
+  should only have the "lp" clock
+- clock-names: "hs" for the high speed clock and "lp" for the low power
+  (energy save) clock
+- #address-cells: should be <1>
+- #size-cells: should be <0>
+
+Display panels and bridges will appear as children on the DSI hosts, and
+the displays are connected to the DSI hosts using the common binding
+for video transmitter interfaces; see
+Documentation/devicetree/bindings/media/video-interfaces.txt
+
+If a DSI host is unused (not connected) it will have no children defined.
+
+Example:
+
+mcde@a0350000 {
+	compatible = "ste,mcde";
+	reg = <0xa0350000 0x1000>;
+	interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+	epod-supply = <&db8500_b2r2_mcde_reg>;
+	vana-supply = <&ab8500_ldo_ana_reg>;
+	clocks = <&prcmu_clk PRCMU_MCDECLK>, /* Main MCDE clock */
+		 <&prcmu_clk PRCMU_LCDCLK>, /* LCD clock */
+		 <&prcmu_clk PRCMU_PLLDSI>; /* HDMI clock */
+	clock-names = "mcde", "lcd", "hdmi";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	ranges;
+
+	dsi0: dsi@a0351000 {
+		compatible = "ste,mcde-dsi";
+		reg = <0xa0351000 0x1000>;
+		vana-supply = <&ab8500_ldo_ana_reg>;
+		clocks = <&prcmu_clk PRCMU_DSI0CLK>, <&prcmu_clk PRCMU_DSI0ESCCLK>;
+		clock-names = "hs", "lp";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		panel {
+			compatible = "samsung,s6d16d0";
+			reg = <0>;
+			vdd1-supply = <&ab8500_ldo_aux1_reg>;
+			reset-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>;
+		};
+
+	};
+	dsi1: dsi@a0352000 {
+		compatible = "ste,mcde-dsi";
+		reg = <0xa0352000 0x1000>;
+		vana-supply = <&ab8500_ldo_ana_reg>;
+		clocks = <&prcmu_clk PRCMU_DSI1CLK>, <&prcmu_clk PRCMU_DSI1ESCCLK>;
+		clock-names = "hs", "lp";
+		#address-cells = <1>;
+		#size-cells = <0>;
+	};
+	dsi2: dsi@a0353000 {
+		compatible = "ste,mcde-dsi";
+		reg = <0xa0353000 0x1000>;
+		vana-supply = <&ab8500_ldo_ana_reg>;
+		/* This DSI port only has the Low Power / Energy Save clock */
+		clocks = <&prcmu_clk PRCMU_DSI2ESCCLK>;
+		clock-names = "lp";
+		#address-cells = <1>;
+		#size-cells = <0>;
+	};
+};
diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index 58b375e47615..14102ae035dc 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -107,6 +107,12 @@ fbdev Helper Functions Reference
 .. kernel-doc:: drivers/gpu/drm/drm_fb_helper.c
    :export:
 
+format Helper Functions Reference
+=================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_format_helper.c
+   :export:
+
 Framebuffer CMA Helper Functions Reference
 ==========================================
 
diff --git a/MAINTAINERS b/MAINTAINERS
index bbb95d66a220..e233b3c48546 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1180,6 +1180,15 @@ F:	drivers/gpu/drm/arm/
 F:	Documentation/devicetree/bindings/display/arm,malidp.txt
 F:	Documentation/gpu/afbc.rst
 
+ARM MALI PANFROST DRM DRIVER
+M:	Rob Herring <robh@kernel.org>
+M:	Tomeu Vizoso <tomeu.vizoso@collabora.com>
+L:	dri-devel@lists.freedesktop.org
+S:	Supported
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+F:	drivers/gpu/drm/panfrost/
+F:	include/uapi/drm/panfrost_drm.h
+
 ARM MFM AND FLOPPY DRIVERS
 M:	Ian Molton <spyro@f2s.com>
 S:	Maintained
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index c729f98a7bd3..93c42078cb57 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -193,6 +193,7 @@ static void dma_fence_chain_release(struct dma_fence *fence)
 }
 
 const struct dma_fence_ops dma_fence_chain_ops = {
+	.use_64bit_seqno = true,
 	.get_driver_name = dma_fence_chain_get_driver_name,
 	.get_timeline_name = dma_fence_chain_get_timeline_name,
 	.enable_signaling = dma_fence_chain_enable_signaling,
@@ -225,7 +226,7 @@ void dma_fence_chain_init(struct dma_fence_chain *chain,
 	init_irq_work(&chain->work, dma_fence_chain_irq_work);
 
 	/* Try to reuse the context of the previous chain node. */
-	if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
+	if (prev_chain && __dma_fence_is_later(seqno, prev->seqno, prev->ops)) {
 		context = prev->context;
 		chain->prev_seqno = prev->seqno;
 	} else {
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 32dcf7b4c935..119b2ffbc2c9 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -161,7 +161,7 @@ static bool timeline_fence_signaled(struct dma_fence *fence)
 {
 	struct sync_timeline *parent = dma_fence_parent(fence);
 
-	return !__dma_fence_is_later(fence->seqno, parent->value);
+	return !__dma_fence_is_later(fence->seqno, parent->value, fence->ops);
 }
 
 static bool timeline_fence_enable_signaling(struct dma_fence *fence)
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 4f6305ca52c8..ed3fb6e5224c 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -258,7 +258,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 
 			i_b++;
 		} else {
-			if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno))
+			if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno,
+						 pt_a->ops))
 				add_fence(fences, &i, pt_a);
 			else
 				add_fence(fences, &i, pt_b);
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index bcbc4234893a..39d5f7562f1c 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -337,6 +337,8 @@ source "drivers/gpu/drm/vboxvideo/Kconfig"
 
 source "drivers/gpu/drm/lima/Kconfig"
 
+source "drivers/gpu/drm/panfrost/Kconfig"
+
 source "drivers/gpu/drm/aspeed/Kconfig"
 
 # Keep legacy drivers last
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 7ebae3d45505..3d0c75cd687c 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -112,4 +112,5 @@ obj-$(CONFIG_DRM_TVE200) += tve200/
 obj-$(CONFIG_DRM_XEN) += xen/
 obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
 obj-$(CONFIG_DRM_LIMA)  += lima/
+obj-$(CONFIG_DRM_PANFROST) += panfrost/
 obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
diff --git a/drivers/gpu/drm/aspeed/Kconfig b/drivers/gpu/drm/aspeed/Kconfig
index 42b74d18a41b..cccab520e02f 100644
--- a/drivers/gpu/drm/aspeed/Kconfig
+++ b/drivers/gpu/drm/aspeed/Kconfig
@@ -1,11 +1,11 @@
 config DRM_ASPEED_GFX
 	tristate "ASPEED BMC Display Controller"
 	depends on DRM && OF
+	depends on (COMPILE_TEST || ARCH_ASPEED)
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
-	select DRM_PANEL
-	select DMA_CMA
-	select CMA
+	select DMA_CMA if HAVE_DMA_CONTIGUOUS
+	select CMA if HAVE_DMA_CONTIGUOUS
 	select MFD_SYSCON
 	help
 	  Chose this option if you have an ASPEED AST2500 SOC Display
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 049d058571d4..341cc9d1bab4 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -7,6 +7,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_simple_kms_helper.h>
 
 #include <drm/drm_gem.h>
 
@@ -69,9 +70,8 @@ struct bochs_device {
 	struct edid *edid;
 
 	/* drm */
-	struct drm_device  *dev;
-	struct drm_crtc crtc;
-	struct drm_encoder encoder;
+	struct drm_device *dev;
+	struct drm_simple_display_pipe pipe;
 	struct drm_connector connector;
 
 	/* ttm */
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 485f9cf05e8b..5e905f50449d 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -22,76 +22,55 @@ MODULE_PARM_DESC(defy, "default y resolution");
 
 /* ---------------------------------------------------------------------- */
 
-static void bochs_crtc_mode_set_nofb(struct drm_crtc *crtc)
+static const uint32_t bochs_formats[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_BGRX8888,
+};
+
+static void bochs_plane_update(struct bochs_device *bochs,
+			       struct drm_plane_state *state)
 {
-	struct bochs_device *bochs =
-		container_of(crtc, struct bochs_device, crtc);
+	struct bochs_bo *bo;
 
-	bochs_hw_setmode(bochs, &crtc->mode);
+	if (!state->fb || !bochs->stride)
+		return;
+
+	bo = gem_to_bochs_bo(state->fb->obj[0]);
+	bochs_hw_setbase(bochs,
+			 state->crtc_x,
+			 state->crtc_y,
+			 bo->bo.offset);
+	bochs_hw_setformat(bochs, state->fb->format);
 }
 
-static void bochs_crtc_atomic_enable(struct drm_crtc *crtc,
-				     struct drm_crtc_state *old_crtc_state)
+static void bochs_pipe_enable(struct drm_simple_display_pipe *pipe,
+			      struct drm_crtc_state *crtc_state,
+			      struct drm_plane_state *plane_state)
 {
+	struct bochs_device *bochs = pipe->crtc.dev->dev_private;
+
+	bochs_hw_setmode(bochs, &crtc_state->mode);
+	bochs_plane_update(bochs, plane_state);
 }
 
-static void bochs_crtc_atomic_flush(struct drm_crtc *crtc,
-				    struct drm_crtc_state *old_crtc_state)
+static void bochs_pipe_update(struct drm_simple_display_pipe *pipe,
+			      struct drm_plane_state *old_state)
 {
-	struct drm_device *dev = crtc->dev;
-	struct drm_pending_vblank_event *event;
+	struct bochs_device *bochs = pipe->crtc.dev->dev_private;
+	struct drm_crtc *crtc = &pipe->crtc;
 
-	if (crtc->state && crtc->state->event) {
-		unsigned long irqflags;
+	bochs_plane_update(bochs, pipe->plane.state);
 
-		spin_lock_irqsave(&dev->event_lock, irqflags);
-		event = crtc->state->event;
+	if (crtc->state->event) {
+		spin_lock_irq(&crtc->dev->event_lock);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
 		crtc->state->event = NULL;
-		drm_crtc_send_vblank_event(crtc, event);
-		spin_unlock_irqrestore(&dev->event_lock, irqflags);
+		spin_unlock_irq(&crtc->dev->event_lock);
 	}
 }
 
-
-/* These provide the minimum set of functions required to handle a CRTC */
-static const struct drm_crtc_funcs bochs_crtc_funcs = {
-	.set_config = drm_atomic_helper_set_config,
-	.destroy = drm_crtc_cleanup,
-	.page_flip = drm_atomic_helper_page_flip,
-	.reset = drm_atomic_helper_crtc_reset,
-	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
-};
-
-static const struct drm_crtc_helper_funcs bochs_helper_funcs = {
-	.mode_set_nofb = bochs_crtc_mode_set_nofb,
-	.atomic_enable = bochs_crtc_atomic_enable,
-	.atomic_flush = bochs_crtc_atomic_flush,
-};
-
-static const uint32_t bochs_formats[] = {
-	DRM_FORMAT_XRGB8888,
-	DRM_FORMAT_BGRX8888,
-};
-
-static void bochs_plane_atomic_update(struct drm_plane *plane,
-				      struct drm_plane_state *old_state)
-{
-	struct bochs_device *bochs = plane->dev->dev_private;
-	struct bochs_bo *bo;
-
-	if (!plane->state->fb)
-		return;
-	bo = gem_to_bochs_bo(plane->state->fb->obj[0]);
-	bochs_hw_setbase(bochs,
-			 plane->state->crtc_x,
-			 plane->state->crtc_y,
-			 bo->bo.offset);
-	bochs_hw_setformat(bochs, plane->state->fb->format);
-}
-
-static int bochs_plane_prepare_fb(struct drm_plane *plane,
-				struct drm_plane_state *new_state)
+static int bochs_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
+				 struct drm_plane_state *new_state)
 {
 	struct bochs_bo *bo;
 
@@ -101,8 +80,8 @@ static int bochs_plane_prepare_fb(struct drm_plane *plane,
 	return bochs_bo_pin(bo, TTM_PL_FLAG_VRAM);
 }
 
-static void bochs_plane_cleanup_fb(struct drm_plane *plane,
-				   struct drm_plane_state *old_state)
+static void bochs_pipe_cleanup_fb(struct drm_simple_display_pipe *pipe,
+				  struct drm_plane_state *old_state)
 {
 	struct bochs_bo *bo;
 
@@ -112,73 +91,13 @@ static void bochs_plane_cleanup_fb(struct drm_plane *plane,
 	bochs_bo_unpin(bo);
 }
 
-static const struct drm_plane_helper_funcs bochs_plane_helper_funcs = {
-	.atomic_update = bochs_plane_atomic_update,
-	.prepare_fb = bochs_plane_prepare_fb,
-	.cleanup_fb = bochs_plane_cleanup_fb,
-};
-
-static const struct drm_plane_funcs bochs_plane_funcs = {
-       .update_plane   = drm_atomic_helper_update_plane,
-       .disable_plane  = drm_atomic_helper_disable_plane,
-       .destroy        = drm_primary_helper_destroy,
-       .reset          = drm_atomic_helper_plane_reset,
-       .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
-       .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
-};
-
-static struct drm_plane *bochs_primary_plane(struct drm_device *dev)
-{
-	struct drm_plane *primary;
-	int ret;
-
-	primary = kzalloc(sizeof(*primary), GFP_KERNEL);
-	if (primary == NULL) {
-		DRM_DEBUG_KMS("Failed to allocate primary plane\n");
-		return NULL;
-	}
-
-	ret = drm_universal_plane_init(dev, primary, 0,
-				       &bochs_plane_funcs,
-				       bochs_formats,
-				       ARRAY_SIZE(bochs_formats),
-				       NULL,
-				       DRM_PLANE_TYPE_PRIMARY, NULL);
-	if (ret) {
-		kfree(primary);
-		return NULL;
-	}
-
-	drm_plane_helper_add(primary, &bochs_plane_helper_funcs);
-	return primary;
-}
-
-static void bochs_crtc_init(struct drm_device *dev)
-{
-	struct bochs_device *bochs = dev->dev_private;
-	struct drm_crtc *crtc = &bochs->crtc;
-	struct drm_plane *primary = bochs_primary_plane(dev);
-
-	drm_crtc_init_with_planes(dev, crtc, primary, NULL,
-				  &bochs_crtc_funcs, NULL);
-	drm_crtc_helper_add(crtc, &bochs_helper_funcs);
-}
-
-static const struct drm_encoder_funcs bochs_encoder_encoder_funcs = {
-	.destroy = drm_encoder_cleanup,
+static const struct drm_simple_display_pipe_funcs bochs_pipe_funcs = {
+	.enable	    = bochs_pipe_enable,
+	.update	    = bochs_pipe_update,
+	.prepare_fb = bochs_pipe_prepare_fb,
+	.cleanup_fb = bochs_pipe_cleanup_fb,
 };
 
-static void bochs_encoder_init(struct drm_device *dev)
-{
-	struct bochs_device *bochs = dev->dev_private;
-	struct drm_encoder *encoder = &bochs->encoder;
-
-	encoder->possible_crtcs = 0x1;
-	drm_encoder_init(dev, encoder, &bochs_encoder_encoder_funcs,
-			 DRM_MODE_ENCODER_DAC, NULL);
-}
-
-
 static int bochs_connector_get_modes(struct drm_connector *connector)
 {
 	struct bochs_device *bochs =
@@ -278,11 +197,14 @@ int bochs_kms_init(struct bochs_device *bochs)
 
 	bochs->dev->mode_config.funcs = &bochs_mode_funcs;
 
-	bochs_crtc_init(bochs->dev);
-	bochs_encoder_init(bochs->dev);
 	bochs_connector_init(bochs->dev);
-	drm_connector_attach_encoder(&bochs->connector,
-					  &bochs->encoder);
+	drm_simple_display_pipe_init(bochs->dev,
+				     &bochs->pipe,
+				     &bochs_pipe_funcs,
+				     bochs_formats,
+				     ARRAY_SIZE(bochs_formats),
+				     NULL,
+				     &bochs->connector);
 
 	drm_mode_config_reset(bochs->dev);
 
diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c
index 285be4a0f4bd..8b0e71bd3ca7 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -29,8 +29,10 @@ struct tfp410 {
 	struct drm_connector	connector;
 	unsigned int		connector_type;
 
+	u32			bus_format;
 	struct i2c_adapter	*ddc;
 	struct gpio_desc	*hpd;
+	int			hpd_irq;
 	struct delayed_work	hpd_work;
 	struct gpio_desc	*powerdown;
 
@@ -124,8 +126,10 @@ static int tfp410_attach(struct drm_bridge *bridge)
 		return -ENODEV;
 	}
 
-	if (dvi->hpd)
+	if (dvi->hpd_irq >= 0)
 		dvi->connector.polled = DRM_CONNECTOR_POLL_HPD;
+	else
+		dvi->connector.polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;
 
 	drm_connector_helper_add(&dvi->connector,
 				 &tfp410_con_helper_funcs);
@@ -136,6 +140,9 @@ static int tfp410_attach(struct drm_bridge *bridge)
 		return ret;
 	}
 
+	drm_display_info_set_bus_formats(&dvi->connector.display_info,
+					 &dvi->bus_format, 1);
+
 	drm_connector_attach_encoder(&dvi->connector,
 					  bridge->encoder);
 
@@ -194,6 +201,7 @@ static int tfp410_parse_timings(struct tfp410 *dvi, bool i2c)
 	struct drm_bridge_timings *timings = &dvi->timings;
 	struct device_node *ep;
 	u32 pclk_sample = 0;
+	u32 bus_width = 24;
 	s32 deskew = 0;
 
 	/* Start with defaults. */
@@ -218,6 +226,7 @@ static int tfp410_parse_timings(struct tfp410 *dvi, bool i2c)
 
 	/* Get the sampling edge from the endpoint. */
 	of_property_read_u32(ep, "pclk-sample", &pclk_sample);
+	of_property_read_u32(ep, "bus-width", &bus_width);
 	of_node_put(ep);
 
 	timings->input_bus_flags = DRM_BUS_FLAG_DE_HIGH;
@@ -235,6 +244,17 @@ static int tfp410_parse_timings(struct tfp410 *dvi, bool i2c)
 		return -EINVAL;
 	}
 
+	switch (bus_width) {
+	case 12:
+		dvi->bus_format = MEDIA_BUS_FMT_RGB888_2X12_LE;
+		break;
+	case 24:
+		dvi->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	/* Get the setup and hold time from vendor-specific properties. */
 	of_property_read_u32(dvi->dev->of_node, "ti,deskew", (u32 *)&deskew);
 	if (deskew < -4 || deskew > 3)
@@ -324,10 +344,15 @@ static int tfp410_init(struct device *dev, bool i2c)
 		return PTR_ERR(dvi->powerdown);
 	}
 
-	if (dvi->hpd) {
+	if (dvi->hpd)
+		dvi->hpd_irq = gpiod_to_irq(dvi->hpd);
+	else
+		dvi->hpd_irq = -ENXIO;
+
+	if (dvi->hpd_irq >= 0) {
 		INIT_DELAYED_WORK(&dvi->hpd_work, tfp410_hpd_work_func);
 
-		ret = devm_request_threaded_irq(dev, gpiod_to_irq(dvi->hpd),
+		ret = devm_request_threaded_irq(dev, dvi->hpd_irq,
 			NULL, tfp410_hpd_irq_thread, IRQF_TRIGGER_RISING |
 			IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 			"hdmi-hpd", dvi);
diff --git a/drivers/gpu/drm/cirrus/cirrus.c b/drivers/gpu/drm/cirrus/cirrus.c
index 5095b8ce52c2..be4ea370ba31 100644
--- a/drivers/gpu/drm/cirrus/cirrus.c
+++ b/drivers/gpu/drm/cirrus/cirrus.c
@@ -307,16 +307,16 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
 		return -ENOMEM;
 
 	if (cirrus->cpp == fb->format->cpp[0])
-		drm_fb_memcpy_dstclip(__io_virt(cirrus->vram),
+		drm_fb_memcpy_dstclip(cirrus->vram,
 				      vmap, fb, rect);
 
 	else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2)
-		drm_fb_xrgb8888_to_rgb565_dstclip(__io_virt(cirrus->vram),
+		drm_fb_xrgb8888_to_rgb565_dstclip(cirrus->vram,
 						  cirrus->pitch,
 						  vmap, fb, rect, false);
 
 	else if (fb->format->cpp[0] == 4 && cirrus->cpp == 3)
-		drm_fb_xrgb8888_to_rgb888_dstclip(__io_virt(cirrus->vram),
+		drm_fb_xrgb8888_to_rgb888_dstclip(cirrus->vram,
 						  cirrus->pitch,
 						  vmap, fb, rect);
 
diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index 9b2bd28dde0a..f20d1dda3961 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -69,7 +69,8 @@ EXPORT_SYMBOL(drm_client_close);
  * @name: Client name
  * @funcs: DRM client functions (optional)
  *
- * This initialises the client and opens a &drm_file. Use drm_client_add() to complete the process.
+ * This initialises the client and opens a &drm_file.
+ * Use drm_client_register() to complete the process.
  * The caller needs to hold a reference on @dev before calling this function.
  * The client is freed when the &drm_device is unregistered. See drm_client_release().
  *
@@ -108,16 +109,16 @@ err_put_module:
 EXPORT_SYMBOL(drm_client_init);
 
 /**
- * drm_client_add - Add client to the device list
+ * drm_client_register - Register client
  * @client: DRM client
  *
  * Add the client to the &drm_device client list to activate its callbacks.
  * @client must be initialized by a call to drm_client_init(). After
- * drm_client_add() it is no longer permissible to call drm_client_release()
+ * drm_client_register() it is no longer permissible to call drm_client_release()
  * directly (outside the unregister callback), instead cleanup will happen
  * automatically on driver unload.
  */
-void drm_client_add(struct drm_client_dev *client)
+void drm_client_register(struct drm_client_dev *client)
 {
 	struct drm_device *dev = client->dev;
 
@@ -125,7 +126,7 @@ void drm_client_add(struct drm_client_dev *client)
 	list_add(&client->list, &dev->clientlist);
 	mutex_unlock(&dev->clientlist_mutex);
 }
-EXPORT_SYMBOL(drm_client_add);
+EXPORT_SYMBOL(drm_client_register);
 
 /**
  * drm_client_release - Release DRM client resources
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index d1ce7bd04cad..4de4b9d59d49 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2559,6 +2559,194 @@ static void drm_setup_crtc_rotation(struct drm_fb_helper *fb_helper,
 	fb_helper->sw_rotations |= DRM_MODE_ROTATE_0;
 }
 
+static struct drm_fb_helper_crtc *
+drm_fb_helper_crtc(struct drm_fb_helper *fb_helper, struct drm_crtc *crtc)
+{
+	int i;
+
+	for (i = 0; i < fb_helper->crtc_count; i++)
+		if (fb_helper->crtc_info[i].mode_set.crtc == crtc)
+			return &fb_helper->crtc_info[i];
+
+	return NULL;
+}
+
+/* Try to read the BIOS display configuration and use it for the initial config */
+static bool drm_fb_helper_firmware_config(struct drm_fb_helper *fb_helper,
+					  struct drm_fb_helper_crtc **crtcs,
+					  struct drm_display_mode **modes,
+					  struct drm_fb_offset *offsets,
+					  bool *enabled, int width, int height)
+{
+	struct drm_device *dev = fb_helper->dev;
+	unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
+	unsigned long conn_configured, conn_seq;
+	int i, j;
+	bool *save_enabled;
+	bool fallback = true, ret = true;
+	int num_connectors_enabled = 0;
+	int num_connectors_detected = 0;
+	struct drm_modeset_acquire_ctx ctx;
+
+	save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
+	if (!save_enabled)
+		return false;
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+	while (drm_modeset_lock_all_ctx(dev, &ctx) != 0)
+		drm_modeset_backoff(&ctx);
+
+	memcpy(save_enabled, enabled, count);
+	conn_seq = GENMASK(count - 1, 0);
+	conn_configured = 0;
+retry:
+	for (i = 0; i < count; i++) {
+		struct drm_fb_helper_connector *fb_conn;
+		struct drm_connector *connector;
+		struct drm_encoder *encoder;
+		struct drm_fb_helper_crtc *new_crtc;
+
+		fb_conn = fb_helper->connector_info[i];
+		connector = fb_conn->connector;
+
+		if (conn_configured & BIT(i))
+			continue;
+
+		/* First pass, only consider tiled connectors */
+		if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile)
+			continue;
+
+		if (connector->status == connector_status_connected)
+			num_connectors_detected++;
+
+		if (!enabled[i]) {
+			DRM_DEBUG_KMS("connector %s not enabled, skipping\n",
+				      connector->name);
+			conn_configured |= BIT(i);
+			continue;
+		}
+
+		if (connector->force == DRM_FORCE_OFF) {
+			DRM_DEBUG_KMS("connector %s is disabled by user, skipping\n",
+				      connector->name);
+			enabled[i] = false;
+			continue;
+		}
+
+		encoder = connector->state->best_encoder;
+		if (!encoder || WARN_ON(!connector->state->crtc)) {
+			if (connector->force > DRM_FORCE_OFF)
+				goto bail;
+
+			DRM_DEBUG_KMS("connector %s has no encoder or crtc, skipping\n",
+				      connector->name);
+			enabled[i] = false;
+			conn_configured |= BIT(i);
+			continue;
+		}
+
+		num_connectors_enabled++;
+
+		new_crtc = drm_fb_helper_crtc(fb_helper, connector->state->crtc);
+
+		/*
+		 * Make sure we're not trying to drive multiple connectors
+		 * with a single CRTC, since our cloning support may not
+		 * match the BIOS.
+		 */
+		for (j = 0; j < count; j++) {
+			if (crtcs[j] == new_crtc) {
+				DRM_DEBUG_KMS("fallback: cloned configuration\n");
+				goto bail;
+			}
+		}
+
+		DRM_DEBUG_KMS("looking for cmdline mode on connector %s\n",
+			      connector->name);
+
+		/* go for command line mode first */
+		modes[i] = drm_pick_cmdline_mode(fb_conn);
+
+		/* try for preferred next */
+		if (!modes[i]) {
+			DRM_DEBUG_KMS("looking for preferred mode on connector %s %d\n",
+				      connector->name, connector->has_tile);
+			modes[i] = drm_has_preferred_mode(fb_conn, width,
+							  height);
+		}
+
+		/* No preferred mode marked by the EDID? Are there any modes? */
+		if (!modes[i] && !list_empty(&connector->modes)) {
+			DRM_DEBUG_KMS("using first mode listed on connector %s\n",
+				      connector->name);
+			modes[i] = list_first_entry(&connector->modes,
+						    struct drm_display_mode,
+						    head);
+		}
+
+		/* last resort: use current mode */
+		if (!modes[i]) {
+			/*
+			 * IMPORTANT: We want to use the adjusted mode (i.e.
+			 * after the panel fitter upscaling) as the initial
+			 * config, not the input mode, which is what crtc->mode
+			 * usually contains. But since our current
+			 * code puts a mode derived from the post-pfit timings
+			 * into crtc->mode this works out correctly.
+			 *
+			 * This is crtc->mode and not crtc->state->mode for the
+			 * fastboot check to work correctly.
+			 */
+			DRM_DEBUG_KMS("looking for current mode on connector %s\n",
+				      connector->name);
+			modes[i] = &connector->state->crtc->mode;
+		}
+		crtcs[i] = new_crtc;
+
+		DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n",
+			      connector->name,
+			      connector->state->crtc->base.id,
+			      connector->state->crtc->name,
+			      modes[i]->hdisplay, modes[i]->vdisplay,
+			      modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" : "");
+
+		fallback = false;
+		conn_configured |= BIT(i);
+	}
+
+	if (conn_configured != conn_seq) { /* repeat until no more are found */
+		conn_seq = conn_configured;
+		goto retry;
+	}
+
+	/*
+	 * If the BIOS didn't enable everything it could, fall back so that we
+	 * get the same user experience of lighting up as much as possible as
+	 * with the fbdev helper library.
+	 */
+	if (num_connectors_enabled != num_connectors_detected &&
+	    num_connectors_enabled < dev->mode_config.num_crtc) {
+		DRM_DEBUG_KMS("fallback: Not all outputs enabled\n");
+		DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled,
+			      num_connectors_detected);
+		fallback = true;
+	}
+
+	if (fallback) {
+bail:
+		DRM_DEBUG_KMS("Not using firmware configuration\n");
+		memcpy(enabled, save_enabled, count);
+		ret = false;
+	}
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
+	kfree(save_enabled);
+	return ret;
+}
+
 static void drm_setup_crtcs(struct drm_fb_helper *fb_helper,
 			    u32 width, u32 height)
 {
@@ -2591,10 +2779,8 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper,
 		DRM_DEBUG_KMS("No connectors reported connected with modes\n");
 	drm_enable_connectors(fb_helper, enabled);
 
-	if (!(fb_helper->funcs->initial_config &&
-	      fb_helper->funcs->initial_config(fb_helper, crtcs, modes,
-					       offsets,
-					       enabled, width, height))) {
+	if (!drm_fb_helper_firmware_config(fb_helper, crtcs, modes, offsets,
+					   enabled, width, height)) {
 		memset(modes, 0, fb_helper->connector_count*sizeof(modes[0]));
 		memset(crtcs, 0, fb_helper->connector_count*sizeof(crtcs[0]));
 		memset(offsets, 0, fb_helper->connector_count*sizeof(offsets[0]));
@@ -3322,7 +3508,7 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
 	if (ret)
 		DRM_DEV_DEBUG(dev->dev, "client hotplug ret=%d\n", ret);
 
-	drm_client_add(&fb_helper->client);
+	drm_client_register(&fb_helper->client);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index 00d716f14173..a18da35145b7 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -10,23 +10,17 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 
 #include <drm/drm_format_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_rect.h>
 
-static void drm_fb_memcpy_lines(void *dst, unsigned int dst_pitch,
-				void *src, unsigned int src_pitch,
-				unsigned int linelength, unsigned int lines)
+static unsigned int clip_offset(struct drm_rect *clip,
+				unsigned int pitch, unsigned int cpp)
 {
-	int line;
-
-	for (line = 0; line < lines; line++) {
-		memcpy(dst, src, linelength);
-		src += src_pitch;
-		dst += dst_pitch;
-	}
+	return clip->y1 * pitch + clip->x1 * cpp;
 }
 
 /**
@@ -43,35 +37,44 @@ void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
 		   struct drm_rect *clip)
 {
 	unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0);
-	unsigned int offset = (clip->y1 * fb->pitches[0]) + (clip->x1 * cpp);
 	size_t len = (clip->x2 - clip->x1) * cpp;
+	unsigned int y, lines = clip->y2 - clip->y1;
 
-	drm_fb_memcpy_lines(dst, len,
-			    vaddr + offset, fb->pitches[0],
-			    len, clip->y2 - clip->y1);
+	vaddr += clip_offset(clip, fb->pitches[0], cpp);
+	for (y = 0; y < lines; y++) {
+		memcpy(dst, vaddr, len);
+		vaddr += fb->pitches[0];
+		dst += len;
+	}
 }
 EXPORT_SYMBOL(drm_fb_memcpy);
 
 /**
  * drm_fb_memcpy_dstclip - Copy clip buffer
- * @dst: Destination buffer
+ * @dst: Destination buffer (iomem)
  * @vaddr: Source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
  *
  * This function applies clipping on dst, i.e. the destination is a
- * full framebuffer but only the clip rect content is copied over.
+ * full (iomem) framebuffer but only the clip rect content is copied over.
  */
-void drm_fb_memcpy_dstclip(void *dst, void *vaddr, struct drm_framebuffer *fb,
+void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
+			   struct drm_framebuffer *fb,
 			   struct drm_rect *clip)
 {
 	unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0);
-	unsigned int offset = (clip->y1 * fb->pitches[0]) + (clip->x1 * cpp);
+	unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
 	size_t len = (clip->x2 - clip->x1) * cpp;
+	unsigned int y, lines = clip->y2 - clip->y1;
 
-	drm_fb_memcpy_lines(dst + offset, fb->pitches[0],
-			    vaddr + offset, fb->pitches[0],
-			    len, clip->y2 - clip->y1);
+	vaddr += offset;
+	dst += offset;
+	for (y = 0; y < lines; y++) {
+		memcpy_toio(dst, vaddr, len);
+		vaddr += fb->pitches[0];
+		dst += fb->pitches[0];
+	}
 }
 EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
 
@@ -110,42 +113,22 @@ void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
 }
 EXPORT_SYMBOL(drm_fb_swab16);
 
-static void drm_fb_xrgb8888_to_rgb565_lines(void *dst, unsigned int dst_pitch,
-					    void *src, unsigned int src_pitch,
-					    unsigned int src_linelength,
-					    unsigned int lines,
-					    bool swap)
+static void drm_fb_xrgb8888_to_rgb565_line(u16 *dbuf, u32 *sbuf,
+					   unsigned int pixels,
+					   bool swab)
 {
-	unsigned int linepixels = src_linelength / sizeof(u32);
-	unsigned int x, y;
-	u32 *sbuf;
-	u16 *dbuf, val16;
-
-	/*
-	 * The cma memory is write-combined so reads are uncached.
-	 * Speed up by fetching one line at a time.
-	 */
-	sbuf = kmalloc(src_linelength, GFP_KERNEL);
-	if (!sbuf)
-		return;
-
-	for (y = 0; y < lines; y++) {
-		memcpy(sbuf, src, src_linelength);
-		dbuf = dst;
-		for (x = 0; x < linepixels; x++) {
-			val16 = ((sbuf[x] & 0x00F80000) >> 8) |
-				((sbuf[x] & 0x0000FC00) >> 5) |
-				((sbuf[x] & 0x000000F8) >> 3);
-			if (swap)
-				*dbuf++ = swab16(val16);
-			else
-				*dbuf++ = val16;
-		}
-		src += src_pitch;
-		dst += dst_pitch;
+	unsigned int x;
+	u16 val16;
+
+	for (x = 0; x < pixels; x++) {
+		val16 = ((sbuf[x] & 0x00F80000) >> 8) |
+			((sbuf[x] & 0x0000FC00) >> 5) |
+			((sbuf[x] & 0x000000F8) >> 3);
+		if (swab)
+			dbuf[x] = swab16(val16);
+		else
+			dbuf[x] = val16;
 	}
-
-	kfree(sbuf);
 }
 
 /**
@@ -154,7 +137,7 @@ static void drm_fb_xrgb8888_to_rgb565_lines(void *dst, unsigned int dst_pitch,
  * @vaddr: XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
- * @swap: Swap bytes
+ * @swab: Swap bytes
  *
  * Drivers can use this function for RGB565 devices that don't natively
  * support XRGB8888.
@@ -164,109 +147,124 @@ static void drm_fb_xrgb8888_to_rgb565_lines(void *dst, unsigned int dst_pitch,
  */
 void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr,
 			       struct drm_framebuffer *fb,
-			       struct drm_rect *clip, bool swap)
+			       struct drm_rect *clip, bool swab)
 {
-	unsigned int src_offset = (clip->y1 * fb->pitches[0])
-		+ (clip->x1 * sizeof(u32));
-	size_t src_len = (clip->x2 - clip->x1) * sizeof(u32);
-	size_t dst_len = (clip->x2 - clip->x1) * sizeof(u16);
-
-	drm_fb_xrgb8888_to_rgb565_lines(dst, dst_len,
-					vaddr + src_offset, fb->pitches[0],
-					src_len, clip->y2 - clip->y1,
-					swap);
+	size_t linepixels = clip->x2 - clip->x1;
+	size_t src_len = linepixels * sizeof(u32);
+	size_t dst_len = linepixels * sizeof(u16);
+	unsigned y, lines = clip->y2 - clip->y1;
+	void *sbuf;
+
+	/*
+	 * The cma memory is write-combined so reads are uncached.
+	 * Speed up by fetching one line at a time.
+	 */
+	sbuf = kmalloc(src_len, GFP_KERNEL);
+	if (!sbuf)
+		return;
+
+	vaddr += clip_offset(clip, fb->pitches[0], sizeof(u32));
+	for (y = 0; y < lines; y++) {
+		memcpy(sbuf, vaddr, src_len);
+		drm_fb_xrgb8888_to_rgb565_line(dst, sbuf, linepixels, swab);
+		vaddr += fb->pitches[0];
+		dst += dst_len;
+	}
+
+	kfree(sbuf);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565);
 
 /**
  * drm_fb_xrgb8888_to_rgb565_dstclip - Convert XRGB8888 to RGB565 clip buffer
- * @dst: RGB565 destination buffer
+ * @dst: RGB565 destination buffer (iomem)
  * @dst_pitch: destination buffer pitch
  * @vaddr: XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
- * @swap: Swap bytes
+ * @swab: Swap bytes
  *
  * Drivers can use this function for RGB565 devices that don't natively
  * support XRGB8888.
  *
  * This function applies clipping on dst, i.e. the destination is a
- * full framebuffer but only the clip rect content is copied over.
+ * full (iomem) framebuffer but only the clip rect content is copied over.
  */
-void drm_fb_xrgb8888_to_rgb565_dstclip(void *dst, unsigned int dst_pitch,
+void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch,
 				       void *vaddr, struct drm_framebuffer *fb,
-				       struct drm_rect *clip, bool swap)
-{
-	unsigned int src_offset = (clip->y1 * fb->pitches[0])
-		+ (clip->x1 * sizeof(u32));
-	unsigned int dst_offset = (clip->y1 * dst_pitch)
-		+ (clip->x1 * sizeof(u16));
-	size_t src_len = (clip->x2 - clip->x1) * sizeof(u32);
-
-	drm_fb_xrgb8888_to_rgb565_lines(dst + dst_offset, dst_pitch,
-					vaddr + src_offset, fb->pitches[0],
-					src_len, clip->y2 - clip->y1,
-					swap);
-}
-EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565_dstclip);
-
-static void drm_fb_xrgb8888_to_rgb888_lines(void *dst, unsigned int dst_pitch,
-					    void *src, unsigned int src_pitch,
-					    unsigned int src_linelength,
-					    unsigned int lines)
+				       struct drm_rect *clip, bool swab)
 {
-	unsigned int linepixels = src_linelength / 3;
-	unsigned int x, y;
-	u32 *sbuf;
-	u8 *dbuf;
+	size_t linepixels = clip->x2 - clip->x1;
+	size_t dst_len = linepixels * sizeof(u16);
+	unsigned y, lines = clip->y2 - clip->y1;
+	void *dbuf;
 
-	sbuf = kmalloc(src_linelength, GFP_KERNEL);
-	if (!sbuf)
+	dbuf = kmalloc(dst_len, GFP_KERNEL);
+	if (!dbuf)
 		return;
 
+	vaddr += clip_offset(clip, fb->pitches[0], sizeof(u32));
+	dst += clip_offset(clip, dst_pitch, sizeof(u16));
 	for (y = 0; y < lines; y++) {
-		memcpy(sbuf, src, src_linelength);
-		dbuf = dst;
-		for (x = 0; x < linepixels; x++) {
-			*dbuf++ = (sbuf[x] & 0x000000FF) >>  0;
-			*dbuf++ = (sbuf[x] & 0x0000FF00) >>  8;
-			*dbuf++ = (sbuf[x] & 0x00FF0000) >> 16;
-		}
-		src += src_pitch;
-		dst += dst_pitch;
+		drm_fb_xrgb8888_to_rgb565_line(dbuf, vaddr, linepixels, swab);
+		memcpy_toio(dst, dbuf, dst_len);
+		vaddr += fb->pitches[0];
+		dst += dst_pitch;
 	}
 
-	kfree(sbuf);
+	kfree(dbuf);
+}
+EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565_dstclip);
+
+static void drm_fb_xrgb8888_to_rgb888_line(u8 *dbuf, u32 *sbuf,
+					   unsigned int pixels)
+{
+	unsigned int x;
+
+	for (x = 0; x < pixels; x++) {
+		*dbuf++ = (sbuf[x] & 0x000000FF) >>  0;
+		*dbuf++ = (sbuf[x] & 0x0000FF00) >>  8;
+		*dbuf++ = (sbuf[x] & 0x00FF0000) >> 16;
+	}
 }
 
 /**
  * drm_fb_xrgb8888_to_rgb888_dstclip - Convert XRGB8888 to RGB888 clip buffer
- * @dst: RGB565 destination buffer
+ * @dst: RGB888 destination buffer (iomem)
  * @dst_pitch: destination buffer pitch
  * @vaddr: XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
- * @dstclip: Clip destination too.
  *
  * Drivers can use this function for RGB888 devices that don't natively
  * support XRGB8888.
  *
  * This function applies clipping on dst, i.e. the destination is a
- * full framebuffer but only the clip rect content is copied over.
+ * full (iomem) framebuffer but only the clip rect content is copied over.
  */
-void drm_fb_xrgb8888_to_rgb888_dstclip(void *dst, unsigned int dst_pitch,
+void drm_fb_xrgb8888_to_rgb888_dstclip(void __iomem *dst, unsigned int dst_pitch,
 				       void *vaddr, struct drm_framebuffer *fb,
 				       struct drm_rect *clip)
 {
-	unsigned int src_offset = (clip->y1 * fb->pitches[0])
-		+ (clip->x1 * sizeof(u32));
-	unsigned int dst_offset = (clip->y1 * dst_pitch)
-		+ (clip->x1 * 3);
-	size_t src_len = (clip->x2 - clip->x1) * sizeof(u32);
-
-	drm_fb_xrgb8888_to_rgb888_lines(dst + dst_offset, dst_pitch,
-					vaddr + src_offset, fb->pitches[0],
-					src_len, clip->y2 - clip->y1);
+	size_t linepixels = clip->x2 - clip->x1;
+	size_t dst_len = linepixels * 3;
+	unsigned y, lines = clip->y2 - clip->y1;
+	void *dbuf;
+
+	dbuf = kmalloc(dst_len, GFP_KERNEL);
+	if (!dbuf)
+		return;
+
+	vaddr += clip_offset(clip, fb->pitches[0], sizeof(u32));
+	dst += clip_offset(clip, dst_pitch, 3);
+	for (y = 0; y < lines; y++) {
+		drm_fb_xrgb8888_to_rgb888_line(dbuf, vaddr, linepixels);
+		memcpy_toio(dst, dbuf, dst_len);
+		vaddr += fb->pitches[0];
+		dst += dst_pitch;
+	}
+
+	kfree(dbuf);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888_dstclip);
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 52c0a837a3b2..fae4676707b6 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -646,6 +646,85 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
 }
 EXPORT_SYMBOL(drm_gem_put_pages);
 
+static int objects_lookup(struct drm_file *filp, u32 *handle, int count,
+			  struct drm_gem_object **objs)
+{
+	int i, ret = 0;
+	struct drm_gem_object *obj;
+
+	spin_lock(&filp->table_lock);
+
+	for (i = 0; i < count; i++) {
+		/* Check if we currently have a reference on the object */
+		obj = idr_find(&filp->object_idr, handle[i]);
+		if (!obj) {
+			ret = -ENOENT;
+			break;
+		}
+		drm_gem_object_get(obj);
+		objs[i] = obj;
+	}
+	spin_unlock(&filp->table_lock);
+
+	return ret;
+}
+
+/**
+ * drm_gem_objects_lookup - look up GEM objects from an array of handles
+ * @filp: DRM file private data
+ * @bo_handles: user pointer to array of userspace handle
+ * @count: size of handle array
+ * @objs_out: returned pointer to array of drm_gem_object pointers
+ *
+ * Takes an array of userspace handles and returns a newly allocated array of
+ * GEM objects.
+ *
+ * For a single handle lookup, use drm_gem_object_lookup().
+ *
+ * Returns:
+ *
+ * @objs_out filled in with GEM object pointers. Returned GEM objects need to be
+ * released with drm_gem_object_put(). -ENOENT is returned on a lookup
+ * failure. 0 is returned on success.
+ *
+ */
+int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles,
+			   int count, struct drm_gem_object ***objs_out)
+{
+	int ret;
+	u32 *handles;
+	struct drm_gem_object **objs;
+
+	if (!count)
+		return 0;
+
+	objs = kvmalloc_array(count, sizeof(struct drm_gem_object *),
+			     GFP_KERNEL | __GFP_ZERO);
+	if (!objs)
+		return -ENOMEM;
+	*objs_out = objs;
+
+	handles = kvmalloc_array(count, sizeof(u32), GFP_KERNEL);
+	if (!handles) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (copy_from_user(handles, bo_handles, count * sizeof(u32))) {
+		ret = -EFAULT;
+		DRM_DEBUG("Failed to copy in GEM handles\n");
+		goto out;
+	}
+
+	ret = objects_lookup(filp, handles, count, objs);
+
+out:
+	kvfree(handles);
+	return ret;
+
+}
+EXPORT_SYMBOL(drm_gem_objects_lookup);
+
 /**
  * drm_gem_object_lookup - look up a GEM object from its handle
  * @filp: DRM file private date
@@ -655,21 +734,15 @@ EXPORT_SYMBOL(drm_gem_put_pages);
  *
  * A reference to the object named by the handle if such exists on @filp, NULL
  * otherwise.
+ *
+ * If looking up an array of handles, use drm_gem_objects_lookup().
  */
 struct drm_gem_object *
 drm_gem_object_lookup(struct drm_file *filp, u32 handle)
 {
-	struct drm_gem_object *obj;
-
-	spin_lock(&filp->table_lock);
-
-	/* Check if we currently have a reference on the object */
-	obj = idr_find(&filp->object_idr, handle);
-	if (obj)
-		drm_gem_object_get(obj);
-
-	spin_unlock(&filp->table_lock);
+	struct drm_gem_object *obj = NULL;
 
+	objects_lookup(filp, &handle, 1, &obj);
 	return obj;
 }
 EXPORT_SYMBOL(drm_gem_object_lookup);
@@ -1294,3 +1367,96 @@ drm_gem_unlock_reservations(struct drm_gem_object **objs, int count,
 	ww_acquire_fini(acquire_ctx);
 }
 EXPORT_SYMBOL(drm_gem_unlock_reservations);
+
+/**
+ * drm_gem_fence_array_add - Adds the fence to an array of fences to be
+ * waited on, deduplicating fences from the same context.
+ *
+ * @fence_array: array of dma_fence * for the job to block on.
+ * @fence: the dma_fence to add to the list of dependencies.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_gem_fence_array_add(struct xarray *fence_array,
+			    struct dma_fence *fence)
+{
+	struct dma_fence *entry;
+	unsigned long index;
+	u32 id = 0;
+	int ret;
+
+	if (!fence)
+		return 0;
+
+	/* Deduplicate if we already depend on a fence from the same context.
+	 * This lets the size of the array of deps scale with the number of
+	 * engines involved, rather than the number of BOs.
+	 */
+	xa_for_each(fence_array, index, entry) {
+		if (entry->context != fence->context)
+			continue;
+
+		if (dma_fence_is_later(fence, entry)) {
+			dma_fence_put(entry);
+			xa_store(fence_array, index, fence, GFP_KERNEL);
+		} else {
+			dma_fence_put(fence);
+		}
+		return 0;
+	}
+
+	ret = xa_alloc(fence_array, &id, fence, xa_limit_32b, GFP_KERNEL);
+	if (ret != 0)
+		dma_fence_put(fence);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_fence_array_add);
+
+/**
+ * drm_gem_fence_array_add_implicit - Adds the implicit dependencies tracked
+ * in the GEM object's reservation object to an array of dma_fences for use in
+ * scheduling a rendering job.
+ *
+ * This should be called after drm_gem_lock_reservations() on your array of
+ * GEM objects used in the job but before updating the reservations with your
+ * own fences.
+ *
+ * @fence_array: array of dma_fence * for the job to block on.
+ * @obj: the gem object to add new dependencies from.
+ * @write: whether the job might write the object (so we need to depend on
+ * shared fences in the reservation object).
+ */
+int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
+				     struct drm_gem_object *obj,
+				     bool write)
+{
+	int ret;
+	struct dma_fence **fences;
+	unsigned int i, fence_count;
+
+	if (!write) {
+		struct dma_fence *fence =
+			reservation_object_get_excl_rcu(obj->resv);
+
+		return drm_gem_fence_array_add(fence_array, fence);
+	}
+
+	ret = reservation_object_get_fences_rcu(obj->resv, NULL,
+						&fence_count, &fences);
+	if (ret || !fence_count)
+		return ret;
+
+	for (i = 0; i < fence_count; i++) {
+		ret = drm_gem_fence_array_add(fence_array, fences[i]);
+		if (ret)
+			break;
+	}
+	/* A failed add already dropped fences[i]; release only the rest. */
+	for (i += ret ? 1 : 0; i < fence_count; i++)
+		dma_fence_put(fences[i]);
+	kfree(fences);
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_fence_array_add_implicit);
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index 4a1c2023ccf0..1a346ae1599d 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -297,8 +297,9 @@ static int drm_mode_create_standard_properties(struct drm_device *dev)
 		return -ENOMEM;
 	dev->mode_config.prop_crtc_id = prop;
 
-	prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "FB_DAMAGE_CLIPS",
-				   0);
+	prop = drm_property_create(dev,
+			DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB,
+			"FB_DAMAGE_CLIPS", 0);
 	if (!prop)
 		return -ENOMEM;
 	dev->mode_config.prop_fb_damage_clips = prop;
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index bc532e99b5dc..89db71996148 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -285,225 +285,7 @@ out_unlock:
 	return ret;
 }
 
-static struct drm_fb_helper_crtc *
-intel_fb_helper_crtc(struct drm_fb_helper *fb_helper, struct drm_crtc *crtc)
-{
-	int i;
-
-	for (i = 0; i < fb_helper->crtc_count; i++)
-		if (fb_helper->crtc_info[i].mode_set.crtc == crtc)
-			return &fb_helper->crtc_info[i];
-
-	return NULL;
-}
-
-/*
- * Try to read the BIOS display configuration and use it for the initial
- * fb configuration.
- *
- * The BIOS or boot loader will generally create an initial display
- * configuration for us that includes some set of active pipes and displays.
- * This routine tries to figure out which pipes and connectors are active
- * and stuffs them into the crtcs and modes array given to us by the
- * drm_fb_helper code.
- *
- * The overall sequence is:
- *   intel_fbdev_init - from driver load
- *     intel_fbdev_init_bios - initialize the intel_fbdev using BIOS data
- *     drm_fb_helper_init - build fb helper structs
- *     drm_fb_helper_single_add_all_connectors - more fb helper structs
- *   intel_fbdev_initial_config - apply the config
- *     drm_fb_helper_initial_config - call ->probe then register_framebuffer()
- *         drm_setup_crtcs - build crtc config for fbdev
- *           intel_fb_initial_config - find active connectors etc
- *         drm_fb_helper_single_fb_probe - set up fbdev
- *           intelfb_create - re-use or alloc fb, build out fbdev structs
- *
- * Note that we don't make special consideration whether we could actually
- * switch to the selected modes without a full modeset. E.g. when the display
- * is in VGA mode we need to recalculate watermarks and set a new high-res
- * framebuffer anyway.
- */
-static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
-				    struct drm_fb_helper_crtc **crtcs,
-				    struct drm_display_mode **modes,
-				    struct drm_fb_offset *offsets,
-				    bool *enabled, int width, int height)
-{
-	struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
-	unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
-	unsigned long conn_configured, conn_seq;
-	int i, j;
-	bool *save_enabled;
-	bool fallback = true, ret = true;
-	int num_connectors_enabled = 0;
-	int num_connectors_detected = 0;
-	struct drm_modeset_acquire_ctx ctx;
-
-	save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
-	if (!save_enabled)
-		return false;
-
-	drm_modeset_acquire_init(&ctx, 0);
-
-	while (drm_modeset_lock_all_ctx(fb_helper->dev, &ctx) != 0)
-		drm_modeset_backoff(&ctx);
-
-	memcpy(save_enabled, enabled, count);
-	conn_seq = GENMASK(count - 1, 0);
-	conn_configured = 0;
-retry:
-	for (i = 0; i < count; i++) {
-		struct drm_fb_helper_connector *fb_conn;
-		struct drm_connector *connector;
-		struct drm_encoder *encoder;
-		struct drm_fb_helper_crtc *new_crtc;
-
-		fb_conn = fb_helper->connector_info[i];
-		connector = fb_conn->connector;
-
-		if (conn_configured & BIT(i))
-			continue;
-
-		/* First pass, only consider tiled connectors */
-		if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile)
-			continue;
-
-		if (connector->status == connector_status_connected)
-			num_connectors_detected++;
-
-		if (!enabled[i]) {
-			DRM_DEBUG_KMS("connector %s not enabled, skipping\n",
-				      connector->name);
-			conn_configured |= BIT(i);
-			continue;
-		}
-
-		if (connector->force == DRM_FORCE_OFF) {
-			DRM_DEBUG_KMS("connector %s is disabled by user, skipping\n",
-				      connector->name);
-			enabled[i] = false;
-			continue;
-		}
-
-		encoder = connector->state->best_encoder;
-		if (!encoder || WARN_ON(!connector->state->crtc)) {
-			if (connector->force > DRM_FORCE_OFF)
-				goto bail;
-
-			DRM_DEBUG_KMS("connector %s has no encoder or crtc, skipping\n",
-				      connector->name);
-			enabled[i] = false;
-			conn_configured |= BIT(i);
-			continue;
-		}
-
-		num_connectors_enabled++;
-
-		new_crtc = intel_fb_helper_crtc(fb_helper,
-						connector->state->crtc);
-
-		/*
-		 * Make sure we're not trying to drive multiple connectors
-		 * with a single CRTC, since our cloning support may not
-		 * match the BIOS.
-		 */
-		for (j = 0; j < count; j++) {
-			if (crtcs[j] == new_crtc) {
-				DRM_DEBUG_KMS("fallback: cloned configuration\n");
-				goto bail;
-			}
-		}
-
-		DRM_DEBUG_KMS("looking for cmdline mode on connector %s\n",
-			      connector->name);
-
-		/* go for command line mode first */
-		modes[i] = drm_pick_cmdline_mode(fb_conn);
-
-		/* try for preferred next */
-		if (!modes[i]) {
-			DRM_DEBUG_KMS("looking for preferred mode on connector %s %d\n",
-				      connector->name, connector->has_tile);
-			modes[i] = drm_has_preferred_mode(fb_conn, width,
-							  height);
-		}
-
-		/* No preferred mode marked by the EDID? Are there any modes? */
-		if (!modes[i] && !list_empty(&connector->modes)) {
-			DRM_DEBUG_KMS("using first mode listed on connector %s\n",
-				      connector->name);
-			modes[i] = list_first_entry(&connector->modes,
-						    struct drm_display_mode,
-						    head);
-		}
-
-		/* last resort: use current mode */
-		if (!modes[i]) {
-			/*
-			 * IMPORTANT: We want to use the adjusted mode (i.e.
-			 * after the panel fitter upscaling) as the initial
-			 * config, not the input mode, which is what crtc->mode
-			 * usually contains. But since our current
-			 * code puts a mode derived from the post-pfit timings
-			 * into crtc->mode this works out correctly.
-			 *
-			 * This is crtc->mode and not crtc->state->mode for the
-			 * fastboot check to work correctly. crtc_state->mode has
-			 * I915_MODE_FLAG_INHERITED, which we clear to force check
-			 * state.
-			 */
-			DRM_DEBUG_KMS("looking for current mode on connector %s\n",
-				      connector->name);
-			modes[i] = &connector->state->crtc->mode;
-		}
-		crtcs[i] = new_crtc;
-
-		DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n",
-			      connector->name,
-			      connector->state->crtc->base.id,
-			      connector->state->crtc->name,
-			      modes[i]->hdisplay, modes[i]->vdisplay,
-			      modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" :"");
-
-		fallback = false;
-		conn_configured |= BIT(i);
-	}
-
-	if (conn_configured != conn_seq) { /* repeat until no more are found */
-		conn_seq = conn_configured;
-		goto retry;
-	}
-
-	/*
-	 * If the BIOS didn't enable everything it could, fall back to have the
-	 * same user experiencing of lighting up as much as possible like the
-	 * fbdev helper library.
-	 */
-	if (num_connectors_enabled != num_connectors_detected &&
-	    num_connectors_enabled < INTEL_INFO(dev_priv)->num_pipes) {
-		DRM_DEBUG_KMS("fallback: Not all outputs enabled\n");
-		DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled,
-			      num_connectors_detected);
-		fallback = true;
-	}
-
-	if (fallback) {
-bail:
-		DRM_DEBUG_KMS("Not using firmware configuration\n");
-		memcpy(enabled, save_enabled, count);
-		ret = false;
-	}
-
-	drm_modeset_drop_locks(&ctx);
-	drm_modeset_acquire_fini(&ctx);
-
-	kfree(save_enabled);
-	return ret;
-}
-
 static const struct drm_fb_helper_funcs intel_fb_helper_funcs = {
-	.initial_config = intel_fb_initial_config,
 	.fb_probe = intelfb_create,
 };
 
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
index 1d69498bc17e..477c0f766663 100644
--- a/drivers/gpu/drm/lima/lima_gem.c
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -145,40 +145,7 @@ static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
 	if (explicit)
 		return 0;
 
-	/* implicit sync use bo fence in resv obj */
-	if (write) {
-		unsigned nr_fences;
-		struct dma_fence **fences;
-		int i;
-
-		err = reservation_object_get_fences_rcu(
-			bo->gem.resv, NULL, &nr_fences, &fences);
-		if (err || !nr_fences)
-			return err;
-
-		for (i = 0; i < nr_fences; i++) {
-			err = lima_sched_task_add_dep(task, fences[i]);
-			if (err)
-				break;
-		}
-
-		/* for error case free remaining fences */
-		for ( ; i < nr_fences; i++)
-			dma_fence_put(fences[i]);
-
-		kfree(fences);
-	} else {
-		struct dma_fence *fence;
-
-		fence = reservation_object_get_excl_rcu(bo->gem.resv);
-		if (fence) {
-			err = lima_sched_task_add_dep(task, fence);
-			if (err)
-				dma_fence_put(fence);
-		}
-	}
-
-	return err;
+	return drm_gem_fence_array_add_implicit(&task->deps, &bo->gem, write);
 }
 
 static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos,
@@ -251,7 +218,7 @@ static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit)
 		if (err)
 			return err;
 
-		err = lima_sched_task_add_dep(submit->task, fence);
+		err = drm_gem_fence_array_add(&submit->task->deps, fence);
 		if (err) {
 			dma_fence_put(fence);
 			return err;
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index 97bd9c1deb87..d53bd45f8d96 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -3,6 +3,7 @@
 
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/xarray.h>
 
 #include "lima_drv.h"
 #include "lima_sched.h"
@@ -126,19 +127,24 @@ int lima_sched_task_init(struct lima_sched_task *task,
 
 	task->num_bos = num_bos;
 	task->vm = lima_vm_get(vm);
+
+	xa_init_flags(&task->deps, XA_FLAGS_ALLOC);
+
 	return 0;
 }
 
 void lima_sched_task_fini(struct lima_sched_task *task)
 {
+	struct dma_fence *fence;
+	unsigned long index;
 	int i;
 
 	drm_sched_job_cleanup(&task->base);
 
-	for (i = 0; i < task->num_dep; i++)
-		dma_fence_put(task->dep[i]);
-
-	kfree(task->dep);
+	xa_for_each(&task->deps, index, fence) {
+		dma_fence_put(fence);
+	}
+	xa_destroy(&task->deps);
 
 	if (task->bos) {
 		for (i = 0; i < task->num_bos; i++)
@@ -149,42 +155,6 @@ void lima_sched_task_fini(struct lima_sched_task *task)
 	lima_vm_put(task->vm);
 }
 
-int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
-{
-	int i, new_dep = 4;
-
-	/* same context's fence is definitly earlier then this task */
-	if (fence->context == task->base.s_fence->finished.context) {
-		dma_fence_put(fence);
-		return 0;
-	}
-
-	if (task->dep && task->num_dep == task->max_dep)
-		new_dep = task->max_dep * 2;
-
-	if (task->max_dep < new_dep) {
-		void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);
-
-		if (!dep)
-			return -ENOMEM;
-
-		task->max_dep = new_dep;
-		task->dep = dep;
-	}
-
-	for (i = 0; i < task->num_dep; i++) {
-		if (task->dep[i]->context == fence->context &&
-		    dma_fence_is_later(fence, task->dep[i])) {
-			dma_fence_put(task->dep[i]);
-			task->dep[i] = fence;
-			return 0;
-		}
-	}
-
-	task->dep[task->num_dep++] = fence;
-	return 0;
-}
-
 int lima_sched_context_init(struct lima_sched_pipe *pipe,
 			    struct lima_sched_context *context,
 			    atomic_t *guilty)
@@ -213,21 +183,9 @@ static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
 					       struct drm_sched_entity *entity)
 {
 	struct lima_sched_task *task = to_lima_task(job);
-	int i;
-
-	for (i = 0; i < task->num_dep; i++) {
-		struct dma_fence *fence = task->dep[i];
-
-		if (!task->dep[i])
-			continue;
-
-		task->dep[i] = NULL;
 
-		if (!dma_fence_is_signaled(fence))
-			return fence;
-
-		dma_fence_put(fence);
-	}
+	if (!xa_empty(&task->deps))
+		return xa_erase(&task->deps, task->last_dep++);
 
 	return NULL;
 }
@@ -353,7 +311,7 @@ static void lima_sched_free_job(struct drm_sched_job *job)
 	kmem_cache_free(pipe->task_slab, task);
 }
 
-const struct drm_sched_backend_ops lima_sched_ops = {
+static const struct drm_sched_backend_ops lima_sched_ops = {
 	.dependency = lima_sched_dependency,
 	.run_job = lima_sched_run_job,
 	.timedout_job = lima_sched_timedout_job,
diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
index b017cfa7e327..928af91c1118 100644
--- a/drivers/gpu/drm/lima/lima_sched.h
+++ b/drivers/gpu/drm/lima/lima_sched.h
@@ -14,9 +14,8 @@ struct lima_sched_task {
 	struct lima_vm *vm;
 	void *frame;
 
-	struct dma_fence **dep;
-	int num_dep;
-	int max_dep;
+	struct xarray deps;
+	unsigned long last_dep;
 
 	struct lima_bo **bos;
 	int num_bos;
@@ -78,7 +77,6 @@ int lima_sched_task_init(struct lima_sched_task *task,
 			 struct lima_bo **bos, int num_bos,
 			 struct lima_vm *vm);
 void lima_sched_task_fini(struct lima_sched_task *task);
-int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
 
 int lima_sched_context_init(struct lima_sched_pipe *pipe,
 			    struct lima_sched_context *context,
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index faf1b1b0357c..72b01e6be0d9 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -90,6 +90,18 @@ static irqreturn_t meson_irq(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static int meson_dumb_create(struct drm_file *file, struct drm_device *dev,
+			     struct drm_mode_create_dumb *args)
+{
+	/*
+	 * We need 64bytes aligned stride, and PAGE aligned size
+	 */
+	args->pitch = ALIGN(DIV_ROUND_UP(args->width * args->bpp, 8), SZ_64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	return drm_gem_cma_dumb_create_internal(file, dev, args);
+}
+
 DEFINE_DRM_GEM_CMA_FOPS(fops);
 
 static struct drm_driver meson_driver = {
@@ -112,7 +124,7 @@ static struct drm_driver meson_driver = {
 	.gem_prime_mmap		= drm_gem_cma_prime_mmap,
 
 	/* GEM Ops */
-	.dumb_create		= drm_gem_cma_dumb_create,
+	.dumb_create		= meson_dumb_create,
 	.gem_free_object_unlocked = drm_gem_cma_free_object,
 	.gem_vm_ops		= &drm_gem_cma_vm_ops,
 
diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index 0169c98b01c9..b59072342cae 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -90,8 +90,8 @@ static int eotf_bypass_coeff[EOTF_COEFF_SIZE] = {
 	EOTF_COEFF_RIGHTSHIFT /* right shift */
 };
 
-void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv, int *m,
-				   bool csc_on)
+static void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv,
+					   int *m, bool csc_on)
 {
 	/* VPP WRAP OSD1 matrix */
 	writel(((m[0] & 0xfff) << 16) | (m[1] & 0xfff),
@@ -118,8 +118,8 @@ void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv, int *m,
 		priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_EN_CTRL));
 }
 
-void meson_viu_set_osd_matrix(struct meson_drm *priv,
-			      enum viu_matrix_sel_e m_select,
+static void meson_viu_set_osd_matrix(struct meson_drm *priv,
+				     enum viu_matrix_sel_e m_select,
 			      int *m, bool csc_on)
 {
 	if (m_select == VIU_MATRIX_OSD) {
@@ -187,10 +187,10 @@ void meson_viu_set_osd_matrix(struct meson_drm *priv,
 #define OSD_EOTF_LUT_SIZE 33
 #define OSD_OETF_LUT_SIZE 41
 
-void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel,
-			   unsigned int *r_map, unsigned int *g_map,
-			   unsigned int *b_map,
-			   bool csc_on)
+static void
+meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel,
+		      unsigned int *r_map, unsigned int *g_map,
+		      unsigned int *b_map, bool csc_on)
 {
 	unsigned int addr_port;
 	unsigned int data_port;
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 8fee7a8b29d9..569be4efd8d1 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -3025,6 +3025,34 @@ static const struct panel_desc_dsi panasonic_vvx10f004b00 = {
 	.lanes = 4,
 };
 
+static const struct drm_display_mode lg_acx467akm_7_mode = {
+	.clock = 150000,
+	.hdisplay = 1080,
+	.hsync_start = 1080 + 2,
+	.hsync_end = 1080 + 2 + 2,
+	.htotal = 1080 + 2 + 2 + 2,
+	.vdisplay = 1920,
+	.vsync_start = 1920 + 2,
+	.vsync_end = 1920 + 2 + 2,
+	.vtotal = 1920 + 2 + 2 + 2,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc_dsi lg_acx467akm_7 = {
+	.desc = {
+		.modes = &lg_acx467akm_7_mode,
+		.num_modes = 1,
+		.bpc = 8,
+		.size = {
+			.width = 62,
+			.height = 110,
+		},
+	},
+	.flags = 0,
+	.format = MIPI_DSI_FMT_RGB888,
+	.lanes = 4,
+};
+
 static const struct of_device_id dsi_of_match[] = {
 	{
 		.compatible = "auo,b080uan01",
@@ -3042,6 +3070,9 @@ static const struct of_device_id dsi_of_match[] = {
 		.compatible = "panasonic,vvx10f004b00",
 		.data = &panasonic_vvx10f004b00
 	}, {
+		.compatible = "lg,acx467akm-7",
+		.data = &lg_acx467akm_7
+	}, {
 		/* sentinel */
 	}
 };
diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
new file mode 100644
index 000000000000..7f5e572daa2d
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config DRM_PANFROST
+	tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)"
+	depends on DRM
+	depends on ARM || ARM64 || COMPILE_TEST
+	depends on MMU
+	select DRM_SCHED
+	select IOMMU_SUPPORT
+	select IOMMU_IO_PGTABLE_LPAE
+	select DRM_GEM_SHMEM_HELPER
+	help
+	  DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and
+	  Bifrost (G3x, G5x, G7x) GPUs.
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
new file mode 100644
index 000000000000..6de72d13c58f
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+panfrost-y := \
+	panfrost_drv.o \
+	panfrost_device.o \
+	panfrost_devfreq.o \
+	panfrost_gem.o \
+	panfrost_gpu.o \
+	panfrost_job.o \
+	panfrost_mmu.o
+
+obj-$(CONFIG_DRM_PANFROST) += panfrost.o
diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO
new file mode 100644
index 000000000000..c2e44add37d8
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/TODO
@@ -0,0 +1,27 @@
+- Thermal support.
+
+- Bifrost support:
+  - DT bindings (Neil, WIP)
+  - MMU page table format and address space setup
+  - Bifrost specific feature and issue handling
+  - Coherent DMA support
+
+- Support for 2MB pages. The io-pgtable code already supports this. Finishing
+  support involves either copying or adapting the iommu API to handle passing
+  aligned addresses and sizes to the io-pgtable code.
+
+- Per FD address space support. The h/w supports multiple address spaces.
+  The hard part is handling when more address spaces are needed than what
+  the h/w provides.
+
+- Support pinning pages on demand (GPU page faults).
+
+- Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu)
+
+- Support for madvise and a shrinker.
+
+- Compute job support. So called 'compute only' jobs need to be plumbed up to
+  userspace.
+
+- Performance counter support. (Boris)
+
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
new file mode 100644
index 000000000000..a8121ae67ee3
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Collabora ltd. */
+#include <linux/devfreq.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#include "panfrost_device.h"
+#include "panfrost_features.h"
+#include "panfrost_issues.h"
+#include "panfrost_gpu.h"
+#include "panfrost_regs.h"
+
+static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev, int slot);
+
+static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
+				   u32 flags)
+{
+	struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev));
+	struct dev_pm_opp *opp;
+	unsigned long old_clk_rate = pfdev->devfreq.cur_freq;
+	unsigned long target_volt, target_rate;
+	int err;
+
+	opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	target_rate = dev_pm_opp_get_freq(opp);
+	target_volt = dev_pm_opp_get_voltage(opp);
+	dev_pm_opp_put(opp);
+
+	if (old_clk_rate == target_rate)
+		return 0;
+
+	/*
+	 * If frequency scaling from low to high, adjust voltage first.
+	 * If frequency scaling from high to low, adjust frequency first.
+	 */
+	if (old_clk_rate < target_rate) {
+		err = regulator_set_voltage(pfdev->regulator, target_volt,
+					    target_volt);
+		if (err) {
+			dev_err(dev, "Cannot set voltage %lu uV\n",
+				target_volt);
+			return err;
+		}
+	}
+
+	err = clk_set_rate(pfdev->clock, target_rate);
+	if (err) {
+		dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate,
+			err);
+		regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt,
+				      pfdev->devfreq.cur_volt);
+		return err;
+	}
+
+	if (old_clk_rate > target_rate) {
+		err = regulator_set_voltage(pfdev->regulator, target_volt,
+					    target_volt);
+		if (err)
+			dev_err(dev, "Cannot set voltage %lu uV\n", target_volt);
+	}
+
+	pfdev->devfreq.cur_freq = target_rate;
+	pfdev->devfreq.cur_volt = target_volt;
+
+	return 0;
+}
+
+static void panfrost_devfreq_reset(struct panfrost_device *pfdev)
+{
+	ktime_t now = ktime_get();
+	int i;
+
+	for (i = 0; i < NUM_JOB_SLOTS; i++) {
+		pfdev->devfreq.slot[i].busy_time = 0;
+		pfdev->devfreq.slot[i].idle_time = 0;
+		pfdev->devfreq.slot[i].time_last_update = now;
+	}
+}
+
+/* devfreq get_dev_status hook: report busy/total time since the last call
+ * and restart the accounting window.  Always returns 0.
+ */
+static int panfrost_devfreq_get_dev_status(struct device *dev,
+					   struct devfreq_dev_status *status)
+{
+	struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev));
+	int i;
+
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		panfrost_devfreq_update_utilization(pfdev, i);
+
+	status->current_frequency = clk_get_rate(pfdev->clock);
+	status->total_time = ktime_to_ns(ktime_add(pfdev->devfreq.slot[0].busy_time,
+						   pfdev->devfreq.slot[0].idle_time));
+
+	/* We're scheduling only to one core atm, so sum the slots, don't average */
+	status->busy_time = 0;
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		status->busy_time += ktime_to_ns(pfdev->devfreq.slot[i].busy_time);
+
+	panfrost_devfreq_reset(pfdev);
+
+	/* A window shorter than 100ns would make the percentage divisor zero. */
+	dev_dbg(pfdev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n", status->busy_time,
+		status->total_time,
+		status->total_time < 100 ? 0 : status->busy_time / (status->total_time / 100),
+		status->current_frequency / 1000 / 1000);
+
+	return 0;
+}
+
+static int panfrost_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev));
+
+	*freq = pfdev->devfreq.cur_freq;
+
+	return 0;
+}
+
+static struct devfreq_dev_profile panfrost_devfreq_profile = {
+	.polling_ms = 50, /* ~3 frames */
+	.target = panfrost_devfreq_target,
+	.get_dev_status = panfrost_devfreq_get_dev_status,
+	.get_cur_freq = panfrost_devfreq_get_cur_freq,
+};
+
+/* Register devfreq; returns 0 when there is no regulator or no OPP table. */
+int panfrost_devfreq_init(struct panfrost_device *pfdev)
+{
+	int ret;
+	struct dev_pm_opp *opp;
+
+	if (!pfdev->regulator)
+		return 0;
+
+	ret = dev_pm_opp_of_add_table(&pfdev->pdev->dev);
+	if (ret) /* a missing table (-ENODEV) is optional: run without devfreq */
+		return ret == -ENODEV ? 0 : ret;
+
+	panfrost_devfreq_reset(pfdev);
+	pfdev->devfreq.cur_freq = clk_get_rate(pfdev->clock);
+
+	opp = devfreq_recommended_opp(&pfdev->pdev->dev, &pfdev->devfreq.cur_freq, 0);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	panfrost_devfreq_profile.initial_freq = pfdev->devfreq.cur_freq;
+	dev_pm_opp_put(opp);
+
+	pfdev->devfreq.devfreq = devm_devfreq_add_device(&pfdev->pdev->dev,
+			&panfrost_devfreq_profile, "simple_ondemand", NULL);
+	if (IS_ERR(pfdev->devfreq.devfreq)) {
+		DRM_DEV_ERROR(&pfdev->pdev->dev, "Couldn't initialize GPU devfreq\n");
+		ret = PTR_ERR(pfdev->devfreq.devfreq);
+		pfdev->devfreq.devfreq = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+void panfrost_devfreq_resume(struct panfrost_device *pfdev)
+{
+	int i;
+
+	if (!pfdev->devfreq.devfreq)
+		return;
+
+	panfrost_devfreq_reset(pfdev);
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		pfdev->devfreq.slot[i].busy = false;
+
+	devfreq_resume_device(pfdev->devfreq.devfreq);
+}
+
+void panfrost_devfreq_suspend(struct panfrost_device *pfdev)
+{
+	if (!pfdev->devfreq.devfreq)
+		return;
+
+	devfreq_suspend_device(pfdev->devfreq.devfreq);
+}
+
+static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev, int slot)
+{
+	struct panfrost_devfreq_slot *devfreq_slot = &pfdev->devfreq.slot[slot];
+	ktime_t now;
+	ktime_t last;
+
+	if (!pfdev->devfreq.devfreq)
+		return;
+
+	now = ktime_get();
+	last = pfdev->devfreq.slot[slot].time_last_update;
+
+	/* If the slot is flagged busy, the time since the last update was busy time */
+	if (devfreq_slot->busy)
+		pfdev->devfreq.slot[slot].busy_time += ktime_sub(now, last);
+	else
+		pfdev->devfreq.slot[slot].idle_time += ktime_sub(now, last);
+
+	pfdev->devfreq.slot[slot].time_last_update = now;
+}
+
+/* The job scheduler is expected to call this at every transition busy <-> idle */
+void panfrost_devfreq_record_transition(struct panfrost_device *pfdev, int slot)
+{
+	struct panfrost_devfreq_slot *devfreq_slot = &pfdev->devfreq.slot[slot];
+
+	panfrost_devfreq_update_utilization(pfdev, slot);
+	devfreq_slot->busy = !devfreq_slot->busy;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.h b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
new file mode 100644
index 000000000000..eb999531ed90
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANFROST_DEVFREQ_H__
+#define __PANFROST_DEVFREQ_H__
+
+int panfrost_devfreq_init(struct panfrost_device *pfdev);
+
+void panfrost_devfreq_resume(struct panfrost_device *pfdev);
+void panfrost_devfreq_suspend(struct panfrost_device *pfdev);
+
+void panfrost_devfreq_record_transition(struct panfrost_device *pfdev, int slot);
+
+#endif /* __PANFROST_DEVFREQ_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
new file mode 100644
index 000000000000..91e8fb0f2b25
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#include <linux/clk.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
+
+#include "panfrost_device.h"
+#include "panfrost_devfreq.h"
+#include "panfrost_features.h"
+#include "panfrost_gpu.h"
+#include "panfrost_job.h"
+#include "panfrost_mmu.h"
+
+/* Look up the device's reset controls and deassert them.
+ * Returns 0 on success or the error from the failing call.
+ */
+static int panfrost_reset_init(struct panfrost_device *pfdev)
+{
+	struct reset_control *rstc;
+
+	rstc = devm_reset_control_array_get(pfdev->dev, false, true);
+	pfdev->rstc = rstc;
+	if (IS_ERR(rstc)) {
+		dev_err(pfdev->dev, "get reset failed %ld\n", PTR_ERR(rstc));
+		return PTR_ERR(rstc);
+	}
+
+	return reset_control_deassert(rstc);
+}
+
+static void panfrost_reset_fini(struct panfrost_device *pfdev)
+{
+	reset_control_assert(pfdev->rstc);
+}
+
+/* Get the device clock, log its current rate and enable it.
+ * Returns 0 on success or the error from the failing clk call.
+ */
+static int panfrost_clk_init(struct panfrost_device *pfdev)
+{
+	struct clk *dev_clk;
+	unsigned long rate_now;
+
+	dev_clk = devm_clk_get(pfdev->dev, NULL);
+	pfdev->clock = dev_clk;
+	if (IS_ERR(dev_clk)) {
+		dev_err(pfdev->dev, "get clock failed %ld\n", PTR_ERR(dev_clk));
+		return PTR_ERR(dev_clk);
+	}
+
+	rate_now = clk_get_rate(dev_clk);
+	dev_info(pfdev->dev, "clock rate = %lu\n", rate_now);
+
+	return clk_prepare_enable(dev_clk);
+}
+
+static void panfrost_clk_fini(struct panfrost_device *pfdev)
+{
+	clk_disable_unprepare(pfdev->clock);
+}
+
+static int panfrost_regulator_init(struct panfrost_device *pfdev)
+{
+	int ret;
+
+	pfdev->regulator = devm_regulator_get_optional(pfdev->dev, "mali");
+	if (IS_ERR(pfdev->regulator)) {
+		ret = PTR_ERR(pfdev->regulator);
+		pfdev->regulator = NULL;
+		if (ret == -ENODEV)
+			return 0;
+		dev_err(pfdev->dev, "failed to get regulator: %d\n", ret);
+		return ret;
+	}
+
+	ret = regulator_enable(pfdev->regulator);
+	if (ret < 0) {
+		dev_err(pfdev->dev, "failed to enable regulator: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void panfrost_regulator_fini(struct panfrost_device *pfdev)
+{
+	if (pfdev->regulator)
+		regulator_disable(pfdev->regulator);
+}
+
+int panfrost_device_init(struct panfrost_device *pfdev)
+{
+	int err;
+	struct resource *res;
+
+	mutex_init(&pfdev->sched_lock);
+	INIT_LIST_HEAD(&pfdev->scheduled_jobs);
+
+	spin_lock_init(&pfdev->hwaccess_lock);
+
+	err = panfrost_clk_init(pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "clk init failed %d\n", err);
+		return err;
+	}
+
+	err = panfrost_regulator_init(pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "regulator init failed %d\n", err);
+		goto err_out0;
+	}
+
+	err = panfrost_reset_init(pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "reset init failed %d\n", err);
+		goto err_out1;
+	}
+
+	res = platform_get_resource(pfdev->pdev, IORESOURCE_MEM, 0);
+	pfdev->iomem = devm_ioremap_resource(pfdev->dev, res);
+	if (IS_ERR(pfdev->iomem)) {
+		dev_err(pfdev->dev, "failed to ioremap iomem\n");
+		err = PTR_ERR(pfdev->iomem);
+		goto err_out2;
+	}
+
+	err = panfrost_gpu_init(pfdev);
+	if (err)
+		goto err_out2;
+
+	err = panfrost_mmu_init(pfdev);
+	if (err)
+		goto err_out3;
+
+	err = panfrost_job_init(pfdev);
+	if (err)
+		goto err_out4;
+
+	/* runtime PM will wake us up later */
+	panfrost_gpu_power_off(pfdev);
+
+	pm_runtime_set_active(pfdev->dev);
+	pm_runtime_get_sync(pfdev->dev);
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+
+	return 0;
+err_out4:
+	panfrost_mmu_fini(pfdev);
+err_out3:
+	panfrost_gpu_fini(pfdev);
+err_out2:
+	panfrost_reset_fini(pfdev);
+err_out1:
+	panfrost_regulator_fini(pfdev);
+err_out0:
+	panfrost_clk_fini(pfdev);
+	return err;
+}
+
+void panfrost_device_fini(struct panfrost_device *pfdev)
+{
+	panfrost_regulator_fini(pfdev);
+	panfrost_clk_fini(pfdev);
+}
+
+const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code)
+{
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00: return "NOT_STARTED/IDLE/OK";
+	case 0x01: return "DONE";
+	case 0x02: return "INTERRUPTED";
+	case 0x03: return "STOPPED";
+	case 0x04: return "TERMINATED";
+	case 0x08: return "ACTIVE";
+		/* Job exceptions */
+	case 0x40: return "JOB_CONFIG_FAULT";
+	case 0x41: return "JOB_POWER_FAULT";
+	case 0x42: return "JOB_READ_FAULT";
+	case 0x43: return "JOB_WRITE_FAULT";
+	case 0x44: return "JOB_AFFINITY_FAULT";
+	case 0x48: return "JOB_BUS_FAULT";
+	case 0x50: return "INSTR_INVALID_PC";
+	case 0x51: return "INSTR_INVALID_ENC";
+	case 0x52: return "INSTR_TYPE_MISMATCH";
+	case 0x53: return "INSTR_OPERAND_FAULT";
+	case 0x54: return "INSTR_TLS_FAULT";
+	case 0x55: return "INSTR_BARRIER_FAULT";
+	case 0x56: return "INSTR_ALIGN_FAULT";
+	case 0x58: return "DATA_INVALID_FAULT";
+	case 0x59: return "TILE_RANGE_FAULT";
+	case 0x5A: return "ADDR_RANGE_FAULT";
+	case 0x60: return "OUT_OF_MEMORY";
+		/* GPU exceptions */
+	case 0x80: return "DELAYED_BUS_FAULT";
+	case 0x88: return "SHAREABILITY_FAULT";
+		/* MMU exceptions */
+	case 0xC1: return "TRANSLATION_FAULT_LEVEL1";
+	case 0xC2: return "TRANSLATION_FAULT_LEVEL2";
+	case 0xC3: return "TRANSLATION_FAULT_LEVEL3";
+	case 0xC4: return "TRANSLATION_FAULT_LEVEL4";
+	case 0xC8: return "PERMISSION_FAULT";
+	case 0xC9 ... 0xCF: return "PERMISSION_FAULT";
+	case 0xD1: return "TRANSTAB_BUS_FAULT_LEVEL1";
+	case 0xD2: return "TRANSTAB_BUS_FAULT_LEVEL2";
+	case 0xD3: return "TRANSTAB_BUS_FAULT_LEVEL3";
+	case 0xD4: return "TRANSTAB_BUS_FAULT_LEVEL4";
+	case 0xD8: return "ACCESS_FLAG";
+	case 0xD9 ... 0xDF: return "ACCESS_FLAG";
+	case 0xE0 ... 0xE7: return "ADDRESS_SIZE_FAULT";
+	case 0xE8 ... 0xEF: return "MEMORY_ATTRIBUTES_FAULT";
+	}
+
+	return "UNKNOWN";
+}
+
+#ifdef CONFIG_PM
+int panfrost_device_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct panfrost_device *pfdev = platform_get_drvdata(pdev);
+
+	panfrost_gpu_soft_reset(pfdev);
+
+	/* TODO: Re-enable all other address spaces */
+	panfrost_gpu_power_on(pfdev);
+	panfrost_mmu_enable(pfdev, 0);
+	panfrost_job_enable_interrupts(pfdev);
+	panfrost_devfreq_resume(pfdev);
+
+	return 0;
+}
+
+int panfrost_device_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct panfrost_device *pfdev = platform_get_drvdata(pdev);
+
+	if (!panfrost_job_is_idle(pfdev))
+		return -EBUSY;
+
+	panfrost_devfreq_suspend(pfdev);
+	panfrost_gpu_power_off(pfdev);
+
+	return 0;
+}
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
new file mode 100644
index 000000000000..1ba48d105763
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#ifndef __PANFROST_DEVICE_H__
+#define __PANFROST_DEVICE_H__
+
+#include <linux/spinlock.h>
+#include <drm/drm_device.h>
+#include <drm/drm_mm.h>
+#include <drm/gpu_scheduler.h>
+
+struct panfrost_device;
+struct panfrost_mmu;
+struct panfrost_job_slot;
+struct panfrost_job;
+
+#define NUM_JOB_SLOTS 3
+
+struct panfrost_features {
+	u16 id;
+	u16 revision;
+
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+	u32 as_present;
+	u32 js_present;
+
+	u32 l2_features;
+	u32 core_features;
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 thread_features;
+	u32 max_threads;
+	u32 thread_max_workgroup_sz;
+	u32 thread_max_barrier_sz;
+	u32 coherency_features;
+	u32 texture_features[4];
+	u32 js_features[16];
+
+	u32 nr_core_groups;
+
+	unsigned long hw_features[64 / BITS_PER_LONG];
+	unsigned long hw_issues[64 / BITS_PER_LONG];
+};
+
+struct panfrost_devfreq_slot {
+	ktime_t busy_time;
+	ktime_t idle_time;
+	ktime_t time_last_update;
+	bool busy;
+};
+
+struct panfrost_device {
+	struct device *dev;
+	struct drm_device *ddev;
+	struct platform_device *pdev;
+
+	spinlock_t hwaccess_lock;
+
+	struct drm_mm mm;
+	spinlock_t mm_lock;
+
+	void __iomem *iomem;
+	struct clk *clock;
+	struct regulator *regulator;
+	struct reset_control *rstc;
+
+	struct panfrost_features features;
+
+	struct panfrost_mmu *mmu;
+	struct panfrost_job_slot *js;
+
+	struct panfrost_job *jobs[NUM_JOB_SLOTS];
+	struct list_head scheduled_jobs;
+
+	struct mutex sched_lock;
+
+	struct {
+		struct devfreq *devfreq;
+		struct thermal_cooling_device *cooling;
+		unsigned long cur_freq;
+		unsigned long cur_volt;
+		struct panfrost_devfreq_slot slot[NUM_JOB_SLOTS];
+	} devfreq;
+};
+
+struct panfrost_file_priv {
+	struct panfrost_device *pfdev;
+
+	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
+};
+
+static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
+{
+	return ddev->dev_private;
+}
+
+static inline int panfrost_model_cmp(struct panfrost_device *pfdev, s32 id)
+{
+	s32 match_id = pfdev->features.id;
+
+	if (match_id & 0xf000)
+		match_id &= 0xf00f;
+	return match_id - id;
+}
+
+static inline bool panfrost_model_eq(struct panfrost_device *pfdev, s32 id)
+{
+	return !panfrost_model_cmp(pfdev, id);
+}
+
+int panfrost_device_init(struct panfrost_device *pfdev);
+void panfrost_device_fini(struct panfrost_device *pfdev);
+
+int panfrost_device_resume(struct device *dev);
+int panfrost_device_suspend(struct device *dev);
+
+const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
new file mode 100644
index 000000000000..c06af78ab833
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/pagemap.h>
+#include <linux/pm_runtime.h>
+#include <drm/panfrost_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_syncobj.h>
+#include <drm/drm_utils.h>
+
+#include "panfrost_device.h"
+#include "panfrost_devfreq.h"
+#include "panfrost_gem.h"
+#include "panfrost_mmu.h"
+#include "panfrost_job.h"
+#include "panfrost_gpu.h"
+
+static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct drm_file *file)
+{
+	struct drm_panfrost_get_param *param = data;
+	struct panfrost_device *pfdev = ddev->dev_private;
+
+	if (param->pad != 0)
+		return -EINVAL;
+
+	switch (param->param) {
+	case DRM_PANFROST_PARAM_GPU_PROD_ID:
+		param->value = pfdev->features.id;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	int ret;
+	struct drm_gem_shmem_object *shmem;
+	struct drm_panfrost_create_bo *args = data;
+
+	if (!args->size || args->flags || args->pad)
+		return -EINVAL;
+
+	shmem = drm_gem_shmem_create_with_handle(file, dev, args->size,
+						 &args->handle);
+	if (IS_ERR(shmem))
+		return PTR_ERR(shmem);
+
+	ret = panfrost_mmu_map(to_panfrost_bo(&shmem->base));
+	if (ret)
+		goto err_free;
+
+	args->offset = to_panfrost_bo(&shmem->base)->node.start << PAGE_SHIFT;
+
+	return 0;
+
+err_free:
+	drm_gem_object_put_unlocked(&shmem->base);
+	return ret;
+}
+
+/**
+ * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @args: IOCTL args
+ * @job: job being set up
+ *
+ * Resolve handles from userspace to BOs and attach them to job.
+ *
+ * Note that this function doesn't need to unreference the BOs on
+ * failure, because that will happen at panfrost_job_cleanup() time.
+ */
+static int
+panfrost_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct drm_panfrost_submit *args,
+		  struct panfrost_job *job)
+{
+	job->bo_count = args->bo_handle_count;
+
+	if (!job->bo_count)
+		return 0;
+
+	job->implicit_fences = kvmalloc_array(job->bo_count,
+				  sizeof(struct dma_fence *),
+				  GFP_KERNEL | __GFP_ZERO);
+	if (!job->implicit_fences)
+		return -ENOMEM;
+
+	return drm_gem_objects_lookup(file_priv,
+				      (void __user *)(uintptr_t)args->bo_handles,
+				      job->bo_count, &job->bos);
+}
+
+/**
+ * panfrost_copy_in_sync() - Sets up job->in_fences[] with the sync objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @args: IOCTL args
+ * @job: job being set up
+ *
+ * Resolve syncobjs from userspace to fences and attach them to job.
+ *
+ * Note that this function doesn't need to unreference the fences on
+ * failure, because that will happen at panfrost_job_cleanup() time.
+ *
+ * Return: 0 on success, otherwise a negative errno (the first failure wins).
+ */
+static int
+panfrost_copy_in_sync(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct drm_panfrost_submit *args,
+		  struct panfrost_job *job)
+{
+	u32 *handles;
+	int i, ret = 0;
+
+	job->in_fence_count = args->in_sync_count;
+	if (!job->in_fence_count)
+		return 0;
+
+	job->in_fences = kvmalloc_array(job->in_fence_count,
+					sizeof(struct dma_fence *),
+					GFP_KERNEL | __GFP_ZERO);
+	if (!job->in_fences) {
+		DRM_DEBUG("Failed to allocate job in fences\n");
+		return -ENOMEM;
+	}
+
+	handles = kvmalloc_array(job->in_fence_count, sizeof(u32), GFP_KERNEL);
+	if (!handles) {
+		ret = -ENOMEM;
+		DRM_DEBUG("Failed to allocate incoming syncobj handles\n");
+		goto fail;
+	}
+
+	if (copy_from_user(handles,
+			   (void __user *)(uintptr_t)args->in_syncs,
+			   job->in_fence_count * sizeof(u32))) {
+		ret = -EFAULT;
+		DRM_DEBUG("Failed to copy in syncobj handles\n");
+		goto fail;
+	}
+
+	for (i = 0; i < job->in_fence_count; i++) {
+		ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0,
+					     &job->in_fences[i]);
+		if (ret) /* was -EINVAL only: other errors leaked out for the last handle only */
+			goto fail;
+	}
+
+fail:
+	kvfree(handles);
+	return ret;
+}
+
+static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct drm_panfrost_submit *args = data;
+	struct drm_syncobj *sync_out;
+	struct panfrost_job *job;
+	int ret = 0;
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	kref_init(&job->refcount);
+
+	job->pfdev = pfdev;
+	job->jc = args->jc;
+	job->requirements = args->requirements;
+	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
+	job->file_priv = file->driver_priv;
+
+	ret = panfrost_copy_in_sync(dev, file, args, job);
+	if (ret)
+		goto fail;
+
+	ret = panfrost_lookup_bos(dev, file, args, job);
+	if (ret)
+		goto fail;
+
+	ret = panfrost_job_push(job);
+	if (ret)
+		goto fail;
+
+	/* Update the return sync object for the job */
+	sync_out = drm_syncobj_find(file, args->out_sync);
+	if (sync_out) {
+		drm_syncobj_replace_fence(sync_out, job->render_done_fence);
+		drm_syncobj_put(sync_out);
+	}
+
+fail:
+	panfrost_job_put(job);
+
+	return ret;
+}
+
+static int
+panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
+{
+	long ret;
+	struct drm_panfrost_wait_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+
+	if (args->pad)
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj)
+		return -ENOENT;
+
+	ret = reservation_object_wait_timeout_rcu(gem_obj->resv, true,
+						  true, timeout);
+	if (!ret)
+		ret = timeout ? -ETIMEDOUT : -EBUSY;
+
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
+}
+
+static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct drm_panfrost_mmap_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	int ret;
+
+	if (args->flags != 0) {
+		DRM_INFO("unknown mmap_bo flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	ret = drm_gem_create_mmap_offset(gem_obj);
+	if (ret == 0)
+		args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
+}
+
+static int panfrost_ioctl_get_bo_offset(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv)
+{
+	struct drm_panfrost_get_bo_offset *args = data;
+	struct drm_gem_object *gem_obj;
+	struct panfrost_gem_object *bo;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+	bo = to_panfrost_bo(gem_obj);
+
+	args->offset = bo->node.start << PAGE_SHIFT;
+
+	drm_gem_object_put_unlocked(gem_obj);
+	return 0;
+}
+
+static int panfrost_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct panfrost_file_priv *panfrost_priv;
+	int ret;
+
+	panfrost_priv = kzalloc(sizeof(*panfrost_priv), GFP_KERNEL);
+	if (!panfrost_priv)
+		return -ENOMEM;
+	panfrost_priv->pfdev = dev->dev_private;
+	file->driver_priv = panfrost_priv;
+	ret = panfrost_job_open(panfrost_priv);
+	if (ret) /* open failed: postclose() will not run, so free it here */
+		kfree(panfrost_priv);
+	return ret;
+}
+
+static void
+panfrost_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
+
+	panfrost_job_close(panfrost_priv);
+
+	kfree(panfrost_priv);
+}
+
+/* DRM_AUTH is required on SUBMIT for now, while all clients share a single
+ * address space.  Note that render nodes would be able to submit jobs that
+ * could access BOs from clients authenticated with the master node.
+ */
+static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
+#define PANFROST_IOCTL(n, func, flags) \
+	DRM_IOCTL_DEF_DRV(PANFROST_##n, panfrost_ioctl_##func, flags)
+
+	PANFROST_IOCTL(SUBMIT,		submit,		DRM_RENDER_ALLOW | DRM_AUTH),
+	PANFROST_IOCTL(WAIT_BO,		wait_bo,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(CREATE_BO,	create_bo,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(MMAP_BO,		mmap_bo,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(GET_PARAM,	get_param,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(GET_BO_OFFSET,	get_bo_offset,	DRM_RENDER_ALLOW),
+};
+
+DEFINE_DRM_GEM_SHMEM_FOPS(panfrost_drm_driver_fops);
+
+static struct drm_driver panfrost_drm_driver = {
+	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME |
+				  DRIVER_SYNCOBJ,
+	.open			= panfrost_open,
+	.postclose		= panfrost_postclose,
+	.ioctls			= panfrost_drm_driver_ioctls,
+	.num_ioctls		= ARRAY_SIZE(panfrost_drm_driver_ioctls),
+	.fops			= &panfrost_drm_driver_fops,
+	.name			= "panfrost",
+	.desc			= "panfrost DRM",
+	.date			= "20180908",
+	.major			= 1,
+	.minor			= 0,
+
+	.gem_create_object	= panfrost_gem_create_object,
+	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
+	.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
+	.gem_prime_mmap		= drm_gem_prime_mmap,
+};
+
+/* Platform probe: set up the GPU and register its DRM device; 0 or -errno. */
+static int panfrost_probe(struct platform_device *pdev)
+{
+	struct panfrost_device *pfdev;
+	struct drm_device *ddev;
+	int err;
+
+	pfdev = devm_kzalloc(&pdev->dev, sizeof(*pfdev), GFP_KERNEL);
+	if (!pfdev)
+		return -ENOMEM;
+
+	pfdev->pdev = pdev;
+	pfdev->dev = &pdev->dev;
+
+	platform_set_drvdata(pdev, pfdev);
+
+	/* Allocate and initialize the DRM device. */
+	ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev);
+	if (IS_ERR(ddev))
+		return PTR_ERR(ddev);
+
+	ddev->dev_private = pfdev;
+	pfdev->ddev = ddev;
+
+	spin_lock_init(&pfdev->mm_lock);
+
+	/* 4G enough for now. can be 48-bit */
+	drm_mm_init(&pfdev->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT);
+
+	pm_runtime_use_autosuspend(pfdev->dev);
+	pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */
+	pm_runtime_enable(pfdev->dev);
+
+	err = panfrost_device_init(pfdev);
+	if (err) {
+		dev_err(&pdev->dev, "Fatal error during GPU init\n");
+		pm_runtime_disable(pfdev->dev); /* balance the enable above */
+		goto err_out0;
+	}
+
+	dma_set_mask_and_coherent(pfdev->dev,
+		DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features)));
+
+	err = panfrost_devfreq_init(pfdev);
+	if (err) {
+		dev_err(&pdev->dev, "Fatal error during devfreq init\n");
+		goto err_out1;
+	}
+
+	/* Register the DRM device with the core. */
+	err = drm_dev_register(ddev, 0);
+	if (err < 0)
+		goto err_out1;
+
+	return 0;
+
+err_out1:
+	pm_runtime_disable(pfdev->dev); /* before device_fini stops the clock */
+	panfrost_device_fini(pfdev);
+err_out0:
+	drm_dev_put(ddev);
+	return err;
+}
+
+static int panfrost_remove(struct platform_device *pdev)
+{
+	struct panfrost_device *pfdev = platform_get_drvdata(pdev);
+	struct drm_device *ddev = pfdev->ddev;
+
+	drm_dev_unregister(ddev);
+	pm_runtime_get_sync(pfdev->dev);
+	pm_runtime_put_sync_autosuspend(pfdev->dev);
+	pm_runtime_disable(pfdev->dev);
+	panfrost_device_fini(pfdev);
+	drm_dev_put(ddev);
+	return 0;
+}
+
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "arm,mali-t604" },
+	{ .compatible = "arm,mali-t624" },
+	{ .compatible = "arm,mali-t628" },
+	{ .compatible = "arm,mali-t720" },
+	{ .compatible = "arm,mali-t760" },
+	{ .compatible = "arm,mali-t820" },
+	{ .compatible = "arm,mali-t830" },
+	{ .compatible = "arm,mali-t860" },
+	{ .compatible = "arm,mali-t880" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static const struct dev_pm_ops panfrost_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+	SET_RUNTIME_PM_OPS(panfrost_device_suspend, panfrost_device_resume, NULL)
+};
+
+static struct platform_driver panfrost_driver = {
+	.probe		= panfrost_probe,
+	.remove		= panfrost_remove,
+	.driver		= {
+		.name	= "panfrost",
+		.pm	= &panfrost_pm_ops,
+		.of_match_table = dt_match,
+	},
+};
+module_platform_driver(panfrost_driver);
+
+MODULE_AUTHOR("Panfrost Project Developers");
+MODULE_DESCRIPTION("Panfrost DRM Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h
new file mode 100644
index 000000000000..5056777c7744
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_features.h
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+#ifndef __PANFROST_FEATURES_H__
+#define __PANFROST_FEATURES_H__
+
+#include <linux/bitops.h>
+
+#include "panfrost_device.h"
+
+enum panfrost_hw_feature {
+	HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	HW_FEATURE_XAFFINITY,
+	HW_FEATURE_OUT_OF_ORDER_EXEC,
+	HW_FEATURE_MRT,
+	HW_FEATURE_BRNDOUT_CC,
+	HW_FEATURE_INTERPIPE_REG_ALIASING,
+	HW_FEATURE_LD_ST_TILEBUFFER,
+	HW_FEATURE_MSAA_16X,
+	HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	HW_FEATURE_T7XX_PAIRING_RULES,
+	HW_FEATURE_LD_ST_LEA_TEX,
+	HW_FEATURE_LINEAR_FILTER_FLOAT,
+	HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	HW_FEATURE_TEST4_DATUM_MODE,
+	HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	HW_FEATURE_BRNDOUT_KILL,
+	HW_FEATURE_WARPING,
+	HW_FEATURE_V4,
+	HW_FEATURE_FLUSH_REDUCTION,
+	HW_FEATURE_PROTECTED_MODE,
+	HW_FEATURE_COHERENCY_REG,
+	HW_FEATURE_PROTECTED_DEBUG_MODE,
+	HW_FEATURE_AARCH64_MMU,
+	HW_FEATURE_TLS_HASHING,
+	HW_FEATURE_THREAD_GROUP_SPLIT,
+	HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+};
+
+#define hw_features_t600 (\
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_V4))
+
+#define hw_features_t620 (\
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_V4))
+
+#define hw_features_t720 (\
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_OPTIMIZED_COVERAGE_MASK) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_V4))
+
+
+#define hw_features_t760 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT))
+
+// T860
+#define hw_features_t860 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT))
+
+#define hw_features_t880 hw_features_t860
+
+#define hw_features_t830 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT))
+
+#define hw_features_t820 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT))
+
+#define hw_features_g71 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG))
+
+#define hw_features_g72 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG))
+
+#define hw_features_g51 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG))
+
+#define hw_features_g52 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG))
+
+#define hw_features_g76 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
+	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
+	BIT_ULL(HW_FEATURE_TLS_HASHING) | \
+	BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
+
+#define hw_features_g31 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_WARPING) | \
+	BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \
+	BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \
+	BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \
+	BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \
+	BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \
+	BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \
+	BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \
+	BIT_ULL(HW_FEATURE_MRT) | \
+	BIT_ULL(HW_FEATURE_MSAA_16X) | \
+	BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \
+	BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \
+	BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \
+	BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
+	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
+	BIT_ULL(HW_FEATURE_TLS_HASHING) | \
+	BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
+
+static inline bool panfrost_has_hw_feature(struct panfrost_device *pfdev,
+					   enum panfrost_hw_feature feat)
+{
+	return test_bit(feat, pfdev->features.hw_features);
+}
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
new file mode 100644
index 000000000000..8a0376283a21
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+
+#include <drm/panfrost_drm.h>
+#include "panfrost_device.h"
+#include "panfrost_gem.h"
+#include "panfrost_mmu.h"
+
+/* Called by the DRM core on the last userspace/kernel unreference of the
+ * BO. Unmaps it, releases its drm_mm node, then frees the shmem object.
+ */
+void panfrost_gem_free_object(struct drm_gem_object *obj)
+{
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+	struct panfrost_device *pfdev = obj->dev->dev_private;
+
+	panfrost_mmu_unmap(bo);
+
+	spin_lock(&pfdev->mm_lock);	/* same lock that guards node insertion */
+	drm_mm_remove_node(&bo->node);
+	spin_unlock(&pfdev->mm_lock);
+
+	drm_gem_shmem_free_object(obj);
+}
+
+static const struct drm_gem_object_funcs panfrost_gem_funcs = {
+	.free = panfrost_gem_free_object,
+	.print_info = drm_gem_shmem_print_info,
+	.pin = drm_gem_shmem_pin,
+	.unpin = drm_gem_shmem_unpin,
+	.get_sg_table = drm_gem_shmem_get_sg_table,
+	.vmap = drm_gem_shmem_vmap,
+	.vunmap = drm_gem_shmem_vunmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+/**
+ * panfrost_gem_create_object - Implementation of driver->gem_create_object.
+ * @dev: DRM device
+ * @size: Size in bytes of the memory the object will reference
+ *
+ * Allocates the panfrost BO wrapper and reserves a drm_mm node of
+ * roundup(@size, PAGE_SIZE) pages. Return: the GEM object, or NULL on failure.
+ */
+struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_gem_object *obj;
+	int ret;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return NULL;
+
+	obj->base.base.funcs = &panfrost_gem_funcs;
+
+	/* pfdev->mm is shared by all BOs, so insert the node under mm_lock. */
+	spin_lock(&pfdev->mm_lock);
+	ret = drm_mm_insert_node(&pfdev->mm, &obj->node,
+				 roundup(size, PAGE_SIZE) >> PAGE_SHIFT);
+	spin_unlock(&pfdev->mm_lock);
+	if (ret) {
+		/* NULL, not ERR_PTR(): same failure convention as the kzalloc path */
+		kfree(obj);
+		return NULL;
+	}
+
+	return &obj->base.base;
+}
+
+struct drm_gem_object *
+panfrost_gem_prime_import_sg_table(struct drm_device *dev,
+				   struct dma_buf_attachment *attach,
+				   struct sg_table *sgt)
+{
+	struct drm_gem_object *gem;
+
+	/* Let the shmem helper build the GEM object around the sg table. */
+	gem = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+	if (IS_ERR(gem))
+		return ERR_CAST(gem);
+
+	/* The imported object uses the dma-buf's reservation object. */
+	gem->resv = attach->dmabuf->resv;
+
+	/* Map the BO through panfrost_mmu_map() before handing it back. */
+	panfrost_mmu_map(to_panfrost_bo(gem));
+
+	return gem;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
new file mode 100644
index 000000000000..045000eb5fcf
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#ifndef __PANFROST_GEM_H__
+#define __PANFROST_GEM_H__
+
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_mm.h>
+
+struct panfrost_gem_object {
+	struct drm_gem_shmem_object base;
+
+	struct drm_mm_node node;
+};
+
+static inline
+struct  panfrost_gem_object *to_panfrost_bo(struct drm_gem_object *obj)
+{
+	return container_of(to_drm_gem_shmem_obj(obj), struct panfrost_gem_object, base);
+}
+
+struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size);
+
+struct drm_gem_object *
+panfrost_gem_prime_import_sg_table(struct drm_device *dev,
+				   struct dma_buf_attachment *attach,
+				   struct sg_table *sgt);
+
+#endif /* __PANFROST_GEM_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
new file mode 100644
index 000000000000..aceaf6e44a09
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+
+#include "panfrost_device.h"
+#include "panfrost_features.h"
+#include "panfrost_issues.h"
+#include "panfrost_gpu.h"
+#include "panfrost_regs.h"
+
+#define gpu_write(dev, reg, data) writel(data, (dev)->iomem + (reg))
+#define gpu_read(dev, reg) readl((dev)->iomem + (reg))	/* args parenthesized, as in job_read() */
+
+static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+	u32 state = gpu_read(pfdev, GPU_INT_STAT);
+	u32 fault_status = gpu_read(pfdev, GPU_FAULT_STATUS);
+
+	if (!state)	/* nothing pending here; the line is requested IRQF_SHARED */
+		return IRQ_NONE;
+
+	if (state & GPU_IRQ_MASK_ERROR) {
+		u64 address = (u64) gpu_read(pfdev, GPU_FAULT_ADDRESS_HI) << 32;
+		address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO);
+
+		dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n",
+			 fault_status & 0xFF, panfrost_exception_name(pfdev, fault_status),
+			 address);
+
+		if (state & GPU_IRQ_MULTIPLE_FAULT)
+			dev_warn(pfdev->dev, "There were multiple GPU faults - some have not been reported\n");
+
+		gpu_write(pfdev, GPU_INT_MASK, 0);	/* presumably masks all GPU IRQs after a fault */
+	}
+
+	gpu_write(pfdev, GPU_INT_CLEAR, state);	/* acknowledge exactly the bits read above */
+
+	return IRQ_HANDLED;
+}
+
+int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
+{
+	int ret;
+	u32 val;
+
+	gpu_write(pfdev, GPU_INT_MASK, 0);	/* completion is polled below, not signalled */
+	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED);	/* drop any stale bit */
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET);
+	/* Poll the raw status every 100us, for at most 10ms, for reset completion. */
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT,
+		val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000);
+
+	if (ret) {
+		dev_err(pfdev->dev, "gpu soft reset timed out\n");
+		return ret;
+	}
+
+	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
+	gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL);	/* restore the interrupt mask */
+
+	return 0;
+}
+
+static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev)
+{
+	u32 quirks = 0;	/* reused as scratch for each config register below */
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8443) ||
+	    panfrost_has_hw_issue(pfdev, HW_ISSUE_11035))
+		quirks |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10327))
+		quirks |= SC_SDC_DISABLE_OQ_DISCARD;
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10797))
+		quirks |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!panfrost_has_hw_issue(pfdev, GPUCORE_1619)) {
+		if (panfrost_model_cmp(pfdev, 0x750) < 0) /* T60x, T62x, T72x */
+			quirks |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (panfrost_model_cmp(pfdev, 0x880) <= 0) /* T76x, T8xx */
+			quirks |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_TLS_HASHING))
+		quirks |= SC_TLS_HASH_ENABLE;
+
+	if (quirks)	/* shader config is written only when some quirk applies */
+		gpu_write(pfdev, GPU_SHADER_CONFIG, quirks);
+
+	/* Tiler config: read-modify-write, always written back. */
+	quirks = gpu_read(pfdev, GPU_TILER_CONFIG);
+
+	/* Set tiler clock gate override if required */
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_T76X_3953))
+		quirks |= TC_CLOCK_GATE_OVERRIDE;
+
+	gpu_write(pfdev, GPU_TILER_CONFIG, quirks);
+
+	/* L2/MMU config: read-modify-write, always written back. */
+	quirks = gpu_read(pfdev, GPU_L2_MMU_CONFIG);
+
+	/* Limit read & write ID width for AXI */
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
+		quirks &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS |
+			    L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
+	else
+		quirks &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS |
+			    L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+
+	gpu_write(pfdev, GPU_L2_MMU_CONFIG, quirks);
+
+	quirks = 0;	/* job manager config: built from zero, written only if non-zero */
+	if ((panfrost_model_eq(pfdev, 0x860) || panfrost_model_eq(pfdev, 0x880)) &&
+	    pfdev->features.revision >= 0x2000)
+		quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT;
+	else if (panfrost_model_eq(pfdev, 0x6000) &&
+		 pfdev->features.coherency_features == COHERENCY_ACE)
+		quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+			   JM_FORCE_COHERENCY_FEATURES_SHIFT;
+
+	if (quirks)
+		gpu_write(pfdev, GPU_JM_CONFIG, quirks);
+}
+
+#define MAX_HW_REVS 6
+
+struct panfrost_model {
+	const char *name;	/* printed as "mali-<name>" at init */
+	u32 id;			/* matched with panfrost_model_eq() */
+	u32 id_mask;		/* not set by GPU_MODEL() */
+	u64 features;		/* hw_features_<name> bitmask */
+	u64 issues;		/* hw_issues_<name> bitmask, every revision */
+	struct {
+		u32 revision;	/* rev << 12 | p << 4 | s, see GPU_REV_EXT() */
+		u64 issues;	/* additional issues for this revision */
+	} revs[MAX_HW_REVS];
+};
+
+#define GPU_MODEL(_name, _id, ...) \
+{\
+	.name = __stringify(_name),				\
+	.id = _id,						\
+	.features = hw_features_##_name,			\
+	.issues = hw_issues_##_name,				\
+	.revs = { __VA_ARGS__ },				\
+}
+
+#define GPU_REV_EXT(name, _rev, _p, _s, stat) \
+{\
+	.revision = (_rev) << 12 | (_p) << 4 | (_s),		\
+	.issues = hw_issues_##name##_r##_rev##p##_p##stat,	\
+}
+#define GPU_REV(name, r, p) GPU_REV_EXT(name, r, p, 0, )
+
+static const struct panfrost_model gpu_models[] = {
+	/* T60x has an oddball version */
+	GPU_MODEL(t600, 0x600,
+		GPU_REV_EXT(t600, 0, 0, 1, _15dev0)),
+	GPU_MODEL(t620, 0x620,
+		GPU_REV(t620, 0, 1), GPU_REV(t620, 1, 0)),
+	GPU_MODEL(t720, 0x720),
+	GPU_MODEL(t760, 0x750,
+		GPU_REV(t760, 0, 0), GPU_REV(t760, 0, 1),
+		GPU_REV_EXT(t760, 0, 1, 0, _50rel0),
+		GPU_REV(t760, 0, 2), GPU_REV(t760, 0, 3)),
+	GPU_MODEL(t820, 0x820),
+	GPU_MODEL(t830, 0x830),
+	GPU_MODEL(t860, 0x860),
+	GPU_MODEL(t880, 0x880),
+
+	GPU_MODEL(g71, 0x6000,
+		GPU_REV_EXT(g71, 0, 0, 1, _05dev0)),
+	GPU_MODEL(g72, 0x6001),
+	GPU_MODEL(g51, 0x7000),
+	GPU_MODEL(g76, 0x7001),
+	GPU_MODEL(g52, 0x7002),
+	GPU_MODEL(g31, 0x7003, GPU_REV(g31, 1, 0)),
+	{ /* sentinel: the lookup loop stops on .name == NULL */ },
+};
+
+static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
+{
+	u32 gpu_id, num_js, major, minor, status, rev;
+	const char *name = "unknown";
+	u64 hw_feat = 0;
+	u64 hw_issues = hw_issues_all;
+	const struct panfrost_model *model;
+	int i;
+
+	pfdev->features.l2_features = gpu_read(pfdev, GPU_L2_FEATURES);
+	pfdev->features.core_features = gpu_read(pfdev, GPU_CORE_FEATURES);
+	pfdev->features.tiler_features = gpu_read(pfdev, GPU_TILER_FEATURES);
+	pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES);
+	pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES);
+	pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES);
+	pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
+	for (i = 0; i < 4; i++)
+		pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i));
+
+	pfdev->features.as_present = gpu_read(pfdev, GPU_AS_PRESENT);
+
+	pfdev->features.js_present = gpu_read(pfdev, GPU_JS_PRESENT);
+	num_js = hweight32(pfdev->features.js_present);
+	for (i = 0; i < num_js; i++)
+		pfdev->features.js_features[i] = gpu_read(pfdev, GPU_JS_FEATURES(i));
+
+	pfdev->features.shader_present = gpu_read(pfdev, GPU_SHADER_PRESENT_LO);
+	pfdev->features.shader_present |= (u64)gpu_read(pfdev, GPU_SHADER_PRESENT_HI) << 32;
+
+	pfdev->features.tiler_present = gpu_read(pfdev, GPU_TILER_PRESENT_LO);
+	pfdev->features.tiler_present |= (u64)gpu_read(pfdev, GPU_TILER_PRESENT_HI) << 32;
+
+	pfdev->features.l2_present = gpu_read(pfdev, GPU_L2_PRESENT_LO);
+	pfdev->features.l2_present |= (u64)gpu_read(pfdev, GPU_L2_PRESENT_HI) << 32;
+	pfdev->features.nr_core_groups = hweight64(pfdev->features.l2_present);
+
+	pfdev->features.stack_present = gpu_read(pfdev, GPU_STACK_PRESENT_LO);
+	pfdev->features.stack_present |= (u64)gpu_read(pfdev, GPU_STACK_PRESENT_HI) << 32;
+
+	gpu_id = gpu_read(pfdev, GPU_ID);
+	pfdev->features.revision = gpu_id & 0xffff;
+	pfdev->features.id = gpu_id >> 16;
+
+	/* The T60x has an oddball ID value. Fix it up to the standard Midgard
+	 * format so we (and userspace) don't have to special case it.
+	 */
+	if (pfdev->features.id == 0x6956)
+		pfdev->features.id = 0x0600;
+
+	major = (pfdev->features.revision >> 12) & 0xf;
+	minor = (pfdev->features.revision >> 4) & 0xff;
+	status = pfdev->features.revision & 0xf;
+	rev = pfdev->features.revision;
+
+	gpu_id = pfdev->features.id;
+
+	for (model = gpu_models; model->name; model++) {
+		int best = -1;
+
+		if (!panfrost_model_eq(pfdev, model->id))
+			continue;
+
+		name = model->name;
+		hw_feat = model->features;
+		hw_issues |= model->issues;
+		for (i = 0; i < MAX_HW_REVS; i++) {
+			if (model->revs[i].revision == rev) {
+				best = i;
+				break;
+			} else if (model->revs[i].revision == (rev & ~0xf))
+				best = i;
+		}
+
+		if (best >= 0)
+			hw_issues |= model->revs[best].issues;
+
+		break;
+	}
+
+	bitmap_from_u64(pfdev->features.hw_features, hw_feat);
+	bitmap_from_u64(pfdev->features.hw_issues, hw_issues);
+
+	dev_info(pfdev->dev, "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x",
+		 name, gpu_id, major, minor, status);
+	dev_info(pfdev->dev, "features: %64pb, issues: %64pb",
+		 pfdev->features.hw_features,
+		 pfdev->features.hw_issues);
+
+	dev_info(pfdev->dev, "Features: L2:0x%08x Shader:0x%08x Tiler:0x%08x Mem:0x%0x MMU:0x%08x AS:0x%x JS:0x%x",
+		 gpu_read(pfdev, GPU_L2_FEATURES),
+		 gpu_read(pfdev, GPU_CORE_FEATURES),
+		 gpu_read(pfdev, GPU_TILER_FEATURES),
+		 gpu_read(pfdev, GPU_MEM_FEATURES),
+		 gpu_read(pfdev, GPU_MMU_FEATURES),
+		 gpu_read(pfdev, GPU_AS_PRESENT),
+		 gpu_read(pfdev, GPU_JS_PRESENT));
+
+	dev_info(pfdev->dev, "shader_present=0x%0llx l2_present=0x%0llx",
+		 pfdev->features.shader_present, pfdev->features.l2_present);
+}
+
+void panfrost_gpu_power_on(struct panfrost_device *pfdev)
+{
+	int ret;
+	u32 val;
+
+	/* Just turn on everything for now; each poll: 100us step, 1ms timeout */
+	gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present);
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO,
+		val, val == pfdev->features.l2_present, 100, 1000);
+	/* NOTE(review): only the *_LO halves of the present masks are used - confirm */
+	gpu_write(pfdev, STACK_PWRON_LO, pfdev->features.stack_present);
+	ret |= readl_relaxed_poll_timeout(pfdev->iomem + STACK_READY_LO,
+		val, val == pfdev->features.stack_present, 100, 1000);
+
+	gpu_write(pfdev, SHADER_PWRON_LO, pfdev->features.shader_present);
+	ret |= readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO,
+		val, val == pfdev->features.shader_present, 100, 1000);
+
+	gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present);
+	ret |= readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO,
+		val, val == pfdev->features.tiler_present, 100, 1000);
+
+	if (ret)	/* OR of the four poll results: non-zero if any of them failed */
+		dev_err(pfdev->dev, "error powering up gpu");
+}
+
+void panfrost_gpu_power_off(struct panfrost_device *pfdev)
+{
+	gpu_write(pfdev, TILER_PWROFF_LO, 0);	/* NOTE(review): power_on writes the present */
+	gpu_write(pfdev, SHADER_PWROFF_LO, 0);	/* masks to *_PWRON_LO; writing 0 here may */
+	gpu_write(pfdev, STACK_PWROFF_LO, 0);	/* select no cores at all - confirm against */
+	gpu_write(pfdev, L2_PWROFF_LO, 0);	/* the register specification. */
+}
+
+int panfrost_gpu_init(struct panfrost_device *pfdev)
+{
+	int err, irq;
+
+	err = panfrost_gpu_soft_reset(pfdev);	/* reset first; bail out on timeout */
+	if (err)
+		return err;
+
+	panfrost_gpu_init_features(pfdev);
+
+	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu");
+	if (irq <= 0)	/* keep the real errno (e.g. -EPROBE_DEFER); only 0 maps to -ENODEV */
+		return irq ? irq : -ENODEV;
+
+	err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler,
+			       IRQF_SHARED, "gpu", pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "failed to request gpu irq");
+		return err;
+	}
+
+	panfrost_gpu_init_quirks(pfdev);	/* quirks are applied before power-on */
+	panfrost_gpu_power_on(pfdev);
+
+	return 0;
+}
+
+void panfrost_gpu_fini(struct panfrost_device *pfdev)
+{
+	panfrost_gpu_power_off(pfdev);	/* the only teardown step performed here */
+}
+
+u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev)
+{
+	/* The flush ID register is read only when flush reduction is supported. */
+	bool has_flush_id = panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION);
+	return has_flush_id ? gpu_read(pfdev, GPU_LATEST_FLUSH_ID) : 0;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h
new file mode 100644
index 000000000000..4112412087b2
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANFROST_GPU_H__
+#define __PANFROST_GPU_H__
+
+struct panfrost_device;
+
+int panfrost_gpu_init(struct panfrost_device *pfdev);
+void panfrost_gpu_fini(struct panfrost_device *pfdev);
+
+u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev);
+
+int panfrost_gpu_soft_reset(struct panfrost_device *pfdev);
+void panfrost_gpu_power_on(struct panfrost_device *pfdev);
+void panfrost_gpu_power_off(struct panfrost_device *pfdev);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_issues.h b/drivers/gpu/drm/panfrost/panfrost_issues.h
new file mode 100644
index 000000000000..cec6dcdadb5c
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_issues.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+#ifndef __PANFROST_ISSUES_H__
+#define __PANFROST_ISSUES_H__
+
+#include <linux/bitops.h>
+
+#include "panfrost_device.h"
+
+/*
+ * This is not a complete list of issues, but only the ones the driver needs
+ * to care about.
+ */
+enum panfrost_hw_issue {
+	HW_ISSUE_6367,
+	HW_ISSUE_6787,
+	HW_ISSUE_8186,
+	HW_ISSUE_8245,
+	HW_ISSUE_8316,
+	HW_ISSUE_8394,
+	HW_ISSUE_8401,
+	HW_ISSUE_8408,
+	HW_ISSUE_8443,
+	HW_ISSUE_8987,
+	HW_ISSUE_9435,
+	HW_ISSUE_9510,
+	HW_ISSUE_9630,
+	HW_ISSUE_10327,
+	HW_ISSUE_10649,
+	HW_ISSUE_10676,
+	HW_ISSUE_10797,
+	HW_ISSUE_10817,
+	HW_ISSUE_10883,
+	HW_ISSUE_10959,
+	HW_ISSUE_10969,
+	HW_ISSUE_11020,
+	HW_ISSUE_11024,
+	HW_ISSUE_11035,
+	HW_ISSUE_11056,
+	HW_ISSUE_T76X_3542,
+	HW_ISSUE_T76X_3953,
+	HW_ISSUE_TMIX_8463,
+	GPUCORE_1619,
+	HW_ISSUE_TMIX_8438,
+	HW_ISSUE_TGOX_R1_1234,
+	HW_ISSUE_END
+};
+
+#define hw_issues_all (\
+	BIT_ULL(HW_ISSUE_9435))
+
+#define hw_issues_t600 (\
+	BIT_ULL(HW_ISSUE_6367) | \
+	BIT_ULL(HW_ISSUE_6787) | \
+	BIT_ULL(HW_ISSUE_8408) | \
+	BIT_ULL(HW_ISSUE_9510) | \
+	BIT_ULL(HW_ISSUE_10649) | \
+	BIT_ULL(HW_ISSUE_10676) | \
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11035) | \
+	BIT_ULL(HW_ISSUE_11056) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t600_r0p0_15dev0 (\
+	BIT_ULL(HW_ISSUE_8186) | \
+	BIT_ULL(HW_ISSUE_8245) | \
+	BIT_ULL(HW_ISSUE_8316) | \
+	BIT_ULL(HW_ISSUE_8394) | \
+	BIT_ULL(HW_ISSUE_8401) | \
+	BIT_ULL(HW_ISSUE_8443) | \
+	BIT_ULL(HW_ISSUE_8987) | \
+	BIT_ULL(HW_ISSUE_9630) | \
+	BIT_ULL(HW_ISSUE_10969) | \
+	BIT_ULL(GPUCORE_1619))
+
+#define hw_issues_t620 (\
+	BIT_ULL(HW_ISSUE_10649) | \
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_10959) | \
+	BIT_ULL(HW_ISSUE_11056) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t620_r0p1 (\
+	BIT_ULL(HW_ISSUE_10327) | \
+	BIT_ULL(HW_ISSUE_10676) | \
+	BIT_ULL(HW_ISSUE_10817) | \
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_11035))
+
+#define hw_issues_t620_r1p0 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024))
+
+#define hw_issues_t720 (\
+	BIT_ULL(HW_ISSUE_10649) | \
+	BIT_ULL(HW_ISSUE_10797) | \
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_11056) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t760 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t760_r0p0 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p1 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p1_50rel0 (\
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p2 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p3 (\
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t820 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t830 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t860 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t880 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_g31 0
+
+#define hw_issues_g31_r1p0 (\
+	BIT_ULL(HW_ISSUE_TGOX_R1_1234))
+
+#define hw_issues_g51 0
+
+#define hw_issues_g52 0
+
+#define hw_issues_g71 (\
+	BIT_ULL(HW_ISSUE_TMIX_8463) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_g71_r0p0_05dev0 (\
+	BIT_ULL(HW_ISSUE_T76X_3953))
+
+#define hw_issues_g72 0
+
+#define hw_issues_g76 0
+
+static inline bool panfrost_has_hw_issue(struct panfrost_device *pfdev,
+					 enum panfrost_hw_issue issue)
+{
+	return test_bit(issue, pfdev->features.hw_issues);
+}
+
+#endif /* __PANFROST_ISSUES_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
new file mode 100644
index 000000000000..0a7ed04f7d52
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reservation.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panfrost_drm.h>
+
+#include "panfrost_device.h"
+#include "panfrost_devfreq.h"
+#include "panfrost_job.h"
+#include "panfrost_features.h"
+#include "panfrost_issues.h"
+#include "panfrost_gem.h"
+#include "panfrost_regs.h"
+#include "panfrost_gpu.h"
+#include "panfrost_mmu.h"
+
+/*
+ * MMIO accessors for the job manager registers: @reg is a byte offset
+ * added to the device's mapped register base (dev->iomem).
+ */
+#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
+#define job_read(dev, reg) readl(dev->iomem + (reg))
+
+/* Per job slot scheduling state; one instance per entry of NUM_JOB_SLOTS. */
+struct panfrost_queue_state {
+	/* DRM GPU scheduler instance, set up by drm_sched_init() */
+	struct drm_gpu_scheduler sched;
+
+	/* fence context obtained from dma_fence_context_alloc() */
+	u64 fence_context;
+	/* last sequence number handed out by panfrost_fence_create() */
+	u64 emit_seqno;
+};
+
+/* Job manager state hung off pfdev->js. */
+struct panfrost_job_slot {
+	/* one queue per hardware job slot */
+	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
+	/* passed to dma_fence_init() as the lock for all panfrost fences */
+	spinlock_t job_lock;
+};
+
+/* Map a scheduler job back to the panfrost_job that embeds it as 'base'. */
+static struct panfrost_job *
+to_panfrost_job(struct drm_sched_job *sched_job)
+{
+	struct panfrost_job *job;
+
+	job = container_of(sched_job, struct panfrost_job, base);
+
+	return job;
+}
+
+/* Driver fence created for each job handed to the hardware. */
+struct panfrost_fence {
+	/* must stay the first member: to_panfrost_fence() casts the pointer */
+	struct dma_fence base;
+	/* DRM device, copied from pfdev->ddev at creation time */
+	struct drm_device *dev;
+	/* panfrost seqno for signaled() test */
+	u64 seqno;
+	/* job slot index; selects the timeline name */
+	int queue;
+};
+
+/*
+ * Recover the panfrost_fence wrapping @fence. 'base' is the first member
+ * of struct panfrost_fence, so the resulting pointer value equals @fence.
+ */
+static inline struct panfrost_fence *
+to_panfrost_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct panfrost_fence, base);
+}
+
+/* dma_fence_ops.get_driver_name callback: always returns "panfrost". */
+static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "panfrost";
+}
+
+/*
+ * dma_fence_ops.get_timeline_name callback.
+ *
+ * Returns a fixed name for job slots 0, 1 and 2, and NULL for any other
+ * queue index stored in the fence.
+ */
+static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
+{
+	const int slot = to_panfrost_fence(fence)->queue;
+
+	if (slot == 0)
+		return "panfrost-js-0";
+	if (slot == 1)
+		return "panfrost-js-1";
+	if (slot == 2)
+		return "panfrost-js-2";
+
+	return NULL;
+}
+
+/* Fence callbacks; only the two naming hooks are provided. */
+static const struct dma_fence_ops panfrost_fence_ops = {
+	.get_driver_name = panfrost_fence_get_driver_name,
+	.get_timeline_name = panfrost_fence_get_timeline_name,
+};
+
+/*
+ * Allocate and initialise a fence for job slot @js_num.
+ *
+ * The slot's emit_seqno is advanced by one and the new value becomes the
+ * fence's sequence number. Returns the embedded dma_fence, or
+ * ERR_PTR(-ENOMEM) when the allocation fails.
+ */
+static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
+{
+	struct panfrost_job_slot *slots = pfdev->js;
+	struct panfrost_queue_state *q = &slots->queue[js_num];
+	struct panfrost_fence *pf;
+
+	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
+	if (!pf)
+		return ERR_PTR(-ENOMEM);
+
+	pf->dev = pfdev->ddev;
+	pf->queue = js_num;
+
+	q->emit_seqno += 1;
+	pf->seqno = q->emit_seqno;
+
+	dma_fence_init(&pf->base, &panfrost_fence_ops, &slots->job_lock,
+		       q->fence_context, pf->seqno);
+
+	return &pf->base;
+}
+
+/*
+ * Pick the hardware job slot for @job from its requirement flags.
+ *
+ * As compiled, this returns 0 when PANFROST_JD_REQ_FS is set and 1
+ * otherwise; the slot 2 (compute) selection below is disabled with #if 0.
+ */
+static int panfrost_job_get_slot(struct panfrost_job *job)
+{
+	/* JS0: fragment jobs.
+	 * JS1: vertex/tiler jobs
+	 * JS2: compute jobs
+	 */
+	if (job->requirements & PANFROST_JD_REQ_FS)
+		return 0;
+
+/* Not exposed to userspace yet */
+#if 0
+	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
+		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
+		    (job->pfdev->features.nr_core_groups == 2))
+			return 2;
+		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
+			return 2;
+	}
+#endif
+	return 1;
+}
+
+/*
+ * Program the core affinity for the next job on slot @js.
+ *
+ * @requirements is currently unused: every job is allowed to run on all
+ * shader cores reported in pfdev->features.shader_present. Tiler-only
+ * jobs and hardware with two coherent core groups may need something
+ * finer-grained later.
+ */
+static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
+					u32 requirements,
+					int js)
+{
+	const u64 cores = pfdev->features.shader_present;
+
+	/* the 64-bit mask is split over a LO/HI register pair */
+	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), cores & 0xFFFFFFFF);
+	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), cores >> 32);
+}
+
+/*
+ * Write @job into the "next" registers of job slot @js and start it.
+ *
+ * A runtime PM reference is held for the duration of the register
+ * programming and released (with autosuspend) before returning. If the
+ * device cannot be resumed, or the slot's JS_COMMAND_NEXT register is
+ * unexpectedly non-zero, nothing is submitted.
+ */
+static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
+{
+	struct panfrost_device *pfdev = job->pfdev;
+	unsigned long flags;
+	u32 cfg;
+	u64 jc_head = job->jc;
+	int ret;
+
+	ret = pm_runtime_get_sync(pfdev->dev);
+	if (ret < 0) {
+		/*
+		 * pm_runtime_get_sync() bumps the usage counter even when
+		 * it fails, so the reference must be dropped here.
+		 */
+		pm_runtime_put_noidle(pfdev->dev);
+		return;
+	}
+
+	/* the "next" command register must be empty before we fill it */
+	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
+		goto end;
+
+	panfrost_devfreq_record_transition(pfdev, js);
+	spin_lock_irqsave(&pfdev->hwaccess_lock, flags);
+
+	job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF);
+	job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32);
+
+	panfrost_job_write_affinity(pfdev, job->requirements, js);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	/* TODO: different address spaces */
+	cfg = JS_CONFIG_THREAD_PRI(8) |
+		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
+		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
+		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
+
+	/* GO ! */
+	dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx",
+				job, js, jc_head);
+
+	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
+
+	spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags);
+
+end:
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+}
+
+/*
+ * For each of the @bo_count objects in @bos, store the result of
+ * reservation_object_get_excl_rcu() on its reservation object in the
+ * matching @implicit_fences entry.
+ */
+static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
+					   int bo_count,
+					   struct dma_fence **implicit_fences)
+{
+	int idx = 0;
+
+	while (idx < bo_count) {
+		struct drm_gem_object *obj = bos[idx];
+
+		implicit_fences[idx] = reservation_object_get_excl_rcu(obj->resv);
+		idx++;
+	}
+}
+
+/*
+ * Install @fence as the exclusive fence on the reservation object of
+ * every one of the @bo_count objects in @bos.
+ */
+static void panfrost_attach_object_fences(struct drm_gem_object **bos,
+					  int bo_count,
+					  struct dma_fence *fence)
+{
+	int idx = 0;
+
+	while (idx < bo_count) {
+		struct drm_gem_object *obj = bos[idx];
+
+		reservation_object_add_excl_fence(obj->resv, fence);
+		idx++;
+	}
+}
+
+/*
+ * Queue @job on the scheduler entity matching its job slot.
+ *
+ * Under pfdev->sched_lock and with all of the job's BO reservations
+ * locked, the scheduler job is initialised, its "finished" fence is
+ * referenced as job->render_done_fence, the current exclusive fences of
+ * the BOs are collected as implicit dependencies and the job is pushed.
+ * render_done_fence is then installed as the new exclusive fence on every
+ * BO before the reservations are released.
+ *
+ * Returns 0 on success or the error from drm_gem_lock_reservations() /
+ * drm_sched_job_init().
+ */
+int panfrost_job_push(struct panfrost_job *job)
+{
+	struct panfrost_device *pfdev = job->pfdev;
+	int slot = panfrost_job_get_slot(job);
+	struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot];
+	struct ww_acquire_ctx acquire_ctx;
+	int ret = 0;
+
+	mutex_lock(&pfdev->sched_lock);
+
+	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
+					    &acquire_ctx);
+	if (ret) {
+		mutex_unlock(&pfdev->sched_lock);
+		return ret;
+	}
+
+	ret = drm_sched_job_init(&job->base, entity, NULL);
+	if (ret) {
+		/* reservations are still held; the unlock label drops them */
+		mutex_unlock(&pfdev->sched_lock);
+		goto unlock;
+	}
+
+	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);
+
+	kref_get(&job->refcount); /* put by scheduler job completion */
+
+	panfrost_acquire_object_fences(job->bos, job->bo_count,
+				       job->implicit_fences);
+
+	drm_sched_entity_push_job(&job->base, entity);
+
+	mutex_unlock(&pfdev->sched_lock);
+
+	panfrost_attach_object_fences(job->bos, job->bo_count,
+				      job->render_done_fence);
+
+unlock:
+	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
+
+	return ret;
+}
+
+/*
+ * kref release callback for a panfrost_job.
+ *
+ * Drops every fence and BO reference the job still holds, frees the
+ * arrays that carried them and finally frees the job itself.
+ */
+static void panfrost_job_cleanup(struct kref *ref)
+{
+	struct panfrost_job *job;
+	unsigned int n;
+
+	job = container_of(ref, struct panfrost_job, refcount);
+
+	/* explicit dependencies supplied by userspace */
+	if (job->in_fences) {
+		n = 0;
+		while (n < job->in_fence_count) {
+			dma_fence_put(job->in_fences[n]);
+			n++;
+		}
+		kvfree(job->in_fences);
+	}
+
+	/* implicit dependencies collected from the BOs */
+	if (job->implicit_fences) {
+		n = 0;
+		while (n < job->bo_count) {
+			dma_fence_put(job->implicit_fences[n]);
+			n++;
+		}
+		kvfree(job->implicit_fences);
+	}
+
+	dma_fence_put(job->done_fence);
+	dma_fence_put(job->render_done_fence);
+
+	/* buffer objects referenced by the job */
+	if (job->bos) {
+		n = 0;
+		while (n < job->bo_count) {
+			drm_gem_object_put_unlocked(job->bos[n]);
+			n++;
+		}
+		kvfree(job->bos);
+	}
+
+	kfree(job);
+}
+
+/*
+ * Drop one reference on @job; panfrost_job_cleanup() runs when the last
+ * reference goes away.
+ */
+void panfrost_job_put(struct panfrost_job *job)
+{
+	kref_put(&job->refcount, panfrost_job_cleanup);
+}
+
+/*
+ * drm_sched free_job callback: clean up the scheduler part of the job,
+ * then drop the job reference (taken in panfrost_job_push()).
+ */
+static void panfrost_job_free(struct drm_sched_job *sched_job)
+{
+	drm_sched_job_cleanup(sched_job);
+	panfrost_job_put(to_panfrost_job(sched_job));
+}
+
+/*
+ * Return the first non-NULL entry of @fences (scanning @count entries in
+ * order) and clear that entry, or NULL when every entry is already NULL.
+ */
+static struct dma_fence *panfrost_take_first_fence(struct dma_fence **fences,
+						   unsigned int count)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < count; idx++) {
+		struct dma_fence *found = fences[idx];
+
+		if (!found)
+			continue;
+
+		fences[idx] = NULL;
+		return found;
+	}
+
+	return NULL;
+}
+
+/*
+ * drm_sched dependency callback.
+ *
+ * Hands out the job's remaining dependencies one per call: explicit
+ * in-fences first, then the implicit fences taken from the BOs (at most
+ * one per BO). Each fence returned is removed from the job's array.
+ * Returns NULL once nothing is left.
+ */
+static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job,
+						 struct drm_sched_entity *s_entity)
+{
+	struct panfrost_job *job = to_panfrost_job(sched_job);
+	struct dma_fence *dep;
+
+	dep = panfrost_take_first_fence(job->in_fences, job->in_fence_count);
+	if (!dep)
+		dep = panfrost_take_first_fence(job->implicit_fences,
+						job->bo_count);
+
+	return dep;
+}
+
+/*
+ * drm_sched run_job callback.
+ *
+ * Skips jobs whose scheduler "finished" fence already carries an error.
+ * Otherwise records the job in pfdev->jobs[] for its slot, creates the
+ * fence that tracks hardware completion, stores a reference to it in
+ * job->done_fence and submits the job. Returns that fence, or NULL when
+ * the job was skipped or the fence could not be created.
+ */
+static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
+{
+	struct panfrost_job *job = to_panfrost_job(sched_job);
+	struct panfrost_device *pfdev = job->pfdev;
+	const int slot = panfrost_job_get_slot(job);
+	struct dma_fence *hw_fence;
+
+	if (unlikely(job->base.s_fence->finished.error))
+		return NULL;
+
+	pfdev->jobs[slot] = job;
+
+	hw_fence = panfrost_fence_create(pfdev, slot);
+	if (IS_ERR(hw_fence))
+		return NULL;
+
+	/* replace any fence left from an earlier run; put accepts NULL */
+	dma_fence_put(job->done_fence);
+	job->done_fence = dma_fence_get(hw_fence);
+
+	panfrost_job_hw_submit(job, slot);
+
+	return hw_fence;
+}
+
+/*
+ * Clear and then unmask the job interrupts of every job slot
+ * (MK_JS_MASK() for slots 0 .. NUM_JOB_SLOTS - 1).
+ */
+void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
+{
+	u32 all_slots = 0;
+	int slot = 0;
+
+	while (slot < NUM_JOB_SLOTS) {
+		all_slots |= MK_JS_MASK(slot);
+		slot++;
+	}
+
+	job_write(pfdev, JOB_INT_CLEAR, all_slots);
+	job_write(pfdev, JOB_INT_MASK, all_slots);
+}
+
+/*
+ * drm_sched timedout_job callback.
+ *
+ * Unless the job's done_fence has signaled in the meantime, logs the
+ * slot state, stops the schedulers of all job slots, soft-resets the
+ * GPU, re-enables address space 0, powers the GPU back on, re-enables
+ * the job interrupts, and finally resubmits and restarts all schedulers.
+ */
+static void panfrost_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct panfrost_job *job = to_panfrost_job(sched_job);
+	struct panfrost_device *pfdev = job->pfdev;
+	int js = panfrost_job_get_slot(job);
+	int i;
+
+	/*
+	 * If the GPU managed to complete this job's fence, the timeout is
+	 * spurious. Bail out.
+	 */
+	if (dma_fence_is_signaled(job->done_fence))
+		return;
+
+	dev_err(pfdev->dev, "gpu sched timeout, js=%d, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
+		js,
+		job_read(pfdev, JS_STATUS(js)),
+		job_read(pfdev, JS_HEAD_LO(js)),
+		job_read(pfdev, JS_TAIL_LO(js)),
+		sched_job);
+
+	/* quiesce every slot's scheduler before touching the hardware */
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		drm_sched_stop(&pfdev->js->queue[i].sched);
+
+	/*
+	 * NOTE(review): sched_job has already been dereferenced above (via
+	 * job->pfdev), so this NULL check cannot be what protects against a
+	 * NULL job - confirm whether it is still needed.
+	 */
+	if (sched_job)
+		drm_sched_increase_karma(sched_job);
+
+	/* panfrost_core_dump(pfdev); */
+
+	panfrost_devfreq_record_transition(pfdev, js);
+	panfrost_gpu_soft_reset(pfdev);
+
+	/* TODO: Re-enable all other address spaces */
+	panfrost_mmu_enable(pfdev, 0);
+	panfrost_gpu_power_on(pfdev);
+	panfrost_job_enable_interrupts(pfdev);
+
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
+
+	/* restart scheduler after GPU is usable again */
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		drm_sched_start(&pfdev->js->queue[i].sched, true);
+}
+
+/* Scheduler backend callbacks, passed to drm_sched_init() for every slot. */
+static const struct drm_sched_backend_ops panfrost_sched_ops = {
+	.dependency = panfrost_job_dependency,
+	.run_job = panfrost_job_run,
+	.timedout_job = panfrost_job_timedout,
+	.free_job = panfrost_job_free
+};
+
+/*
+ * Job interrupt handler.
+ *
+ * Reads JOB_INT_STAT and returns IRQ_NONE when no bit is set (the line
+ * is requested with IRQF_SHARED). Otherwise walks the slots until every
+ * pending bit has been handled: the slot's interrupt bits are cleared, an
+ * error bit is logged and reported with drm_sched_fault(), and a done bit
+ * signals the done_fence of the job recorded for that slot.
+ */
+static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+	u32 status = job_read(pfdev, JOB_INT_STAT);
+	int j;
+
+	dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status);
+
+	if (!status)
+		return IRQ_NONE;
+
+	pm_runtime_mark_last_busy(pfdev->dev);
+
+	/* loop ends once all pending bits have been cleared from status */
+	for (j = 0; status; j++) {
+		u32 mask = MK_JS_MASK(j);
+
+		if (!(status & mask))
+			continue;
+
+		job_write(pfdev, JOB_INT_CLEAR, mask);
+
+		if (status & JOB_INT_MASK_ERR(j)) {
+			/* replace whatever is queued in the "next" registers */
+			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
+
+			dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
+				j,
+				panfrost_exception_name(pfdev, job_read(pfdev, JS_STATUS(j))),
+				job_read(pfdev, JS_HEAD_LO(j)),
+				job_read(pfdev, JS_TAIL_LO(j)));
+
+			drm_sched_fault(&pfdev->js->queue[j].sched);
+		}
+
+		if (status & JOB_INT_MASK_DONE(j)) {
+			panfrost_devfreq_record_transition(pfdev, j);
+			/*
+			 * NOTE(review): pfdev->jobs[j] is dereferenced without
+			 * a NULL or bounds check - confirm a done interrupt
+			 * can only arrive for a slot with a recorded job.
+			 */
+			dma_fence_signal(pfdev->jobs[j]->done_fence);
+		}
+
+		status &= ~mask;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Set up the job manager: allocate the per-device slot state, request
+ * the "job" interrupt, create one scheduler per job slot and enable the
+ * job interrupts.
+ *
+ * Returns 0 on success, -ENOMEM if the state cannot be allocated,
+ * -ENODEV if the interrupt is not found, or the error from
+ * devm_request_irq() / drm_sched_init(). Schedulers already created are
+ * torn down again when a later one fails.
+ */
+int panfrost_job_init(struct panfrost_device *pfdev)
+{
+	struct panfrost_job_slot *js;
+	int irq, slot, err;
+
+	js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
+	pfdev->js = js;
+	if (!js)
+		return -ENOMEM;
+
+	spin_lock_init(&js->job_lock);
+
+	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
+	if (irq <= 0)
+		return -ENODEV;
+
+	err = devm_request_irq(pfdev->dev, irq, panfrost_job_irq_handler,
+			       IRQF_SHARED, "job", pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "failed to request job irq");
+		return err;
+	}
+
+	for (slot = 0; slot < NUM_JOB_SLOTS; slot++) {
+		struct panfrost_queue_state *q = &js->queue[slot];
+
+		q->fence_context = dma_fence_context_alloc(1);
+
+		err = drm_sched_init(&q->sched, &panfrost_sched_ops,
+				     1, 0, msecs_to_jiffies(500),
+				     "pan_js");
+		if (!err)
+			continue;
+
+		dev_err(pfdev->dev, "Failed to create scheduler: %d.", err);
+
+		/* unwind the schedulers created so far, newest first */
+		while (--slot >= 0)
+			drm_sched_fini(&js->queue[slot].sched);
+
+		return err;
+	}
+
+	panfrost_job_enable_interrupts(pfdev);
+
+	return 0;
+}
+
+/* Mask all job interrupts, then tear down the scheduler of every slot. */
+void panfrost_job_fini(struct panfrost_device *pfdev)
+{
+	struct panfrost_job_slot *slots = pfdev->js;
+	int slot;
+
+	job_write(pfdev, JOB_INT_MASK, 0);
+
+	for (slot = 0; slot != NUM_JOB_SLOTS; ++slot)
+		drm_sched_fini(&slots->queue[slot].sched);
+}
+
+/*
+ * Create one scheduler entity per job slot for a newly opened file, each
+ * bound to the normal-priority run queue of that slot's scheduler.
+ *
+ * Returns 0 on success; on the first drm_sched_entity_init() failure a
+ * warning is emitted and its error code is returned.
+ */
+int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
+{
+	struct panfrost_job_slot *slots = panfrost_priv->pfdev->js;
+	int slot;
+
+	for (slot = 0; slot < NUM_JOB_SLOTS; slot++) {
+		struct drm_gpu_scheduler *sched = &slots->queue[slot].sched;
+		struct drm_sched_rq *rq;
+		int ret;
+
+		rq = &sched->sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[slot],
+					    &rq, 1, NULL);
+		if (WARN_ON(ret))
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Destroy the per-slot scheduler entities of a file being closed. */
+void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
+{
+	int slot = 0;
+
+	while (slot < NUM_JOB_SLOTS) {
+		drm_sched_entity_destroy(&panfrost_priv->sched_entity[slot]);
+		slot++;
+	}
+}
+
+/*
+ * Return true when no job slot has work: for every slot the scheduler's
+ * hw_rq_count must be zero and the devfreq "busy" flag must be clear.
+ */
+int panfrost_job_is_idle(struct panfrost_device *pfdev)
+{
+	struct panfrost_job_slot *slots = pfdev->js;
+	int slot;
+
+	for (slot = 0; slot < NUM_JOB_SLOTS; slot++) {
+		/* jobs queued to the hardware, or hardware still busy */
+		if (atomic_read(&slots->queue[slot].sched.hw_rq_count) ||
+		    pfdev->devfreq.slot[slot].busy)
+			return false;
+	}
+
+	return true;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
new file mode 100644
index 000000000000..62454128a792
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANFROST_JOB_H__
+#define __PANFROST_JOB_H__
+
+#include <uapi/drm/panfrost_drm.h>
+#include <drm/gpu_scheduler.h>
+
+struct panfrost_device;
+struct panfrost_gem_object;
+struct panfrost_file_priv;
+
+/* One GPU job submitted through the DRM scheduler. */
+struct panfrost_job {
+	/* embedded scheduler job; container_of() maps it back to this struct */
+	struct drm_sched_job base;
+
+	/* released through panfrost_job_put() */
+	struct kref refcount;
+
+	struct panfrost_device *pfdev;
+	struct panfrost_file_priv *file_priv;
+
+	/* Optional fences userspace can pass in for the job to depend on. */
+	struct dma_fence **in_fences;
+	u32 in_fence_count;
+
+	/* Fence to be signaled by IRQ handler when the job is complete. */
+	struct dma_fence *done_fence;
+
+	/* address written to the slot's JS_HEAD_NEXT registers on submit */
+	__u64 jc;
+	/* PANFROST_JD_REQ_* flags; used to pick the job slot */
+	__u32 requirements;
+	/* written to JS_FLUSH_ID_NEXT when flush reduction is supported */
+	__u32 flush_id;
+
+	/* Exclusive fences we have taken from the BOs to wait for */
+	struct dma_fence **implicit_fences;
+	struct drm_gem_object **bos;
+	/* number of entries in both bos and implicit_fences */
+	u32 bo_count;
+
+	/* Fence to be signaled by drm-sched once it's done with the job */
+	struct dma_fence *render_done_fence;
+};
+
+int panfrost_job_init(struct panfrost_device *pfdev);
+void panfrost_job_fini(struct panfrost_device *pfdev);
+int panfrost_job_open(struct panfrost_file_priv *panfrost_priv);
+void panfrost_job_close(struct panfrost_file_priv *panfrost_priv);
+int panfrost_job_push(struct panfrost_job *job);
+void panfrost_job_put(struct panfrost_job *job);
+void panfrost_job_enable_interrupts(struct panfrost_device *pfdev);
+int panfrost_job_is_idle(struct panfrost_device *pfdev);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
new file mode 100644
index 000000000000..762b1bd2a8c2
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier:	GPL-2.0
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/sizes.h>
+
+#include "panfrost_device.h"
+#include "panfrost_mmu.h"
+#include "panfrost_gem.h"
+#include "panfrost_features.h"
+#include "panfrost_regs.h"
+
+/*
+ * MMIO accessors for the MMU registers: @reg is a byte offset added to
+ * the device's mapped register base (dev->iomem). @reg is parenthesized
+ * so that an expression argument cannot re-associate with the addition.
+ */
+#define mmu_write(dev, reg, data) writel(data, dev->iomem + (reg))
+#define mmu_read(dev, reg) readl(dev->iomem + (reg))
+
+/* MMU state hung off pfdev->mmu. */
+struct panfrost_mmu {
+	/* configuration passed to alloc_io_pgtable_ops() */
+	struct io_pgtable_cfg pgtbl_cfg;
+	/* page table operations used for map/unmap */
+	struct io_pgtable_ops *pgtbl_ops;
+	/* held across page table updates in panfrost_mmu_map()/unmap() */
+	struct mutex lock;
+};
+
+/*
+ * Poll AS_STATUS of address space @as_nr until the AS_ACTIVE bit is
+ * clear, i.e. no MMU command is in flight.
+ *
+ * Returns 0 when the address space is ready, or the poll helper's error
+ * code (after logging) when the bit stays set.
+ */
+static int wait_ready(struct panfrost_device *pfdev, u32 as_nr)
+{
+	u32 status;
+	int err;
+
+	err = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr),
+		status, !(status & AS_STATUS_AS_ACTIVE), 10, 1000);
+	if (!err)
+		return 0;
+
+	dev_err(pfdev->dev, "AS_ACTIVE bit stuck\n");
+
+	return err;
+}
+
+/*
+ * Issue MMU command @cmd on address space @as_nr once any previous
+ * command has finished. Returns 0 if the command was written, or the
+ * error from wait_ready() (in which case nothing is written).
+ */
+static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd)
+{
+	int err = wait_ready(pfdev, as_nr);
+
+	if (err)
+		return err;
+
+	mmu_write(pfdev, AS_COMMAND(as_nr), cmd);
+
+	return 0;
+}
+
+/*
+ * Lock the region [@iova, @iova + @size) of address space @as_nr ahead
+ * of an MMU operation.
+ *
+ * The lock address register takes the page-aligned base with a width
+ * code in the low bits. The code is 11 for a single page and grows by
+ * one for each doubling of the page count; a count that is not a power
+ * of two is rounded up to the next one.
+ *
+ * The page count is computed in 64 bits and without rounding @size up
+ * first: callers pass ~0UL to mean "everything", for which
+ * round_up(size, PAGE_SIZE) wraps to 0 and the 32-bit fls() would then
+ * lead to a shift by a negative amount.
+ */
+static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
+			u64 iova, size_t size)
+{
+	u8 region_width;
+	u64 region = iova & PAGE_MASK;
+	u64 num_pages;
+
+	/* whole pages, plus one more for a partial trailing page */
+	num_pages = ((u64)size >> PAGE_SHIFT) + !!(size & (PAGE_SIZE - 1));
+	if (!num_pages)
+		num_pages = 1;	/* an empty range still locks one page */
+
+	/*
+	 * fls64 returns 1 .. 64 for a non-zero argument, so
+	 * 10 + fls64(num_pages) starts at 11 for a single page.
+	 */
+	region_width = 10 + fls64(num_pages);
+	if (num_pages != (1ull << (region_width - 11))) {
+		/* not pow2, so must go up to the next pow2 */
+		region_width += 1;
+	}
+	region |= region_width;
+
+	/* Lock the region that needs to be updated */
+	mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), region & 0xFFFFFFFFUL);
+	mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), (region >> 32) & 0xFFFFFFFFUL);
+	write_cmd(pfdev, as_nr, AS_COMMAND_LOCK);
+}
+
+
+/*
+ * Run MMU command @op on address space @as_nr for the given range.
+ *
+ * Under pfdev->hwaccess_lock (IRQs disabled): the region is locked first
+ * unless @op is AS_COMMAND_UNLOCK, then @op is issued and the function
+ * waits for the address space to become idle again.
+ *
+ * Returns the result of the final wait_ready(); failures of the
+ * intermediate write_cmd() calls are not propagated.
+ */
+static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr,
+		u64 iova, size_t size, u32 op)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&pfdev->hwaccess_lock, flags);
+
+	if (op != AS_COMMAND_UNLOCK)
+		lock_region(pfdev, as_nr, iova, size);
+
+	/* Run the MMU operation */
+	write_cmd(pfdev, as_nr, op);
+
+	/* Wait for the flush to complete */
+	ret = wait_ready(pfdev, as_nr);
+
+	spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/*
+ * Enable address space @as_nr: clear and unmask all MMU interrupts,
+ * program the translation table base and memory attributes from the
+ * io-pgtable configuration, then issue AS_COMMAND_UPDATE.
+ */
+void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr)
+{
+	struct io_pgtable_cfg *cfg = &pfdev->mmu->pgtbl_cfg;
+	u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
+	u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
+
+	mmu_write(pfdev, MMU_INT_CLEAR, ~0);
+	mmu_write(pfdev, MMU_INT_MASK, ~0);
+
+	/* 64-bit values are split over LO/HI register pairs */
+	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL);
+	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32);
+
+	/* Need to revisit mem attrs.
+	 * NC is the default, Mali driver is inner WT.
+	 */
+	mmu_write(pfdev, AS_MEMATTR_LO(as_nr), memattr & 0xffffffffUL);
+	mmu_write(pfdev, AS_MEMATTR_HI(as_nr), memattr >> 32);
+
+	write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
+}
+
+/*
+ * Disable address space @as_nr: zero its translation table base and
+ * memory attribute registers, then issue AS_COMMAND_UPDATE.
+ */
+static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
+{
+	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0);
+	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0);
+
+	mmu_write(pfdev, AS_MEMATTR_LO(as_nr), 0);
+	mmu_write(pfdev, AS_MEMATTR_HI(as_nr), 0);
+
+	write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
+}
+
+/*
+ * Choose the page size for the next mapping step: SZ_2M when @addr is
+ * 2M-aligned and at least 2M of @size remains, SZ_4K otherwise.
+ */
+static size_t get_pgsize(u64 addr, size_t size)
+{
+	const bool aligned_2m = !(addr & (SZ_2M - 1));
+
+	if (aligned_2m && size >= SZ_2M)
+		return SZ_2M;
+
+	return SZ_4K;
+}
+
+/*
+ * Map the pages backing @bo into the GPU address space at the address
+ * given by bo->node.start (in pages).
+ *
+ * Each scatterlist entry is mapped in 2M or 4K steps as chosen by
+ * get_pgsize(); the page tables for the BO's range are then flushed with
+ * AS_COMMAND_FLUSH_PT on address space 0. A runtime PM reference is held
+ * while the hardware is touched.
+ *
+ * Returns 0 on success, the error encoded in the sg table pointer, or
+ * the error from pm_runtime_get_sync().
+ */
+int panfrost_mmu_map(struct panfrost_gem_object *bo)
+{
+	struct drm_gem_object *obj = &bo->base.base;
+	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
+	struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops;
+	u64 iova = bo->node.start << PAGE_SHIFT;
+	unsigned int count;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	int ret;
+
+	sgt = drm_gem_shmem_get_pages_sgt(obj);
+	if (WARN_ON(IS_ERR(sgt)))
+		return PTR_ERR(sgt);
+
+	ret = pm_runtime_get_sync(pfdev->dev);
+	if (ret < 0) {
+		/*
+		 * pm_runtime_get_sync() bumps the usage counter even when
+		 * it fails, so the reference must be dropped here.
+		 */
+		pm_runtime_put_noidle(pfdev->dev);
+		return ret;
+	}
+
+	mutex_lock(&pfdev->mmu->lock);
+
+	for_each_sg(sgt->sgl, sgl, sgt->nents, count) {
+		unsigned long paddr = sg_dma_address(sgl);
+		size_t len = sg_dma_len(sgl);
+
+		dev_dbg(pfdev->dev, "map: iova=%llx, paddr=%lx, len=%zx", iova, paddr, len);
+
+		while (len) {
+			/* both addresses must be 2M-aligned for a 2M page */
+			size_t pgsize = get_pgsize(iova | paddr, len);
+
+			ops->map(ops, iova, paddr, pgsize, IOMMU_WRITE | IOMMU_READ);
+			iova += pgsize;
+			paddr += pgsize;
+			len -= pgsize;
+		}
+	}
+
+	mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT,
+			    bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT);
+
+	mutex_unlock(&pfdev->mmu->lock);
+
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+
+	return 0;
+}
+
+/*
+ * Remove the GPU mapping of @bo.
+ *
+ * The BO's range is unmapped in 2M or 4K steps as chosen by
+ * get_pgsize(), stopping early if the page table code reports that
+ * nothing was unmapped; the range is then flushed with
+ * AS_COMMAND_FLUSH_PT on address space 0. Nothing is unmapped when the
+ * device cannot be runtime-resumed.
+ */
+void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
+{
+	struct drm_gem_object *obj = &bo->base.base;
+	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
+	struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops;
+	u64 iova = bo->node.start << PAGE_SHIFT;
+	size_t len = bo->node.size << PAGE_SHIFT;
+	size_t unmapped_len = 0;
+	int ret;
+
+	dev_dbg(pfdev->dev, "unmap: iova=%llx, len=%zx", iova, len);
+
+	ret = pm_runtime_get_sync(pfdev->dev);
+	if (ret < 0) {
+		/*
+		 * pm_runtime_get_sync() bumps the usage counter even when
+		 * it fails, so the reference must be dropped here.
+		 */
+		pm_runtime_put_noidle(pfdev->dev);
+		return;
+	}
+
+	mutex_lock(&pfdev->mmu->lock);
+
+	while (unmapped_len < len) {
+		size_t unmapped_page;
+		size_t pgsize = get_pgsize(iova, len - unmapped_len);
+
+		unmapped_page = ops->unmap(ops, iova, pgsize);
+		if (!unmapped_page)
+			break;
+
+		iova += unmapped_page;
+		unmapped_len += unmapped_page;
+	}
+
+	mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT,
+			    bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT);
+
+	mutex_unlock(&pfdev->mmu->lock);
+
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+}
+
+/*
+ * tlb_flush_all callback: issue AS_COMMAND_FLUSH_MEM on address space 0
+ * for the range starting at 0 with size ~0UL. @cookie is the
+ * panfrost_device passed to alloc_io_pgtable_ops().
+ */
+static void mmu_tlb_inv_context_s1(void *cookie)
+{
+	struct panfrost_device *pfdev = cookie;
+
+	mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
+}
+
+/*
+ * tlb_add_flush callback: intentionally empty. panfrost_mmu_map() and
+ * panfrost_mmu_unmap() flush the whole BO range themselves.
+ */
+static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
+				     size_t granule, bool leaf, void *cookie)
+{}
+
+/* tlb_sync callback: currently does nothing (see TODO below). */
+static void mmu_tlb_sync_context(void *cookie)
+{
+	//struct panfrost_device *pfdev = cookie;
+	// TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X
+}
+
+/* TLB maintenance callbacks handed to io-pgtable via pgtbl_cfg.tlb. */
+static const struct iommu_gather_ops mmu_tlb_ops = {
+	.tlb_flush_all	= mmu_tlb_inv_context_s1,
+	.tlb_add_flush	= mmu_tlb_inv_range_nosync,
+	.tlb_sync	= mmu_tlb_sync_context,
+};
+
+/*
+ * Translate the access type field of @fault_status into a printable
+ * name. The ATOMIC encoding is only reported as such on hardware with
+ * HW_FEATURE_AARCH64_MMU and is "UNKNOWN" elsewhere. An unrecognised
+ * encoding triggers a warning and yields NULL.
+ */
+static const char *access_type_name(struct panfrost_device *pfdev,
+		u32 fault_status)
+{
+	const u32 type = fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK;
+
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC)
+		return panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU) ?
+			"ATOMIC" : "UNKNOWN";
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+		return "READ";
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_WRITE)
+		return "WRITE";
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_EX)
+		return "EXECUTE";
+
+	WARN_ON(1);
+	return NULL;
+}
+
+/*
+ * MMU interrupt handler.
+ *
+ * Reads MMU_INT_STAT and returns IRQ_NONE when no bit is set (the line
+ * is requested with IRQF_SHARED). Otherwise, for every address space
+ * with a pending bit, the fault status and address are read, a decoded
+ * report is logged and the address space's interrupt bits are cleared.
+ * Faults are only reported, not recovered from.
+ */
+static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+	u32 status = mmu_read(pfdev, MMU_INT_STAT);
+	int i;
+
+	if (!status)
+		return IRQ_NONE;
+
+	dev_err(pfdev->dev, "mmu irq status=%x\n", status);
+
+	/* loop ends once all pending bits have been cleared from status */
+	for (i = 0; status; i++) {
+		/* each address space owns bit i and bit i + 16 */
+		u32 mask = BIT(i) | BIT(i + 16);
+		u64 addr;
+		u32 fault_status;
+		u32 exception_type;
+		u32 access_type;
+		u32 source_id;
+
+		if (!(status & mask))
+			continue;
+
+		fault_status = mmu_read(pfdev, AS_FAULTSTATUS(i));
+		addr = mmu_read(pfdev, AS_FAULTADDRESS_LO(i));
+		addr |= (u64)mmu_read(pfdev, AS_FAULTADDRESS_HI(i)) << 32;
+
+		/* decode the fault status */
+		exception_type = fault_status & 0xFF;
+		access_type = (fault_status >> 8) & 0x3;
+		source_id = (fault_status >> 16);
+
+		/* terminal fault, print info about the fault */
+		dev_err(pfdev->dev,
+			"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+			"Reason: %s\n"
+			"raw fault status: 0x%X\n"
+			"decoded fault status: %s\n"
+			"exception type 0x%X: %s\n"
+			"access type 0x%X: %s\n"
+			"source id 0x%X\n",
+			i, addr,
+			"TODO",
+			fault_status,
+			(fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+			exception_type, panfrost_exception_name(pfdev, exception_type),
+			access_type, access_type_name(pfdev, fault_status),
+			source_id);
+
+		mmu_write(pfdev, MMU_INT_CLEAR, mask);
+
+		status &= ~mask;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Set up the MMU: allocate the per-device MMU state, request the "mmu"
+ * interrupt, clear and unmask the MMU interrupts, allocate ARM_MALI_LPAE
+ * page tables (4K and 2M pages, address sizes taken from the MMU
+ * features register value) and enable address space 0.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, -ENODEV if the
+ * interrupt is not found, or the error from devm_request_irq().
+ */
+int panfrost_mmu_init(struct panfrost_device *pfdev)
+{
+	struct io_pgtable_ops *pgtbl_ops;
+	int err, irq;
+
+	pfdev->mmu = devm_kzalloc(pfdev->dev, sizeof(*pfdev->mmu), GFP_KERNEL);
+	if (!pfdev->mmu)
+		return -ENOMEM;
+
+	mutex_init(&pfdev->mmu->lock);
+
+	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu");
+	if (irq <= 0)
+		return -ENODEV;
+
+	err = devm_request_irq(pfdev->dev, irq, panfrost_mmu_irq_handler,
+			       IRQF_SHARED, "mmu", pfdev);
+
+	if (err) {
+		dev_err(pfdev->dev, "failed to request mmu irq");
+		return err;
+	}
+	mmu_write(pfdev, MMU_INT_CLEAR, ~0);
+	mmu_write(pfdev, MMU_INT_MASK, ~0);
+
+	/* ias/oas come from bits 7:0 and 15:8 of mmu_features */
+	pfdev->mmu->pgtbl_cfg = (struct io_pgtable_cfg) {
+		.pgsize_bitmap	= SZ_4K | SZ_2M,
+		.ias		= FIELD_GET(0xff, pfdev->features.mmu_features),
+		.oas		= FIELD_GET(0xff00, pfdev->features.mmu_features),
+		.tlb		= &mmu_tlb_ops,
+		.iommu_dev	= pfdev->dev,
+	};
+
+	/* pfdev is the cookie handed back to the mmu_tlb_ops callbacks */
+	pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &pfdev->mmu->pgtbl_cfg,
+					 pfdev);
+	if (!pgtbl_ops)
+		return -ENOMEM;
+
+	pfdev->mmu->pgtbl_ops = pgtbl_ops;
+
+	panfrost_mmu_enable(pfdev, 0);
+
+	return 0;
+}
+
+/*
+ * Tear down the MMU: mask all MMU interrupts, disable address space 0
+ * and free the page tables.
+ */
+void panfrost_mmu_fini(struct panfrost_device *pfdev)
+{
+	mmu_write(pfdev, MMU_INT_MASK, 0);
+	mmu_disable(pfdev, 0);
+
+	free_io_pgtable_ops(pfdev->mmu->pgtbl_ops);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h
new file mode 100644
index 000000000000..f5878d86a5ce
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#ifndef __PANFROST_MMU_H__
+#define __PANFROST_MMU_H__
+
+struct panfrost_gem_object;
+
+int panfrost_mmu_map(struct panfrost_gem_object *bo);
+void panfrost_mmu_unmap(struct panfrost_gem_object *bo);
+
+int panfrost_mmu_init(struct panfrost_device *pfdev);
+void panfrost_mmu_fini(struct panfrost_device *pfdev);
+
+void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
new file mode 100644
index 000000000000..578c5fc2188b
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/*
+ * Register definitions based on mali_midg_regmap.h
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ */
+#ifndef __PANFROST_REGS_H__
+#define __PANFROST_REGS_H__
+
+#define GPU_ID				0x00
+#define GPU_L2_FEATURES			0x004	/* (RO) Level 2 cache features */
+#define GPU_CORE_FEATURES		0x008	/* (RO) Shader Core Features */
+#define GPU_TILER_FEATURES		0x00C	/* (RO) Tiler Features */
+#define GPU_MEM_FEATURES		0x010	/* (RO) Memory system features */
+#define   GROUPS_L2_COHERENT		BIT(0)	/* Cores groups are l2 coherent */
+
+#define GPU_MMU_FEATURES		0x014	/* (RO) MMU features */
+#define GPU_AS_PRESENT			0x018	/* (RO) Address space slots present */
+#define GPU_JS_PRESENT			0x01C	/* (RO) Job slots present */
+
+#define GPU_INT_RAWSTAT			0x20
+#define GPU_INT_CLEAR			0x24
+#define GPU_INT_MASK			0x28
+#define GPU_INT_STAT			0x2c
+#define   GPU_IRQ_FAULT			BIT(0)
+#define   GPU_IRQ_MULTIPLE_FAULT	BIT(7)
+#define   GPU_IRQ_RESET_COMPLETED	BIT(8)
+#define   GPU_IRQ_POWER_CHANGED		BIT(9)
+#define   GPU_IRQ_POWER_CHANGED_ALL	BIT(10)
+#define   GPU_IRQ_PERFCNT_SAMPLE_COMPLETED BIT(16)
+#define   GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17)
+#define   GPU_IRQ_MASK_ALL			 \
+	  (GPU_IRQ_FAULT			|\
+	   GPU_IRQ_MULTIPLE_FAULT		|\
+	   GPU_IRQ_RESET_COMPLETED		|\
+	   GPU_IRQ_POWER_CHANGED		|\
+	   GPU_IRQ_POWER_CHANGED_ALL		|\
+	   GPU_IRQ_PERFCNT_SAMPLE_COMPLETED	|\
+	   GPU_IRQ_CLEAN_CACHES_COMPLETED)
+#define GPU_IRQ_MASK_ERROR	   		\
+	(					\
+	 GPU_IRQ_FAULT				|\
+	 GPU_IRQ_MULTIPLE_FAULT)
+#define GPU_CMD				0x30
+#define   GPU_CMD_SOFT_RESET		0x01
+#define GPU_STATUS			0x34
+#define GPU_LATEST_FLUSH_ID		0x38
+#define GPU_FAULT_STATUS		0x3C
+#define GPU_FAULT_ADDRESS_LO		0x40
+#define GPU_FAULT_ADDRESS_HI		0x44
+
+#define GPU_THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define GPU_THREAD_MAX_WORKGROUP_SIZE	0x0A4	/* (RO) Maximum workgroup size */
+#define GPU_THREAD_MAX_BARRIER_SIZE	0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define GPU_THREAD_FEATURES		0x0AC	/* (RO) Thread features */
+#define GPU_THREAD_TLS_ALLOC		0x310   /* (RO) Number of threads per core that
+						 * TLS must be allocated for */
+
+#define GPU_TEXTURE_FEATURES(n)		(0x0B0 + ((n) * 4))
+#define GPU_JS_FEATURES(n)		(0x0C0 + ((n) * 4))
+
+#define GPU_SHADER_PRESENT_LO		0x100	/* (RO) Shader core present bitmap, low word */
+#define GPU_SHADER_PRESENT_HI		0x104	/* (RO) Shader core present bitmap, high word */
+#define GPU_TILER_PRESENT_LO		0x110	/* (RO) Tiler core present bitmap, low word */
+#define GPU_TILER_PRESENT_HI		0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define GPU_L2_PRESENT_LO		0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define GPU_L2_PRESENT_HI		0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define GPU_COHERENCY_FEATURES		0x300	/* (RO) Coherency features present */
+#define   COHERENCY_ACE_LITE		BIT(0)
+#define   COHERENCY_ACE			BIT(1)
+
+#define GPU_STACK_PRESENT_LO		0xE00   /* (RO) Core stack present bitmap, low word */
+#define GPU_STACK_PRESENT_HI		0xE04   /* (RO) Core stack present bitmap, high word */
+
+#define SHADER_READY_LO			0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI			0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO			0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI			0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO			0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI			0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO			0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI			0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO			0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI			0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO			0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI			0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO			0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI			0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO			0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI			0xE24   /* (RO) Core stack power on bitmap, high word */
+
+
+#define SHADER_PWROFF_LO		0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI		0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO			0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI			0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO			0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI			0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO			0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI			0xE34   /* (RO) Core stack power off bitmap, high word */
+
+
+#define SHADER_PWRTRANS_LO		0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI		0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO		0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI		0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO			0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI			0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO		0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI		0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO		0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI		0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO		0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI		0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO			0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI			0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define GPU_JM_CONFIG			0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define GPU_SHADER_CONFIG		0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define GPU_TILER_CONFIG		0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define GPU_L2_MMU_CONFIG		0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT	23
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY		(0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT	24
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS		(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT	(0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER	(0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF		(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT	26
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES		(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT	(0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER	(0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF	(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+/* 3-bit variants of the limit fields: own bit positions (12, 15) and a 0x7 mask */
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT	12
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS		(0x7 << L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT	15
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES	(0x7 << L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+/* SHADER_CONFIG register */
+#define SC_ALT_COUNTERS			BIT(3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL	BIT(4)
+#define SC_SDC_DISABLE_OQ_DISCARD	BIT(6)
+#define SC_LS_ALLOW_ATTR_TYPES		BIT(16)
+#define SC_LS_PAUSEBUFFER_DISABLE	BIT(16)
+#define SC_TLS_HASH_ENABLE		BIT(17)
+#define SC_LS_ATTR_CHECK_DISABLE	BIT(18)
+#define SC_ENABLE_TEXGRD_FLAGS		BIT(25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+#define TC_CLOCK_GATE_OVERRIDE		BIT(0)
+
+/* JM_CONFIG register */
+#define JM_TIMESTAMP_OVERRIDE		BIT(0)
+#define JM_CLOCK_GATE_OVERRIDE		BIT(1)
+#define JM_JOB_THROTTLE_ENABLE		BIT(2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT	3
+#define JM_MAX_JOB_THROTTLE_LIMIT	0x3F
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT 2
+#define JM_IDVS_GROUP_SIZE_SHIFT	16
+#define JM_MAX_IDVS_GROUP_SIZE		0x3F
+
+
+/* Job Control regs */
+#define JOB_INT_RAWSTAT			0x1000
+#define JOB_INT_CLEAR			0x1004
+#define JOB_INT_MASK			0x1008
+#define JOB_INT_STAT			0x100c
+#define JOB_INT_JS_STATE		0x1010
+#define JOB_INT_THROTTLE		0x1014
+
+#define MK_JS_MASK(j)			(0x10001 << (j))
+#define JOB_INT_MASK_ERR(j)		BIT((j) + 16)
+#define JOB_INT_MASK_DONE(j)		BIT(j)
+
+#define JS_BASE				0x1800
+#define JS_HEAD_LO(n)			(JS_BASE + ((n) * 0x80) + 0x00)
+#define JS_HEAD_HI(n)			(JS_BASE + ((n) * 0x80) + 0x04)
+#define JS_TAIL_LO(n)			(JS_BASE + ((n) * 0x80) + 0x08)
+#define JS_TAIL_HI(n)			(JS_BASE + ((n) * 0x80) + 0x0c)
+#define JS_AFFINITY_LO(n)		(JS_BASE + ((n) * 0x80) + 0x10)
+#define JS_AFFINITY_HI(n)		(JS_BASE + ((n) * 0x80) + 0x14)
+#define JS_CONFIG(n)			(JS_BASE + ((n) * 0x80) + 0x18)
+#define JS_XAFFINITY(n)			(JS_BASE + ((n) * 0x80) + 0x1c)
+#define JS_COMMAND(n)			(JS_BASE + ((n) * 0x80) + 0x20)
+#define JS_STATUS(n)			(JS_BASE + ((n) * 0x80) + 0x24)
+#define JS_HEAD_NEXT_LO(n)		(JS_BASE + ((n) * 0x80) + 0x40)
+#define JS_HEAD_NEXT_HI(n)		(JS_BASE + ((n) * 0x80) + 0x44)
+#define JS_AFFINITY_NEXT_LO(n)		(JS_BASE + ((n) * 0x80) + 0x50)
+#define JS_AFFINITY_NEXT_HI(n)		(JS_BASE + ((n) * 0x80) + 0x54)
+#define JS_CONFIG_NEXT(n)		(JS_BASE + ((n) * 0x80) + 0x58)
+#define JS_COMMAND_NEXT(n)		(JS_BASE + ((n) * 0x80) + 0x60)
+#define JS_FLUSH_ID_NEXT(n)		(JS_BASE + ((n) * 0x80) + 0x70)
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_CLEAN		BIT(8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE	(3u << 8)
+#define JS_CONFIG_START_MMU			BIT(10)
+#define JS_CONFIG_JOB_CHAIN_FLAG		BIT(11)
+#define JS_CONFIG_END_FLUSH_CLEAN		BIT(12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE	(3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION	BIT(14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK	BIT(15)
+#define JS_CONFIG_THREAD_PRI(n)			((n) << 16)
+
+#define JS_COMMAND_NOP			0x00
+#define JS_COMMAND_START		0x01
+#define JS_COMMAND_SOFT_STOP		0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP		0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0		0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0		0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1		0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1		0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_STATUS_EVENT_ACTIVE		0x08
+
+
+/* MMU regs */
+#define MMU_INT_RAWSTAT			0x2000
+#define MMU_INT_CLEAR			0x2004
+#define MMU_INT_MASK			0x2008
+#define MMU_INT_STAT			0x200c
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP			0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE		0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK			0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK		0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH		0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+						   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT		0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM		0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+						   flush all L2 caches then issue a flush region command to all MMUs */
+
+#define MMU_AS(as)			(0x2400 + ((as) << 6))
+
+#define AS_TRANSTAB_LO(as)		(MMU_AS(as) + 0x00) /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI(as)		(MMU_AS(as) + 0x04) /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO(as)		(MMU_AS(as) + 0x08) /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI(as)		(MMU_AS(as) + 0x0C) /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO(as)		(MMU_AS(as) + 0x10) /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI(as)		(MMU_AS(as) + 0x14) /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND(as)			(MMU_AS(as) + 0x18) /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS(as)		(MMU_AS(as) + 0x1C) /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO(as)		(MMU_AS(as) + 0x20) /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI(as)		(MMU_AS(as) + 0x24) /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS(as)			(MMU_AS(as) + 0x28) /* (RO) Status flags for address space n */
+/* Additional Bifrost AS registers */
+#define AS_TRANSCFG_LO(as)		(MMU_AS(as) + 0x30) /* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_HI(as)		(MMU_AS(as) + 0x34) /* (RW) Translation table configuration for address space n, high word */
+#define AS_FAULTEXTRA_LO(as)		(MMU_AS(as) + 0x38) /* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_HI(as)		(MMU_AS(as) + 0x3C) /* (RO) Secondary fault address for address space n, high word */
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK	0xfffffffffffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY	0x2
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE		0x3
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK		0x3
+#define AS_TRANSTAB_LPAE_READ_INNER		BIT(2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER		BIT(4)
+
+#define AS_STATUS_AS_ACTIVE			0x01
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK		(0x3 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC	(0x0 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX		(0x1 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ		(0x2 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE	(0x3 << 8)
+
+#endif
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index ee59da4a0172..4e5922c89d7b 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -361,13 +361,6 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	paddr = drm_fb_cma_get_gem_addr(fb, state, 0);
 	DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr);
 
-	/*
-	 * backend DMA accesses DRAM directly, bypassing the system
-	 * bus. As such, the address range is different and the buffer
-	 * address needs to be corrected.
-	 */
-	paddr -= PHYS_OFFSET;
-
 	if (fb->format->is_yuv)
 		return sun4i_backend_update_yuv_buffer(backend, fb, paddr);
 
@@ -803,6 +796,27 @@ static int sun4i_backend_bind(struct device *dev, struct device *master,
 	dev_set_drvdata(dev, backend);
 	spin_lock_init(&backend->frontend_lock);
 
+	if (of_find_property(dev->of_node, "interconnects", NULL)) {
+		/*
+		 * This assumes we have the same DMA constraints for all the
+		 * devices in our pipeline (all the backends, but also the
+		 * frontends). This sounds bad, but it has always been the case
+		 * for us, and DRM doesn't do per-device allocation either, so
+		 * we would need to fix DRM first...
+		 */
+		ret = of_dma_configure(drm->dev, dev->of_node, true);
+		if (ret)
+			return ret;
+	} else {
+		/*
+		 * If we don't have the interconnect property, most likely
+		 * because of an old DT, we need to set the DMA offset by hand
+		 * on our device since the RAM mapping is at 0 for the DMA bus,
+		 * unlike the CPU.
+		 */
+		drm->dev->dma_pfn_offset = PHYS_PFN_OFFSET;
+	}
+
 	backend->engine.node = dev->of_node;
 	backend->engine.ops = &sun4i_backend_engine_ops;
 	backend->engine.id = sun4i_backend_of_get_id(dev->of_node);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index fa92e992a282..9d8d8124b1f6 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -236,8 +236,8 @@ static struct sun4i_tcon *sun4i_get_tcon0(struct drm_device *drm)
 	return NULL;
 }
 
-void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel,
-			const struct drm_encoder *encoder)
+static void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel,
+			       const struct drm_encoder *encoder)
 {
 	int ret = -ENOTSUPP;
 
diff --git a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c
index b1e7c76e9c17..3267d0f9b9b2 100644
--- a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c
+++ b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c
@@ -269,12 +269,12 @@ static int sun8i_tcon_top_remove(struct platform_device *pdev)
 	return 0;
 }
 
-const struct sun8i_tcon_top_quirks sun8i_r40_tcon_top_quirks = {
+static const struct sun8i_tcon_top_quirks sun8i_r40_tcon_top_quirks = {
 	.has_tcon_tv1	= true,
 	.has_dsi	= true,
 };
 
-const struct sun8i_tcon_top_quirks sun50i_h6_tcon_top_quirks = {
+static const struct sun8i_tcon_top_quirks sun50i_h6_tcon_top_quirks = {
 	/* Nothing special */
 };
 
diff --git a/drivers/gpu/drm/tinydrm/hx8357d.c b/drivers/gpu/drm/tinydrm/hx8357d.c
index fab961dded87..5773d0fb6ca1 100644
--- a/drivers/gpu/drm/tinydrm/hx8357d.c
+++ b/drivers/gpu/drm/tinydrm/hx8357d.c
@@ -267,7 +267,7 @@ static int hx8357d_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, drm);
 
-	drm_fbdev_generic_setup(drm, 32);
+	drm_fbdev_generic_setup(drm, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tinydrm/ili9225.c b/drivers/gpu/drm/tinydrm/ili9225.c
index e9116ef4b5bc..4b1a587c0134 100644
--- a/drivers/gpu/drm/tinydrm/ili9225.c
+++ b/drivers/gpu/drm/tinydrm/ili9225.c
@@ -433,7 +433,7 @@ static int ili9225_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, drm);
 
-	drm_fbdev_generic_setup(drm, 32);
+	drm_fbdev_generic_setup(drm, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tinydrm/ili9341.c b/drivers/gpu/drm/tinydrm/ili9341.c
index d15f85e837ae..4ade9e4b924f 100644
--- a/drivers/gpu/drm/tinydrm/ili9341.c
+++ b/drivers/gpu/drm/tinydrm/ili9341.c
@@ -229,7 +229,7 @@ static int ili9341_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, drm);
 
-	drm_fbdev_generic_setup(drm, 32);
+	drm_fbdev_generic_setup(drm, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c
index c6dc31084a4e..8e169846fbd8 100644
--- a/drivers/gpu/drm/tinydrm/mi0283qt.c
+++ b/drivers/gpu/drm/tinydrm/mi0283qt.c
@@ -242,7 +242,7 @@ static int mi0283qt_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, drm);
 
-	drm_fbdev_generic_setup(drm, 32);
+	drm_fbdev_generic_setup(drm, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tinydrm/repaper.c b/drivers/gpu/drm/tinydrm/repaper.c
index a29b8278324b..370629e2de94 100644
--- a/drivers/gpu/drm/tinydrm/repaper.c
+++ b/drivers/gpu/drm/tinydrm/repaper.c
@@ -1131,7 +1131,7 @@ static int repaper_probe(struct spi_device *spi)
 
 	DRM_DEBUG_DRIVER("SPI speed: %uMHz\n", spi->max_speed_hz / 1000000);
 
-	drm_fbdev_generic_setup(drm, 32);
+	drm_fbdev_generic_setup(drm, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tinydrm/st7586.c b/drivers/gpu/drm/tinydrm/st7586.c
index 560d7ac0cadc..36bb16a15f7e 100644
--- a/drivers/gpu/drm/tinydrm/st7586.c
+++ b/drivers/gpu/drm/tinydrm/st7586.c
@@ -408,7 +408,7 @@ static int st7586_probe(struct spi_device *spi)
 	DRM_DEBUG_KMS("preferred_depth=%u, rotation = %u\n",
 		      drm->mode_config.preferred_depth, rotation);
 
-	drm_fbdev_generic_setup(drm, 32);
+	drm_fbdev_generic_setup(drm, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tinydrm/st7735r.c b/drivers/gpu/drm/tinydrm/st7735r.c
index 022e9849b95b..ce9109e613e0 100644
--- a/drivers/gpu/drm/tinydrm/st7735r.c
+++ b/drivers/gpu/drm/tinydrm/st7735r.c
@@ -207,7 +207,7 @@ static int st7735r_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, drm);
 
-	drm_fbdev_generic_setup(drm, 32);
+	drm_fbdev_generic_setup(drm, 0);
 
 	return 0;
 }
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index d3700ec15cbd..4e21efbc4459 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -172,6 +172,10 @@
 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE	1
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
 
+#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
+#define ARM_MALI_LPAE_TTBR_READ_INNER	BIT(2)
+#define ARM_MALI_LPAE_TTBR_SHARE_OUTER	BIT(4)
+
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
@@ -180,11 +184,6 @@
 
 #define iopte_prot(pte)	((pte) & ARM_LPAE_PTE_ATTR_MASK)
 
-#define iopte_leaf(pte,l)					\
-	(l == (ARM_LPAE_MAX_LEVELS - 1) ?			\
-		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) :	\
-		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
-
 struct arm_lpae_io_pgtable {
 	struct io_pgtable	iop;
 
@@ -198,6 +197,15 @@ struct arm_lpae_io_pgtable {
 
 typedef u64 arm_lpae_iopte;
 
+static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
+			      enum io_pgtable_fmt fmt)
+{
+	/* ARM_MALI_LPAE leaves have the block type at every level, last included */
+	if (fmt == ARM_MALI_LPAE || lvl != (ARM_LPAE_MAX_LEVELS - 1))
+		return iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_BLOCK;
+	return iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_PAGE;
+}
+
 static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
 				     struct arm_lpae_io_pgtable *data)
 {
@@ -303,12 +311,14 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 	if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
 		pte |= ARM_LPAE_PTE_NS;
 
-	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
+	if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
 		pte |= ARM_LPAE_PTE_TYPE_PAGE;
 	else
 		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
-	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
+	if (data->iop.fmt != ARM_MALI_LPAE)
+		pte |= ARM_LPAE_PTE_AF;
+	pte |= ARM_LPAE_PTE_SH_IS;
 	pte |= paddr_to_iopte(paddr, data);
 
 	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
@@ -321,7 +331,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 {
 	arm_lpae_iopte pte = *ptep;
 
-	if (iopte_leaf(pte, lvl)) {
+	if (iopte_leaf(pte, lvl, data->iop.fmt)) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
@@ -409,7 +419,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		__arm_lpae_sync_pte(ptep, cfg);
 	}
 
-	if (pte && !iopte_leaf(pte, lvl)) {
+	if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
 		cptep = iopte_deref(pte, data);
 	} else if (pte) {
 		/* We require an unmap first */
@@ -429,31 +439,37 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 	if (data->iop.fmt == ARM_64_LPAE_S1 ||
 	    data->iop.fmt == ARM_32_LPAE_S1) {
 		pte = ARM_LPAE_PTE_nG;
-
 		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
 			pte |= ARM_LPAE_PTE_AP_RDONLY;
-
 		if (!(prot & IOMMU_PRIV))
 			pte |= ARM_LPAE_PTE_AP_UNPRIV;
-
-		if (prot & IOMMU_MMIO)
-			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
-				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
-		else if (prot & IOMMU_CACHE)
-			pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
-				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
 	} else {
 		pte = ARM_LPAE_PTE_HAP_FAULT;
 		if (prot & IOMMU_READ)
 			pte |= ARM_LPAE_PTE_HAP_READ;
 		if (prot & IOMMU_WRITE)
 			pte |= ARM_LPAE_PTE_HAP_WRITE;
+	}
+
+	/*
+	 * Note that this logic is structured to accommodate Mali LPAE
+	 * having stage-1-like attributes but stage-2-like permissions.
+	 */
+	if (data->iop.fmt == ARM_64_LPAE_S2 ||
+	    data->iop.fmt == ARM_32_LPAE_S2) {
 		if (prot & IOMMU_MMIO)
 			pte |= ARM_LPAE_PTE_MEMATTR_DEV;
 		else if (prot & IOMMU_CACHE)
 			pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
 		else
 			pte |= ARM_LPAE_PTE_MEMATTR_NC;
+	} else {
+		if (prot & IOMMU_MMIO)
+			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
+				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
+		else if (prot & IOMMU_CACHE)
+			pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
+				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
 	}
 
 	if (prot & IOMMU_NOEXEC)
@@ -511,7 +527,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
 	while (ptep != end) {
 		arm_lpae_iopte pte = *ptep++;
 
-		if (!pte || iopte_leaf(pte, lvl))
+		if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
 			continue;
 
 		__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
@@ -602,7 +618,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
 		__arm_lpae_set_pte(ptep, 0, &iop->cfg);
 
-		if (!iopte_leaf(pte, lvl)) {
+		if (!iopte_leaf(pte, lvl, iop->fmt)) {
 			/* Also flush any partial walks */
 			io_pgtable_tlb_add_flush(iop, iova, size,
 						ARM_LPAE_GRANULE(data), false);
@@ -621,7 +637,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 		}
 
 		return size;
-	} else if (iopte_leaf(pte, lvl)) {
+	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
 		/*
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
@@ -669,7 +685,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 			return 0;
 
 		/* Leaf entry? */
-		if (iopte_leaf(pte,lvl))
+		if (iopte_leaf(pte, lvl, data->iop.fmt))
 			goto found_translation;
 
 		/* Take it to the next level */
@@ -995,6 +1011,32 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	return iop;
 }
 
+static struct io_pgtable *
+arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct io_pgtable *iop;
+
+	if (cfg->ias != 48 || cfg->oas > 40)	/* accept only ias == 48 and oas <= 40 */
+		return NULL;
+
+	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);	/* keep only 4K/2M/1G page sizes */
+	iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie);	/* build the table with the 64-bit stage-1 allocator */
+	if (iop) {
+		u64 mair, ttbr;
+
+		/* Read both stage-1 values before writing: the union fields overlap */
+		mair = cfg->arm_lpae_s1_cfg.mair[0];
+		ttbr = cfg->arm_lpae_s1_cfg.ttbr[0];
+
+		cfg->arm_mali_lpae_cfg.memattr = mair;
+		cfg->arm_mali_lpae_cfg.transtab = ttbr |	/* table address plus Mali mode flags */
+			ARM_MALI_LPAE_TTBR_READ_INNER |
+			ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+	}
+
+	return iop;	/* NULL when the stage-1 allocation failed */
+}
+
 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
 	.alloc	= arm_64_lpae_alloc_pgtable_s1,
 	.free	= arm_lpae_free_pgtable,
@@ -1015,6 +1057,11 @@ struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
 	.free	= arm_lpae_free_pgtable,
 };
 
+struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
+	.alloc	= arm_mali_lpae_alloc_pgtable,
+	.free	= arm_lpae_free_pgtable,
+};
+
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
 
 static struct io_pgtable_cfg *cfg_cookie;
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 93f2880be6c6..5227cfdbb65b 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -30,6 +30,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 	[ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns,
 	[ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
 	[ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
+	[ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
 #endif
 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
 	[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index 8b552b1a6ce9..268b2cf0052a 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -90,7 +90,7 @@ struct drm_client_dev {
 int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
 		    const char *name, const struct drm_client_funcs *funcs);
 void drm_client_release(struct drm_client_dev *client);
-void drm_client_add(struct drm_client_dev *client);
+void drm_client_register(struct drm_client_dev *client);
 
 void drm_client_dev_unregister(struct drm_device *dev);
 void drm_client_dev_hotplug(struct drm_device *dev);
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 17857e458ac3..40af2866c26a 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -102,29 +102,6 @@ struct drm_fb_helper_funcs {
 	 */
 	int (*fb_probe)(struct drm_fb_helper *helper,
 			struct drm_fb_helper_surface_size *sizes);
-
-	/**
-	 * @initial_config:
-	 *
-	 * Driver callback to setup an initial fbdev display configuration.
-	 * Drivers can use this callback to tell the fbdev emulation what the
-	 * preferred initial configuration is. This is useful to implement
-	 * smooth booting where the fbdev (and subsequently all userspace) never
-	 * changes the mode, but always inherits the existing configuration.
-	 *
-	 * This callback is optional.
-	 *
-	 * RETURNS:
-	 *
-	 * The driver should return true if a suitable initial configuration has
-	 * been filled out and false when the fbdev helper should fall back to
-	 * the default probing logic.
-	 */
-	bool (*initial_config)(struct drm_fb_helper *fb_helper,
-			       struct drm_fb_helper_crtc **crtcs,
-			       struct drm_display_mode **modes,
-			       struct drm_fb_offset *offsets,
-			       bool *enabled, int width, int height);
 };
 
 struct drm_fb_helper_connector {
diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index 6f84380757ee..085d63faee12 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -15,17 +15,18 @@ struct drm_rect;
 
 void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
 		   struct drm_rect *clip);
-void drm_fb_memcpy_dstclip(void *dst, void *vaddr, struct drm_framebuffer *fb,
+void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
+			   struct drm_framebuffer *fb,
 			   struct drm_rect *clip);
 void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
 		   struct drm_rect *clip);
 void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr,
 			       struct drm_framebuffer *fb,
-			       struct drm_rect *clip, bool swap);
-void drm_fb_xrgb8888_to_rgb565_dstclip(void *dst, unsigned int dst_pitch,
+			       struct drm_rect *clip, bool swab);
+void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch,
 				       void *vaddr, struct drm_framebuffer *fb,
-				       struct drm_rect *clip, bool swap);
-void drm_fb_xrgb8888_to_rgb888_dstclip(void *dst, unsigned int dst_pitch,
+				       struct drm_rect *clip, bool swab);
+void drm_fb_xrgb8888_to_rgb888_dstclip(void __iomem *dst, unsigned int dst_pitch,
 				       void *vaddr, struct drm_framebuffer *fb,
 				       struct drm_rect *clip);
 void drm_fb_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb,
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 2955aaab3dca..5047c7ee25f5 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -381,6 +381,8 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj);
 void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
 		bool dirty, bool accessed);
 
+int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles,
+			   int count, struct drm_gem_object ***objs_out);
 struct drm_gem_object *drm_gem_object_lookup(struct drm_file *filp, u32 handle);
 long drm_gem_reservation_object_wait(struct drm_file *filep, u32 handle,
 				    bool wait_all, unsigned long timeout);
@@ -388,6 +390,11 @@ int drm_gem_lock_reservations(struct drm_gem_object **objs, int count,
 			      struct ww_acquire_ctx *acquire_ctx);
 void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count,
 				 struct ww_acquire_ctx *acquire_ctx);
+int drm_gem_fence_array_add(struct xarray *fence_array,
+			    struct dma_fence *fence);
+int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
+				     struct drm_gem_object *obj,
+				     bool write);
 int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
 			    u32 handle, u64 *offset);
 int drm_gem_dumb_destroy(struct drm_file *file,
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 6b788467b2e3..974717d6ac0c 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -112,6 +112,14 @@ struct dma_fence_cb {
  */
 struct dma_fence_ops {
 	/**
+	 * @use_64bit_seqno:
+	 *
+	 * True if this dma_fence implementation uses 64bit seqno, false
+	 * otherwise.
+	 */
+	bool use_64bit_seqno;
+
+	/**
 	 * @get_driver_name:
 	 *
 	 * Returns the driver name. This is a callback to allow drivers to
@@ -410,18 +418,19 @@ dma_fence_is_signaled(struct dma_fence *fence)
  * __dma_fence_is_later - return if f1 is chronologically later than f2
  * @f1: the first fence's seqno
  * @f2: the second fence's seqno from the same context
+ * @ops: dma_fence_ops associated with the seqno
  *
  * Returns true if f1 is chronologically later than f2. Both fences must be
  * from the same context, since a seqno is not common across contexts.
  */
-static inline bool __dma_fence_is_later(u64 f1, u64 f2)
+static inline bool __dma_fence_is_later(u64 f1, u64 f2,
+					const struct dma_fence_ops *ops)
 {
 	/* This is for backward compatibility with drivers which can only handle
-	 * 32bit sequence numbers. Use a 64bit compare when any of the higher
-	 * bits are none zero, otherwise use a 32bit compare with wrap around
-	 * handling.
+	 * 32bit sequence numbers. Use a 64bit compare when the driver says to
+	 * do so.
 	 */
-	if (upper_32_bits(f1) || upper_32_bits(f2))
+	if (ops->use_64bit_seqno)
 		return f1 > f2;
 
 	return (int)(lower_32_bits(f1) - lower_32_bits(f2)) > 0;
@@ -441,7 +450,7 @@ static inline bool dma_fence_is_later(struct dma_fence *f1,
 	if (WARN_ON(f1->context != f2->context))
 		return false;
 
-	return __dma_fence_is_later(f1->seqno, f2->seqno);
+	return __dma_fence_is_later(f1->seqno, f2->seqno, f1->ops);
 }
 
 /**
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 47d5ae559329..76969a564831 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -12,6 +12,7 @@ enum io_pgtable_fmt {
 	ARM_64_LPAE_S1,
 	ARM_64_LPAE_S2,
 	ARM_V7S,
+	ARM_MALI_LPAE,
 	IO_PGTABLE_NUM_FMTS,
 };
 
@@ -108,6 +109,11 @@ struct io_pgtable_cfg {
 			u32	nmrr;
 			u32	prrr;
 		} arm_v7s_cfg;
+
+		struct {
+			u64	transtab;
+			u64	memattr;
+		} arm_mali_lpae_cfg;
 	};
 };
 
@@ -209,5 +215,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
 
 #endif /* __IO_PGTABLE_H */
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
new file mode 100644
index 000000000000..a52e0283b90d
--- /dev/null
+++ b/include/uapi/drm/panfrost_drm.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2018 Broadcom
+ * Copyright © 2019 Collabora ltd.
+ */
+#ifndef _PANFROST_DRM_H_
+#define _PANFROST_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_PANFROST_SUBMIT			0x00
+#define DRM_PANFROST_WAIT_BO			0x01
+#define DRM_PANFROST_CREATE_BO			0x02
+#define DRM_PANFROST_MMAP_BO			0x03
+#define DRM_PANFROST_GET_PARAM			0x04
+#define DRM_PANFROST_GET_BO_OFFSET		0x05
+
+#define DRM_IOCTL_PANFROST_SUBMIT		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
+#define DRM_IOCTL_PANFROST_WAIT_BO		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
+#define DRM_IOCTL_PANFROST_CREATE_BO		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_BO, struct drm_panfrost_create_bo)
+#define DRM_IOCTL_PANFROST_MMAP_BO		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MMAP_BO, struct drm_panfrost_mmap_bo)
+#define DRM_IOCTL_PANFROST_GET_PARAM		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param)
+#define DRM_IOCTL_PANFROST_GET_BO_OFFSET	DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset)
+
+#define PANFROST_JD_REQ_FS (1 << 0)
+/**
+ * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * This asks the kernel to have the GPU execute a render command list.
+ */
+struct drm_panfrost_submit {
+
+	/** Address of the GPU mapping of the job descriptor. */
+	__u64 jc;
+
+	/** An optional array of sync objects to wait on before starting this job. */
+	__u64 in_syncs;
+
+	/** Number of sync objects to wait on before starting this job. */
+	__u32 in_sync_count;
+
+	/** An optional sync object to place the completion fence in. */
+	__u32 out_sync;
+
+	/** Pointer to a u32 array of the BOs that are referenced by the job. */
+	__u64 bo_handles;
+
+	/** Number of BO handles passed in (array size in bytes is that times 4). */
+	__u32 bo_handle_count;
+
+	/** A combination of PANFROST_JD_REQ_* */
+	__u32 requirements;
+};
+
+/**
+ * struct drm_panfrost_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_PANFROST_SUBMIT on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_panfrost_wait_bo {
+	__u32 handle;
+	__u32 pad;
+	__s64 timeout_ns;	/* absolute */
+};
+
+/**
+ * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_panfrost_create_bo {
+	__u32 size;
+	__u32 flags;
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	/* Pad, must be zero-filled. */
+	__u32 pad;
+	/**
+	 * Returned offset for the BO in the GPU address space.  This offset
+	 * is private to the DRM fd and is valid for the lifetime of the GEM
+	 * handle.
+	 *
+	 * This offset value will always be nonzero, since various HW
+	 * units treat 0 specially.
+	 */
+	__u64 offset;
+};
+
+/**
+ * struct drm_panfrost_mmap_bo - ioctl argument for mapping Panfrost BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_panfrost_mmap_bo {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 flags;
+	/** offset into the drm node to use for subsequent mmap call. */
+	__u64 offset;
+};
+
+enum drm_panfrost_param {
+	DRM_PANFROST_PARAM_GPU_PROD_ID,
+};
+
+struct drm_panfrost_get_param {
+	__u32 param;
+	__u32 pad;
+	__u64 value;
+};
+
+/**
+ * struct drm_panfrost_get_bo_offset - ioctl argument for querying the offset
+ * of a BO in the GPU address space for this DRM fd. This is the same value
+ * returned by drm_panfrost_create_bo, if that was called from this DRM fd.
+ */
+struct drm_panfrost_get_bo_offset {
+	__u32 handle;
+	__u32 pad;
+	__u64 offset;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _PANFROST_DRM_H_ */
diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 8e88eba1fa7a..0c85914d9369 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -40,8 +40,16 @@
 
 #include <linux/types.h>
 
-#define VIRTIO_GPU_F_VIRGL 0
-#define VIRTIO_GPU_F_EDID  1
+/*
+ * VIRTIO_GPU_CMD_CTX_*
+ * VIRTIO_GPU_CMD_*_3D
+ */
+#define VIRTIO_GPU_F_VIRGL               0
+
+/*
+ * VIRTIO_GPU_CMD_GET_EDID
+ */
+#define VIRTIO_GPU_F_EDID                1
 
 enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_UNDEFINED = 0,