Diffstat (limited to 'drivers/gpu')
105 files changed, 14367 insertions, 667 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5a0c476361c3..3914aaf443a8 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -74,10 +74,12 @@ config DRM_KUNIT_TEST_HELPERS config DRM_KUNIT_TEST tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS - depends on DRM && KUNIT && MMU + depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER + depends on KUNIT + depends on MMU select DRM_BUDDY - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_EXEC select DRM_EXPORT_FOR_TESTS if m select DRM_GEM_SHMEM_HELPER @@ -371,6 +373,8 @@ source "drivers/gpu/drm/lima/Kconfig" source "drivers/gpu/drm/panfrost/Kconfig" +source "drivers/gpu/drm/panthor/Kconfig" + source "drivers/gpu/drm/aspeed/Kconfig" source "drivers/gpu/drm/mcde/Kconfig" @@ -414,3 +418,16 @@ config DRM_LIB_RANDOM config DRM_PRIVACY_SCREEN bool default n + +config DRM_WERROR + bool "Compile the drm subsystem with warnings as errors" + depends on DRM && EXPERT + default n + help + A kernel build should not cause any compiler warnings, and this + enables the '-Werror' flag to enforce that rule in the drm subsystem. + + The drm subsystem enables more warnings than the kernel default, so + this config option is disabled by default. + + If in doubt, say N. diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 104b42df2e95..a73c04d2d7a3 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -5,6 +5,34 @@ CFLAGS-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE +# Unconditionally enable W=1 warnings locally +# --- begin copy-paste W=1 warnings from scripts/Makefile.extrawarn +subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter +subdir-ccflags-y += $(call cc-option, -Wrestrict) +subdir-ccflags-y += -Wmissing-format-attribute +subdir-ccflags-y += -Wold-style-definition +subdir-ccflags-y += -Wmissing-include-dirs +subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable) +subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) +subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) +subdir-ccflags-y += $(call cc-option, -Wformat-overflow) +# FIXME: fix -Wformat-truncation warnings and uncomment +#subdir-ccflags-y += $(call cc-option, -Wformat-truncation) +subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) +subdir-ccflags-y += -Wno-missing-field-initializers +subdir-ccflags-y += -Wno-type-limits +subdir-ccflags-y += -Wno-shift-negative-value +endif +ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) +subdir-ccflags-y += -Wno-sign-compare +endif +# --- end copy-paste + +# Enable -Werror in CI and development +subdir-ccflags-$(CONFIG_DRM_WERROR) += -Werror + drm-y := \ drm_aperture.o \ drm_atomic.o \ @@ -179,6 +207,7 @@ obj-$(CONFIG_DRM_XEN) += xen/ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ obj-$(CONFIG_DRM_LIMA) += lima/ obj-$(CONFIG_DRM_PANFROST) += panfrost/ +obj-$(CONFIG_DRM_PANTHOR) += panthor/ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ obj-$(CONFIG_DRM_MCDE) += mcde/ obj-$(CONFIG_DRM_TIDSS) += tidss/ diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 22d88f8ef527..b0365cc1374e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -2,13 +2,15 @@ config DRM_AMDGPU tristate "AMD GPU" - depends on DRM && PCI && MMU + depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER + depends on 
DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER + depends on MMU + depends on PCI depends on !UML select FW_LOADER - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 010b0cb7693c..8bc79924d171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -173,6 +173,12 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ? AMDGPU_PL_PREEMPT : TTM_PL_TT; places[c].flags = 0; + /* + * When GTT is just an alternative to VRAM make sure that we + * only use it as fallback and still try to fill up VRAM first. + */ + if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index efd996f6c138..d1fbf8796fea 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -92,13 +92,12 @@ config DRM_FSL_LDB config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER - select DRM_DP_AUX_BUS select DRM_KMS_HELPER - select DRM_DP_HELPER select EXTCON select CRYPTO select CRYPTO_HASH @@ -226,10 +225,10 @@ config DRM_PARADE_PS8622 config DRM_PARADE_PS8640 tristate "Parade PS8640 MIPI DSI to eDP Converter" + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER - select DRM_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL @@ -313,9 +312,9 @@ config DRM_TOSHIBA_TC358764 config DRM_TOSHIBA_TC358767 tristate "Toshiba TC358767 eDP bridge" + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_MIPI_DSI @@ -336,9 +335,9 @@ config DRM_TOSHIBA_TC358768 config DRM_TOSHIBA_TC358775 tristate "Toshiba TC358775 DSI/LVDS bridge" + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL @@ -381,15 +380,15 @@ config DRM_TI_SN65DSI83 config DRM_TI_SN65DSI86 tristate "TI SN65DSI86 DSI to eDP bridge" + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL select DRM_MIPI_DSI select AUXILIARY_BUS - select DRM_DP_AUX_BUS help Texas Instruments SN65DSI86 DSI to eDP Bridge driver diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index 173dada218ec..12bfea53bf24 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_ANALOGIX_ANX6345 tristate "Analogix ANX6345 bridge" + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on OF select 
DRM_ANALOGIX_DP - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -15,9 +15,9 @@ config DRM_ANALOGIX_ANX6345 config DRM_ANALOGIX_ANX78XX tristate "Analogix ANX78XX bridge" + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -33,11 +33,11 @@ config DRM_ANALOGIX_DP config DRM_ANALOGIX_ANX7625 tristate "Analogix Anx7625 MIPI to DP interface support" depends on DRM + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER - select DRM_DP_AUX_BUS select DRM_MIPI_DSI help ANX7625 is an ultra-low power 4K mobile HD transmitter diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index cced81633ddc..7817f6f56607 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -23,12 +23,12 @@ endif config DRM_CDNS_MHDP8546 tristate "Cadence DPI/DP bridge" - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HELPER + depends on OF select DRM_KMS_HELPER select DRM_PANEL_BRIDGE - depends on OF help Support Cadence DPI to DP bridge. This is an internal bridge and is meant to be directly embedded in a SoC. diff --git a/drivers/gpu/drm/bridge/imx/Kconfig b/drivers/gpu/drm/bridge/imx/Kconfig index 5965e8027529..7687ed652df5 100644 --- a/drivers/gpu/drm/bridge/imx/Kconfig +++ b/drivers/gpu/drm/bridge/imx/Kconfig @@ -5,9 +5,9 @@ config DRM_IMX_LDB_HELPER config DRM_IMX8MP_DW_HDMI_BRIDGE tristate "Freescale i.MX8MP HDMI-TX bridge support" - depends on OF depends on COMMON_CLK - select DRM_DW_HDMI + depends on DRM_DW_HDMI + depends on OF select DRM_IMX8MP_HDMI_PVI select PHY_FSL_SAMSUNG_HDMI_PHY help diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 1c3433b5e366..925e42f46cd8 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -1540,12 +1540,6 @@ static int it66121_probe(struct i2c_client *client) return -EINVAL; } - if (!of_device_is_available(ep)) { - of_node_put(ep); - dev_err(ctx->dev, "The remote device is disabled\n"); - return -ENODEV; - } - ctx->next_bridge = of_drm_find_bridge(ep); of_node_put(ep); if (!ctx->next_bridge) { @@ -1586,13 +1580,18 @@ static int it66121_probe(struct i2c_client *client) ctx->bridge.funcs = &it66121_bridge_funcs; ctx->bridge.of_node = dev->of_node; ctx->bridge.type = DRM_MODE_CONNECTOR_HDMIA; - ctx->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_HPD; - - ret = devm_request_threaded_irq(dev, client->irq, NULL, it66121_irq_threaded_handler, - IRQF_ONESHOT, dev_name(dev), ctx); - if (ret < 0) { - dev_err(dev, "Failed to request irq %d:%d\n", client->irq, ret); - return ret; + ctx->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; + if (client->irq > 0) { + ctx->bridge.ops |= DRM_BRIDGE_OP_HPD; + + ret = devm_request_threaded_irq(dev, client->irq, NULL, + it66121_irq_threaded_handler, + IRQF_ONESHOT, dev_name(dev), + ctx); + if (ret < 0) { + dev_err(dev, "Failed to request irq %d:%d\n", client->irq, ret); + return ret; + } } it66121_audio_codec_init(ctx, dev); diff --git 
a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index 15fc182d05ef..387f5bd86089 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_DW_HDMI tristate - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER + depends on DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_MMIO select CEC_CORE if CEC_NOTIFIER diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index cceb5aab6c83..9f2bc932c371 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -3291,40 +3291,17 @@ static void dw_hdmi_init_hw(struct dw_hdmi *hdmi) static int dw_hdmi_parse_dt(struct dw_hdmi *hdmi) { - struct device_node *endpoint; struct device_node *remote; if (!hdmi->plat_data->output_port) return 0; - endpoint = of_graph_get_endpoint_by_regs(hdmi->dev->of_node, - hdmi->plat_data->output_port, - -1); - if (!endpoint) { - /* - * On platforms whose bindings don't make the output port - * mandatory (such as Rockchip) the plat_data->output_port - * field isn't set, so it's safe to make this a fatal error. - */ - dev_err(hdmi->dev, "Missing endpoint in port@%u\n", - hdmi->plat_data->output_port); - return -ENODEV; - } - remote = of_graph_get_remote_port_parent(endpoint); - of_node_put(endpoint); - if (!remote) { - dev_err(hdmi->dev, "Endpoint in port@%u unconnected\n", - hdmi->plat_data->output_port); + remote = of_graph_get_remote_node(hdmi->dev->of_node, + hdmi->plat_data->output_port, + -1); + if (!remote) return -ENODEV; - } - - if (!of_device_is_available(remote)) { - dev_err(hdmi->dev, "port@%u remote device is disabled\n", - hdmi->plat_data->output_port); - of_node_put(remote); - return -ENODEV; - } hdmi->next_bridge = of_drm_find_bridge(remote); of_node_put(remote); diff --git a/drivers/gpu/drm/bridge/thc63lvd1024.c b/drivers/gpu/drm/bridge/thc63lvd1024.c index d4c1a601bbb5..674efc489e3a 100644 --- a/drivers/gpu/drm/bridge/thc63lvd1024.c +++ b/drivers/gpu/drm/bridge/thc63lvd1024.c @@ -123,26 +123,11 @@ static int thc63_parse_dt(struct thc63_dev *thc63) struct device_node *endpoint; struct device_node *remote; - endpoint = of_graph_get_endpoint_by_regs(thc63->dev->of_node, - THC63_RGB_OUT0, -1); - if (!endpoint) { - dev_err(thc63->dev, "Missing endpoint in port@%u\n", - THC63_RGB_OUT0); - return -ENODEV; - } - - remote = of_graph_get_remote_port_parent(endpoint); - of_node_put(endpoint); + remote = of_graph_get_remote_node(thc63->dev->of_node, + THC63_RGB_OUT0, -1); if (!remote) { - dev_err(thc63->dev, "Endpoint in port@%u unconnected\n", - THC63_RGB_OUT0); - return -ENODEV; - } - - if (!of_device_is_available(remote)) { - dev_err(thc63->dev, "port@%u remote endpoint is disabled\n", + dev_err(thc63->dev, "No remote endpoint for port@%u\n", THC63_RGB_OUT0); - of_node_put(remote); return -ENODEV; } diff --git a/drivers/gpu/drm/ci/test.yml b/drivers/gpu/drm/ci/test.yml index 0857773e5c5f..8bc63912fddb 100644 --- a/drivers/gpu/drm/ci/test.yml +++ b/drivers/gpu/drm/ci/test.yml @@ -252,11 +252,11 @@ i915:cml: i915:tgl: extends: - .i915 - parallel: 8 + parallel: 5 variables: - DEVICE_TYPE: asus-cx9400-volteer + DEVICE_TYPE: acer-cp514-2h-1130g7-volteer GPU_VERSION: tgl - RUNNER_TAG: mesa-ci-x86-64-lava-asus-cx9400-volteer + RUNNER_TAG: mesa-ci-x86-64-lava-acer-cp514-2h-1130g7-volteer .amdgpu: extends: diff --git 
a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index c0f56888c328..01f2a231aa5f 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -1,31 +1,57 @@ # SPDX-License-Identifier: MIT -config DRM_DP_AUX_BUS - tristate +config DRM_DISPLAY_HELPER + tristate "DRM Display Helpers" + depends on DRM + default y + help + DRM helpers for display adapters. + +config DRM_DISPLAY_DP_AUX_BUS + tristate "DRM DisplayPort AUX bus support" depends on DRM depends on OF || COMPILE_TEST + default y -config DRM_DISPLAY_HELPER - tristate +config DRM_DISPLAY_DP_AUX_CEC + bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support" depends on DRM + depends on DRM_DISPLAY_HELPER + depends on DRM_DISPLAY_DP_HELPER + select CEC_CORE help - DRM helpers for display adapters. + Choose this option if you want to enable HDMI CEC support for + DisplayPort/USB-C to HDMI adapters. + + Note: not all adapters support this feature, and even for those + that do support this they often do not hook up the CEC pin. + +config DRM_DISPLAY_DP_AUX_CHARDEV + bool "DRM DisplayPort AUX Interface" + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on DRM_DISPLAY_DP_HELPER + help + Choose this option to enable a /dev/drm_dp_auxN node that allows to + read and write values to arbitrary DPCD registers on the DP aux + channel. config DRM_DISPLAY_DP_HELPER - bool + bool "DRM DisplayPort Helpers" depends on DRM_DISPLAY_HELPER + default y help DRM display helpers for DisplayPort. config DRM_DISPLAY_DP_TUNNEL - bool - select DRM_DISPLAY_DP_HELPER + bool "DRM DisplayPort tunnels support" + depends on DRM_DISPLAY_DP_HELPER help Enable support for DisplayPort tunnels. This allows drivers to use DP tunnel features like the Bandwidth Allocation mode to maximize the BW utilization for display streams on Thunderbolt links. -config DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE +config DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG bool "Enable debugging the DP tunnel state" depends on REF_TRACKER depends on DRM_DISPLAY_DP_TUNNEL @@ -39,34 +65,15 @@ config DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE If in doubt, say "N". config DRM_DISPLAY_HDCP_HELPER - bool + bool "DRM HDCD Helpers" depends on DRM_DISPLAY_HELPER + default y help DRM display helpers for HDCP. config DRM_DISPLAY_HDMI_HELPER - bool + bool "DRM HDMI Helpers" depends on DRM_DISPLAY_HELPER + default y help DRM display helpers for HDMI. - -config DRM_DP_AUX_CHARDEV - bool "DRM DP AUX Interface" - depends on DRM && DRM_DISPLAY_HELPER - select DRM_DISPLAY_DP_HELPER - help - Choose this option to enable a /dev/drm_dp_auxN node that allows to - read and write values to arbitrary DPCD registers on the DP aux - channel. - -config DRM_DP_CEC - bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support" - depends on DRM && DRM_DISPLAY_HELPER - select DRM_DISPLAY_DP_HELPER - select CEC_CORE - help - Choose this option if you want to enable HDMI CEC support for - DisplayPort/USB-C to HDMI adapters. - - Note: not all adapters support this feature, and even for those - that do support this they often do not hook up the CEC pin. 
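The display/Kconfig hunk above makes the display helper symbols user-visible and gives them "default y" under DRM, so consumer drivers can state "depends on" instead of "select". A minimal sketch of how a hypothetical driver entry (DRM_FOO_BRIDGE is an illustration, not part of this patch) would express its dependencies under the new scheme:

config DRM_FOO_BRIDGE
	tristate "Example FOO DSI to eDP bridge"
	depends on DRM_DISPLAY_DP_AUX_BUS
	depends on DRM_DISPLAY_DP_HELPER
	depends on DRM_DISPLAY_HELPER
	depends on OF
	select DRM_KMS_HELPER
	help
	  Hypothetical example only: the display helpers are declared with
	  "depends on" (they default to y when DRM is enabled) rather than
	  being selected by the driver, mirroring the conversion applied to
	  the bridge, amdgpu, i915, msm and nouveau entries in this patch.
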
diff --git a/drivers/gpu/drm/display/Makefile b/drivers/gpu/drm/display/Makefile index 7ca61333c669..17d2cc73ff56 100644 --- a/drivers/gpu/drm/display/Makefile +++ b/drivers/gpu/drm/display/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT -obj-$(CONFIG_DRM_DP_AUX_BUS) += drm_dp_aux_bus.o +obj-$(CONFIG_DRM_DISPLAY_DP_AUX_BUS) += drm_dp_aux_bus.o drm_display_helper-y := drm_display_helper_mod.o drm_display_helper-$(CONFIG_DRM_DISPLAY_DP_HELPER) += \ @@ -14,7 +14,7 @@ drm_display_helper-$(CONFIG_DRM_DISPLAY_HDCP_HELPER) += drm_hdcp_helper.o drm_display_helper-$(CONFIG_DRM_DISPLAY_HDMI_HELPER) += \ drm_hdmi_helper.o \ drm_scdc_helper.o -drm_display_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o -drm_display_helper-$(CONFIG_DRM_DP_CEC) += drm_dp_cec.o +drm_display_helper-$(CONFIG_DRM_DISPLAY_DP_AUX_CHARDEV) += drm_dp_aux_dev.o +drm_display_helper-$(CONFIG_DRM_DISPLAY_DP_AUX_CEC) += drm_dp_cec.o obj-$(CONFIG_DRM_DISPLAY_HELPER) += drm_display_helper.o diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index f5d4be897866..88dca26189b9 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2113,7 +2113,7 @@ EXPORT_SYMBOL(drm_dp_aux_init); * drm_dp_aux_register() in &drm_connector_funcs.late_register, and likewise to * call drm_dp_aux_unregister() in &drm_connector_funcs.early_unregister. * Functions which don't follow this will likely Oops when - * %CONFIG_DRM_DP_AUX_CHARDEV is enabled. + * %CONFIG_DRM_DISPLAY_DP_AUX_CHARDEV is enabled. * * For devices where the AUX channel is a device that exists independently of * the &drm_device that uses it, such as SoCs and bridge devices, it is diff --git a/drivers/gpu/drm/display/drm_dp_helper_internal.h b/drivers/gpu/drm/display/drm_dp_helper_internal.h index 8917fc3af9ec..737949a2820f 100644 --- a/drivers/gpu/drm/display/drm_dp_helper_internal.h +++ b/drivers/gpu/drm/display/drm_dp_helper_internal.h @@ -5,7 +5,7 @@ struct drm_dp_aux; -#ifdef CONFIG_DRM_DP_AUX_CHARDEV +#ifdef CONFIG_DRM_DISPLAY_DP_AUX_CHARDEV int drm_dp_aux_dev_init(void); void drm_dp_aux_dev_exit(void); int drm_dp_aux_register_devnode(struct drm_dp_aux *aux); diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology_internal.h b/drivers/gpu/drm/display/drm_dp_mst_topology_internal.h index a785ccbfdd73..f41c34e26be2 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology_internal.h +++ b/drivers/gpu/drm/display/drm_dp_mst_topology_internal.h @@ -10,7 +10,9 @@ #ifndef _DRM_DP_MST_HELPER_INTERNAL_H_ #define _DRM_DP_MST_HELPER_INTERNAL_H_ -#include <drm/display/drm_dp_mst_helper.h> +struct drm_dp_sideband_msg_req_body; +struct drm_dp_sideband_msg_tx; +struct drm_printer; void drm_dp_encode_sideband_req(const struct drm_dp_sideband_msg_req_body *req, diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c index 120e0de674c1..2a91e9b11d03 100644 --- a/drivers/gpu/drm/display/drm_dp_tunnel.c +++ b/drivers/gpu/drm/display/drm_dp_tunnel.c @@ -191,7 +191,7 @@ struct drm_dp_tunnel_mgr { struct drm_dp_tunnel_group *groups; wait_queue_head_t bw_req_queue; -#ifdef CONFIG_DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE +#ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG struct ref_tracker_dir ref_tracker; #endif }; @@ -385,7 +385,7 @@ static void tunnel_put(struct drm_dp_tunnel *tunnel) kref_put(&tunnel->kref, free_tunnel); } -#ifdef CONFIG_DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE +#ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG static void track_tunnel_ref(struct drm_dp_tunnel 
*tunnel, struct ref_tracker **tracker) { @@ -1603,7 +1603,7 @@ static void cleanup_group(struct drm_dp_tunnel_group *group) drm_atomic_private_obj_fini(&group->base); } -#ifdef CONFIG_DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE +#ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG static void check_unique_stream_ids(const struct drm_dp_tunnel_group_state *group_state) { const struct drm_dp_tunnel_state *tunnel_state; @@ -1881,7 +1881,7 @@ static void destroy_mgr(struct drm_dp_tunnel_mgr *mgr) drm_WARN_ON(mgr->dev, !list_empty(&mgr->groups[i].tunnels)); } -#ifdef CONFIG_DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE +#ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG ref_tracker_dir_exit(&mgr->ref_tracker); #endif @@ -1918,7 +1918,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) return NULL; } -#ifdef CONFIG_DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE +#ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG ref_tracker_dir_init(&mgr->ref_tracker, 16, "dptun"); #endif diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 521a71c61b16..28abe9aa99ca 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -657,6 +657,13 @@ static void drm_atomic_bridge_call_post_disable(struct drm_bridge *bridge, * bridge will be called before the previous one to reverse the @pre_enable * calling direction. * + * Example: + * Bridge A ---> Bridge B ---> Bridge C ---> Bridge D ---> Bridge E + * + * With pre_enable_prev_first flag enable in Bridge B, D, E then the resulting + * @post_disable order would be, + * Bridge B, Bridge A, Bridge E, Bridge D, Bridge C. + * * Note: the bridge passed should be the one closest to the encoder */ void drm_atomic_bridge_chain_post_disable(struct drm_bridge *bridge, @@ -687,11 +694,17 @@ void drm_atomic_bridge_chain_post_disable(struct drm_bridge *bridge, */ list_for_each_entry_from(next, &encoder->bridge_chain, chain_node) { - if (next->pre_enable_prev_first) { + if (!next->pre_enable_prev_first) { next = list_prev_entry(next, chain_node); limit = next; break; } + + if (list_is_last(&next->chain_node, + &encoder->bridge_chain)) { + limit = next; + break; + } } /* Call these bridges in reverse order */ @@ -747,6 +760,13 @@ static void drm_atomic_bridge_call_pre_enable(struct drm_bridge *bridge, * If a bridge sets @pre_enable_prev_first, then the pre_enable for the * prev bridge will be called before pre_enable of this bridge. * + * Example: + * Bridge A ---> Bridge B ---> Bridge C ---> Bridge D ---> Bridge E + * + * With pre_enable_prev_first flag enable in Bridge B, D, E then the resulting + * @pre_enable order would be, + * Bridge C, Bridge D, Bridge E, Bridge A, Bridge B. + * * Note: the bridge passed should be the one closest to the encoder */ void drm_atomic_bridge_chain_pre_enable(struct drm_bridge *bridge, @@ -774,7 +794,7 @@ void drm_atomic_bridge_chain_pre_enable(struct drm_bridge *bridge, /* Found first bridge that does NOT * request prev to be enabled first */ - limit = list_prev_entry(next, chain_node); + limit = next; break; } } diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 9403b3f576f7..77fe217aeaf3 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -305,6 +305,66 @@ err_delete: } /** + * drm_client_buffer_vmap_local - Map DRM client buffer into address space + * @buffer: DRM client buffer + * @map_copy: Returns the mapped memory's address + * + * This function maps a client buffer into kernel address space. 
If the + * buffer is already mapped, it returns the existing mapping's address. + * + * Client buffer mappings are not ref'counted. Each call to + * drm_client_buffer_vmap_local() should be closely followed by a call to + * drm_client_buffer_vunmap_local(). See drm_client_buffer_vmap() for + * long-term mappings. + * + * The returned address is a copy of the internal value. In contrast to + * other vmap interfaces, you don't need it for the client's vunmap + * function. So you can modify it at will during blit and draw operations. + * + * Returns: + * 0 on success, or a negative errno code otherwise. + */ +int drm_client_buffer_vmap_local(struct drm_client_buffer *buffer, + struct iosys_map *map_copy) +{ + struct drm_gem_object *gem = buffer->gem; + struct iosys_map *map = &buffer->map; + int ret; + + drm_gem_lock(gem); + + ret = drm_gem_vmap(gem, map); + if (ret) + goto err_drm_gem_vmap_unlocked; + *map_copy = *map; + + return 0; + +err_drm_gem_vmap_unlocked: + drm_gem_unlock(gem); + return 0; +} +EXPORT_SYMBOL(drm_client_buffer_vmap_local); + +/** + * drm_client_buffer_vunmap_local - Unmap DRM client buffer + * @buffer: DRM client buffer + * + * This function removes a client buffer's memory mapping established + * with drm_client_buffer_vunmap_local(). Calling this function is only + * required by clients that manage their buffer mappings by themselves. + */ +void drm_client_buffer_vunmap_local(struct drm_client_buffer *buffer) +{ + struct drm_gem_object *gem = buffer->gem; + struct iosys_map *map = &buffer->map; + + drm_gem_vunmap(gem, map); + drm_gem_unlock(gem); +} +EXPORT_SYMBOL(drm_client_buffer_vunmap_local); + +/** * drm_client_buffer_vmap - Map DRM client buffer into address space * @buffer: DRM client buffer * @map_copy: Returns the mapped memory's address @@ -328,24 +388,30 @@ int drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct iosys_map *map_copy) { + struct drm_gem_object *gem = buffer->gem; struct iosys_map *map = &buffer->map; int ret; - /* - * FIXME: The dependency on GEM here isn't required, we could - * convert the driver handle to a dma-buf instead and use the - * backend-agnostic dma-buf vmap support instead. This would - * require that the handle2fd prime ioctl is reworked to pull the - * fd_install step out of the driver backend hooks, to make that - * final step optional for internal users. - */ - ret = drm_gem_vmap_unlocked(buffer->gem, map); + drm_gem_lock(gem); + + ret = drm_gem_pin_locked(gem); if (ret) - return ret; + goto err_drm_gem_pin_locked; + ret = drm_gem_vmap(gem, map); + if (ret) + goto err_drm_gem_vmap; + + drm_gem_unlock(gem); *map_copy = *map; return 0; + +err_drm_gem_vmap: + drm_gem_unpin_locked(buffer->gem); +err_drm_gem_pin_locked: + drm_gem_unlock(gem); + return ret; } EXPORT_SYMBOL(drm_client_buffer_vmap); @@ -359,9 +425,13 @@ EXPORT_SYMBOL(drm_client_buffer_vmap); */ void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) { + struct drm_gem_object *gem = buffer->gem; struct iosys_map *map = &buffer->map; - drm_gem_vunmap_unlocked(buffer->gem, map); + drm_gem_lock(gem); + drm_gem_vunmap(gem, map); + drm_gem_unpin_locked(gem); + drm_gem_unlock(gem); } EXPORT_SYMBOL(drm_client_buffer_vunmap); diff --git a/drivers/gpu/drm/drm_crtc_helper_internal.h b/drivers/gpu/drm/drm_crtc_helper_internal.h index 28e04e750130..8059f65c5d6c 100644 --- a/drivers/gpu/drm/drm_crtc_helper_internal.h +++ b/drivers/gpu/drm/drm_crtc_helper_internal.h @@ -26,10 +26,15 @@ * implementation details and are not exported to drivers. 
*/ -#include <drm/drm_connector.h> -#include <drm/drm_crtc.h> -#include <drm/drm_encoder.h> -#include <drm/drm_modes.h> +#ifndef __DRM_CRTC_HELPER_INTERNAL_H__ +#define __DRM_CRTC_HELPER_INTERNAL_H__ + +enum drm_mode_status; +struct drm_connector; +struct drm_crtc; +struct drm_display_mode; +struct drm_encoder; +struct drm_modeset_acquire_ctx; /* drm_probe_helper.c */ enum drm_mode_status drm_crtc_mode_valid(struct drm_crtc *crtc, @@ -44,3 +49,5 @@ drm_connector_mode_valid(struct drm_connector *connector, struct drm_encoder * drm_connector_get_single_encoder(struct drm_connector *connector); + +#endif /* __DRM_CRTC_HELPER_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index a514d5207e41..0c693229a1c9 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -32,6 +32,10 @@ * and are not exported to drivers. */ +#ifndef __DRM_CRTC_INTERNAL_H__ +#define __DRM_CRTC_INTERNAL_H__ + +#include <linux/err.h> #include <linux/types.h> enum drm_color_encoding; @@ -54,6 +58,7 @@ struct drm_mode_object; struct drm_mode_set; struct drm_plane; struct drm_plane_state; +struct drm_printer; struct drm_property; struct edid; struct fwnode_handle; @@ -303,3 +308,5 @@ drm_edid_load_firmware(struct drm_connector *connector) return ERR_PTR(-ENOENT); } #endif + +#endif /* __DRM_CRTC_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 923c4423151c..ea77577a3786 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -102,6 +102,11 @@ struct detailed_mode_closure { int modes; }; +struct drm_edid_match_closure { + const struct drm_edid_ident *ident; + bool matched; +}; + #define LEVEL_DMT 0 #define LEVEL_GTF 1 #define LEVEL_GTF2 2 @@ -109,13 +114,15 @@ struct detailed_mode_closure { #define EDID_QUIRK(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _quirks) \ { \ - .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ - product_id), \ + .ident = { \ + .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, \ + vend_chr_2, product_id), \ + }, \ .quirks = _quirks \ } static const struct edid_quirk { - u32 panel_id; + const struct drm_edid_ident ident; u32 quirks; } edid_quirk_list[] = { /* Acer AL1706 */ @@ -2749,8 +2756,27 @@ const struct drm_edid *drm_edid_read(struct drm_connector *connector) } EXPORT_SYMBOL(drm_edid_read); -static u32 edid_extract_panel_id(const struct edid *edid) +/** + * drm_edid_get_panel_id - Get a panel's ID from EDID + * @drm_edid: EDID that contains panel ID. + * + * This function uses the first block of the EDID of a panel and (assuming + * that the EDID is valid) extracts the ID out of it. The ID is a 32-bit value + * (16 bits of manufacturer ID and 16 bits of per-manufacturer ID) that's + * supposed to be different for each different modem of panel. + * + * Return: A 32-bit ID that should be different for each make/model of panel. + * See the functions drm_edid_encode_panel_id() and + * drm_edid_decode_panel_id() for some details on the structure of this + * ID. Return 0 if the EDID size is less than a base block. + */ +u32 drm_edid_get_panel_id(const struct drm_edid *drm_edid) { + const struct edid *edid = drm_edid->edid; + + if (drm_edid->size < EDID_LENGTH) + return 0; + /* * We represent the ID as a 32-bit number so it can easily be compared * with "==". 
@@ -2768,60 +2794,54 @@ static u32 edid_extract_panel_id(const struct edid *edid) (u32)edid->mfg_id[1] << 16 | (u32)EDID_PRODUCT_ID(edid); } +EXPORT_SYMBOL(drm_edid_get_panel_id); /** - * drm_edid_get_panel_id - Get a panel's ID through DDC + * drm_edid_read_base_block - Get a panel's EDID base block * @adapter: I2C adapter to use for DDC * - * This function reads the first block of the EDID of a panel and (assuming - * that the EDID is valid) extracts the ID out of it. The ID is a 32-bit value - * (16 bits of manufacturer ID and 16 bits of per-manufacturer ID) that's - * supposed to be different for each different modem of panel. + * This function returns the drm_edid containing the first block of the EDID of + * a panel. * * This function is intended to be used during early probing on devices where * more than one panel might be present. Because of its intended use it must - * assume that the EDID of the panel is correct, at least as far as the ID - * is concerned (in other words, we don't process any overrides here). + * assume that the EDID of the panel is correct, at least as far as the base + * block is concerned (in other words, we don't process any overrides here). + * + * Caller should call drm_edid_free() after use. * * NOTE: it's expected that this function and drm_do_get_edid() will both * be read the EDID, but there is no caching between them. Since we're only * reading the first block, hopefully this extra overhead won't be too big. * - * Return: A 32-bit ID that should be different for each make/model of panel. - * See the functions drm_edid_encode_panel_id() and - * drm_edid_decode_panel_id() for some details on the structure of this - * ID. + * WARNING: Only use this function when the connector is unknown. For example, + * during the early probe of panel. The EDID read from the function is temporary + * and should be replaced by the full EDID returned from other drm_edid_read. + * + * Return: Pointer to allocated EDID base block, or NULL on any failure. */ - -u32 drm_edid_get_panel_id(struct i2c_adapter *adapter) +const struct drm_edid *drm_edid_read_base_block(struct i2c_adapter *adapter) { enum edid_block_status status; void *base_block; - u32 panel_id = 0; - - /* - * There are no manufacturer IDs of 0, so if there is a problem reading - * the EDID then we'll just return 0. - */ base_block = kzalloc(EDID_LENGTH, GFP_KERNEL); if (!base_block) - return 0; + return NULL; status = edid_block_read(base_block, 0, drm_do_probe_ddc_edid, adapter); edid_block_status_print(status, base_block, 0); - if (edid_block_status_valid(status, edid_block_tag(base_block))) - panel_id = edid_extract_panel_id(base_block); - else + if (!edid_block_status_valid(status, edid_block_tag(base_block))) { edid_block_dump(KERN_NOTICE, base_block, 0); + kfree(base_block); + return NULL; + } - kfree(base_block); - - return panel_id; + return _drm_edid_alloc(base_block, EDID_LENGTH); } -EXPORT_SYMBOL(drm_edid_get_panel_id); +EXPORT_SYMBOL(drm_edid_read_base_block); /** * drm_get_edid_switcheroo - get EDID data for a vga_switcheroo output @@ -2903,16 +2923,17 @@ EXPORT_SYMBOL(drm_edid_duplicate); * @drm_edid: EDID to process * * This tells subsequent routines what fixes they need to apply. + * + * Return: A u32 represents the quirks to apply. 
*/ static u32 edid_get_quirks(const struct drm_edid *drm_edid) { - u32 panel_id = edid_extract_panel_id(drm_edid->edid); const struct edid_quirk *quirk; int i; for (i = 0; i < ARRAY_SIZE(edid_quirk_list); i++) { quirk = &edid_quirk_list[i]; - if (quirk->panel_id == panel_id) + if (drm_edid_match(drm_edid, &quirk->ident)) return quirk->quirks; } @@ -5443,6 +5464,66 @@ drm_parse_hdmi_vsdb_audio(struct drm_connector *connector, const u8 *db) } static void +match_identity(const struct detailed_timing *timing, void *data) +{ + struct drm_edid_match_closure *closure = data; + unsigned int i; + const char *name = closure->ident->name; + unsigned int name_len = strlen(name); + const char *desc = timing->data.other_data.data.str.str; + unsigned int desc_len = ARRAY_SIZE(timing->data.other_data.data.str.str); + + if (name_len > desc_len || + !(is_display_descriptor(timing, EDID_DETAIL_MONITOR_NAME) || + is_display_descriptor(timing, EDID_DETAIL_MONITOR_STRING))) + return; + + if (strncmp(name, desc, name_len)) + return; + + for (i = name_len; i < desc_len; i++) { + if (desc[i] == '\n') + break; + /* Allow white space before EDID string terminator. */ + if (!isspace(desc[i])) + return; + } + + closure->matched = true; +} + +/** + * drm_edid_match - match drm_edid with given identity + * @drm_edid: EDID + * @ident: the EDID identity to match with + * + * Check if the EDID matches with the given identity. + * + * Return: True if the given identity matched with EDID, false otherwise. + */ +bool drm_edid_match(const struct drm_edid *drm_edid, + const struct drm_edid_ident *ident) +{ + if (!drm_edid || drm_edid_get_panel_id(drm_edid) != ident->panel_id) + return false; + + /* Match with name only if it's not NULL. */ + if (ident->name) { + struct drm_edid_match_closure closure = { + .ident = ident, + .matched = false, + }; + + drm_for_each_detailed_block(drm_edid, match_identity, &closure); + + return closure.matched; + } + + return true; +} +EXPORT_SYMBOL(drm_edid_match); + +static void monitor_name(const struct detailed_timing *timing, void *data) { const char **res = data; diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c index d647d89764cb..be357f926fae 100644 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ b/drivers/gpu/drm/drm_fbdev_generic.c @@ -197,14 +197,14 @@ static int drm_fbdev_generic_damage_blit(struct drm_fb_helper *fb_helper, */ mutex_lock(&fb_helper->lock); - ret = drm_client_buffer_vmap(buffer, &map); + ret = drm_client_buffer_vmap_local(buffer, &map); if (ret) goto out; dst = map; drm_fbdev_generic_damage_blit_real(fb_helper, clip, &dst); - drm_client_buffer_vunmap(buffer); + drm_client_buffer_vunmap_local(buffer); out: mutex_unlock(&fb_helper->lock); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 44a948b80ee1..d4bbc5d109c8 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1161,7 +1161,7 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent, obj->funcs->print_info(p, indent, obj); } -int drm_gem_pin(struct drm_gem_object *obj) +int drm_gem_pin_locked(struct drm_gem_object *obj) { if (obj->funcs->pin) return obj->funcs->pin(obj); @@ -1169,12 +1169,30 @@ int drm_gem_pin(struct drm_gem_object *obj) return 0; } -void drm_gem_unpin(struct drm_gem_object *obj) +void drm_gem_unpin_locked(struct drm_gem_object *obj) { if (obj->funcs->unpin) obj->funcs->unpin(obj); } +int drm_gem_pin(struct drm_gem_object *obj) +{ + int ret; + + dma_resv_lock(obj->resv, NULL); + ret = drm_gem_pin_locked(obj); + 
dma_resv_unlock(obj->resv); + + return ret; +} + +void drm_gem_unpin(struct drm_gem_object *obj) +{ + dma_resv_lock(obj->resv, NULL); + drm_gem_unpin_locked(obj); + dma_resv_unlock(obj->resv); +} + int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) { int ret; @@ -1209,6 +1227,18 @@ void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) } EXPORT_SYMBOL(drm_gem_vunmap); +void drm_gem_lock(struct drm_gem_object *obj) +{ + dma_resv_lock(obj->resv, NULL); +} +EXPORT_SYMBOL(drm_gem_lock); + +void drm_gem_unlock(struct drm_gem_object *obj) +{ + dma_resv_unlock(obj->resv); +} +EXPORT_SYMBOL(drm_gem_unlock); + int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) { int ret; diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index e435f986cd13..177773bcdbfd 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -10,7 +10,6 @@ #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/vmalloc.h> -#include <linux/module.h> #ifdef CONFIG_X86 #include <asm/set_memory.h> @@ -228,7 +227,7 @@ void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem) } EXPORT_SYMBOL(drm_gem_shmem_put_pages); -static int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) +int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) { int ret; @@ -238,13 +237,15 @@ static int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) return ret; } +EXPORT_SYMBOL(drm_gem_shmem_pin_locked); -static void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem) +void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem) { dma_resv_assert_held(shmem->base.resv); drm_gem_shmem_put_pages(shmem); } +EXPORT_SYMBOL(drm_gem_shmem_unpin_locked); /** * drm_gem_shmem_pin - Pin backing pages for a shmem GEM object diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 1ac284a9e8ee..6027584406af 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -282,6 +282,8 @@ static int drm_gem_vram_pin_locked(struct drm_gem_vram_object *gbo, struct ttm_operation_ctx ctx = { false, false }; int ret; + dma_resv_assert_held(gbo->bo.base.resv); + if (gbo->bo.pin_count) goto out; @@ -337,6 +339,8 @@ EXPORT_SYMBOL(drm_gem_vram_pin); static void drm_gem_vram_unpin_locked(struct drm_gem_vram_object *gbo) { + dma_resv_assert_held(gbo->bo.base.resv); + ttm_bo_unpin(&gbo->bo); } @@ -363,11 +367,28 @@ int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo) } EXPORT_SYMBOL(drm_gem_vram_unpin); -static int drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo, - struct iosys_map *map) +/** + * drm_gem_vram_vmap() - Pins and maps a GEM VRAM object into kernel address + * space + * @gbo: The GEM VRAM object to map + * @map: Returns the kernel virtual address of the VRAM GEM object's backing + * store. + * + * The vmap function pins a GEM VRAM object to its current location, either + * system or video memory, and maps its buffer into kernel address space. + * As pinned object cannot be relocated, you should avoid pinning objects + * permanently. Call drm_gem_vram_vunmap() with the returned address to + * unmap and unpin the GEM VRAM object. + * + * Returns: + * 0 on success, or a negative error code otherwise. 
+ */ +int drm_gem_vram_vmap(struct drm_gem_vram_object *gbo, struct iosys_map *map) { int ret; + dma_resv_assert_held(gbo->bo.base.resv); + if (gbo->vmap_use_count > 0) goto out; @@ -388,12 +409,23 @@ out: return 0; } +EXPORT_SYMBOL(drm_gem_vram_vmap); -static void drm_gem_vram_kunmap_locked(struct drm_gem_vram_object *gbo, - struct iosys_map *map) +/** + * drm_gem_vram_vunmap() - Unmaps and unpins a GEM VRAM object + * @gbo: The GEM VRAM object to unmap + * @map: Kernel virtual address where the VRAM GEM object was mapped + * + * A call to drm_gem_vram_vunmap() unmaps and unpins a GEM VRAM buffer. See + * the documentation for drm_gem_vram_vmap() for more information. + */ +void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo, + struct iosys_map *map) { struct drm_device *dev = gbo->bo.base.dev; + dma_resv_assert_held(gbo->bo.base.resv); + if (drm_WARN_ON_ONCE(dev, !gbo->vmap_use_count)) return; @@ -410,60 +442,6 @@ static void drm_gem_vram_kunmap_locked(struct drm_gem_vram_object *gbo, * from memory. See drm_gem_vram_bo_driver_move_notify(). */ } - -/** - * drm_gem_vram_vmap() - Pins and maps a GEM VRAM object into kernel address - * space - * @gbo: The GEM VRAM object to map - * @map: Returns the kernel virtual address of the VRAM GEM object's backing - * store. - * - * The vmap function pins a GEM VRAM object to its current location, either - * system or video memory, and maps its buffer into kernel address space. - * As pinned object cannot be relocated, you should avoid pinning objects - * permanently. Call drm_gem_vram_vunmap() with the returned address to - * unmap and unpin the GEM VRAM object. - * - * Returns: - * 0 on success, or a negative error code otherwise. - */ -int drm_gem_vram_vmap(struct drm_gem_vram_object *gbo, struct iosys_map *map) -{ - int ret; - - dma_resv_assert_held(gbo->bo.base.resv); - - ret = drm_gem_vram_pin_locked(gbo, 0); - if (ret) - return ret; - ret = drm_gem_vram_kmap_locked(gbo, map); - if (ret) - goto err_drm_gem_vram_unpin_locked; - - return 0; - -err_drm_gem_vram_unpin_locked: - drm_gem_vram_unpin_locked(gbo); - return ret; -} -EXPORT_SYMBOL(drm_gem_vram_vmap); - -/** - * drm_gem_vram_vunmap() - Unmaps and unpins a GEM VRAM object - * @gbo: The GEM VRAM object to unmap - * @map: Kernel virtual address where the VRAM GEM object was mapped - * - * A call to drm_gem_vram_vunmap() unmaps and unpins a GEM VRAM buffer. See - * the documentation for drm_gem_vram_vmap() for more information. - */ -void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo, - struct iosys_map *map) -{ - dma_resv_assert_held(gbo->bo.base.resv); - - drm_gem_vram_kunmap_locked(gbo, map); - drm_gem_vram_unpin_locked(gbo); -} EXPORT_SYMBOL(drm_gem_vram_vunmap); /** @@ -768,7 +746,8 @@ static int drm_gem_vram_object_pin(struct drm_gem_object *gem) { struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem); - /* Fbdev console emulation is the use case of these PRIME + /* + * Fbdev console emulation is the use case of these PRIME * helpers. This may involve updating a hardware buffer from * a shadow FB. We pin the buffer to it's current location * (either video RAM or system memory) to prevent it from @@ -776,7 +755,7 @@ static int drm_gem_vram_object_pin(struct drm_gem_object *gem) * the buffer to be pinned to VRAM, implement a callback that * sets the flags accordingly. 
*/ - return drm_gem_vram_pin(gbo, 0); + return drm_gem_vram_pin_locked(gbo, 0); } /** @@ -787,7 +766,7 @@ static void drm_gem_vram_object_unpin(struct drm_gem_object *gem) { struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem); - drm_gem_vram_unpin(gbo); + drm_gem_vram_unpin_locked(gbo); } /** diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 8e4faf0a28e6..2215baef9a3e 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -21,6 +21,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#ifndef __DRM_INTERNAL_H__ +#define __DRM_INTERNAL_H__ + #include <linux/kthread.h> #include <linux/types.h> @@ -170,6 +173,8 @@ void drm_gem_release(struct drm_device *dev, struct drm_file *file_private); void drm_gem_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj); +int drm_gem_pin_locked(struct drm_gem_object *obj); +void drm_gem_unpin_locked(struct drm_gem_object *obj); int drm_gem_pin(struct drm_gem_object *obj); void drm_gem_unpin(struct drm_gem_object *obj); int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map); @@ -276,3 +281,5 @@ void drm_framebuffer_debugfs_init(struct drm_device *dev); /* drm_edid.c */ void drm_edid_cta_sad_get(const struct cea_sad *cta_sad, u8 *sad); void drm_edid_cta_sad_set(struct cea_sad *cta_sad, const u8 *sad); + +#endif /* __DRM_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 733b109a5095..6a26a0b8eff2 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -4,7 +4,6 @@ config DRM_EXYNOS depends on OF && DRM && COMMON_CLK depends on ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST depends on MMU - select DRM_DISPLAY_HELPER if DRM_EXYNOS_DP select DRM_KMS_HELPER select VIDEOMODE_HELPERS select FB_DMAMEM_HELPERS if DRM_FBDEV_EMULATION @@ -68,8 +67,9 @@ config DRM_EXYNOS_DSI config DRM_EXYNOS_DP bool "Exynos specific extensions for Analogix DP driver" depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP - select DRM_DISPLAY_DP_HELPER default DRM_EXYNOS select DRM_PANEL help diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c index d974d0c60d2a..72191d6f0d06 100644 --- a/drivers/gpu/drm/gma500/oaktrail_lvds.c +++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c @@ -11,8 +11,6 @@ #include <linux/i2c.h> #include <linux/pm_runtime.h> -#include <asm/intel-mid.h> - #include <drm/drm_edid.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_simple_kms_helper.h> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 5932024f8f95..4f0d18a16b0f 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -2,6 +2,10 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT select INTEL_GTT if X86 @@ -10,10 +14,6 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_PANEL select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index bc18e2d9ea05..d8397065c3f0 100644 --- 
a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -27,8 +27,8 @@ config DRM_I915_DEBUG select REF_TRACKER select STACKDEPOT select STACKTRACE - select DRM_DP_AUX_CHARDEV - select DRM_DISPLAY_DEBUG_DP_TUNNEL_STATE if DRM_I915_DP_TUNNEL + select DRM_DISPLAY_DP_AUX_CHARDEV + select DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG if DRM_I915_DP_TUNNEL select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c index b7fef3c797e6..4f99b4af871c 100644 --- a/drivers/gpu/drm/imagination/pvr_vm_mips.c +++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c @@ -46,7 +46,7 @@ pvr_vm_mips_init(struct pvr_device *pvr_dev) if (!mips_data) return -ENOMEM; - for (page_nr = 0; page_nr < ARRAY_SIZE(mips_data->pt_pages); page_nr++) { + for (page_nr = 0; page_nr < PVR_MIPS_PT_PAGE_COUNT; page_nr++) { mips_data->pt_pages[page_nr] = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!mips_data->pt_pages[page_nr]) { err = -ENOMEM; @@ -102,7 +102,7 @@ pvr_vm_mips_fini(struct pvr_device *pvr_dev) int page_nr; vunmap(mips_data->pt); - for (page_nr = ARRAY_SIZE(mips_data->pt_pages) - 1; page_nr >= 0; page_nr--) { + for (page_nr = PVR_MIPS_PT_PAGE_COUNT - 1; page_nr >= 0; page_nr--) { dma_unmap_page(from_pvr_device(pvr_dev)->dev, mips_data->pt_dma_addr[page_nr], PAGE_SIZE, DMA_TO_DEVICE); diff --git a/drivers/gpu/drm/imx/ipuv3/Kconfig b/drivers/gpu/drm/imx/ipuv3/Kconfig index bacf0655ebaf..5d810ac02171 100644 --- a/drivers/gpu/drm/imx/ipuv3/Kconfig +++ b/drivers/gpu/drm/imx/ipuv3/Kconfig @@ -35,7 +35,8 @@ config DRM_IMX_LDB config DRM_IMX_HDMI tristate "Freescale i.MX DRM HDMI" - select DRM_DW_HDMI - depends on DRM_IMX && OF + depends on DRM_DW_HDMI + depends on DRM_IMX + depends on OF help Choose this if you want to use HDMI on i.MX6. diff --git a/drivers/gpu/drm/ingenic/Kconfig b/drivers/gpu/drm/ingenic/Kconfig index 3db117c5edd9..23effeb2ac72 100644 --- a/drivers/gpu/drm/ingenic/Kconfig +++ b/drivers/gpu/drm/ingenic/Kconfig @@ -27,8 +27,8 @@ config DRM_INGENIC_IPU config DRM_INGENIC_DW_HDMI tristate "Ingenic specific support for Synopsys DW HDMI" + depends on DRM_DW_HDMI depends on MACH_JZ4780 - select DRM_DW_HDMI help Choose this option to enable Synopsys DesignWare HDMI based driver. 
If you want to enable HDMI on Ingenic JZ4780 based SoC, you should diff --git a/drivers/gpu/drm/loongson/lsdc_gem.c b/drivers/gpu/drm/loongson/lsdc_gem.c index 04293df2f0de..a720d8f53209 100644 --- a/drivers/gpu/drm/loongson/lsdc_gem.c +++ b/drivers/gpu/drm/loongson/lsdc_gem.c @@ -19,33 +19,24 @@ static int lsdc_gem_prime_pin(struct drm_gem_object *obj) struct lsdc_bo *lbo = gem_to_lsdc_bo(obj); int ret; - ret = lsdc_bo_reserve(lbo); - if (unlikely(ret)) - return ret; + dma_resv_assert_held(obj->resv); ret = lsdc_bo_pin(lbo, LSDC_GEM_DOMAIN_GTT, NULL); if (likely(ret == 0)) lbo->sharing_count++; - lsdc_bo_unreserve(lbo); - return ret; } static void lsdc_gem_prime_unpin(struct drm_gem_object *obj) { struct lsdc_bo *lbo = gem_to_lsdc_bo(obj); - int ret; - ret = lsdc_bo_reserve(lbo); - if (unlikely(ret)) - return; + dma_resv_assert_held(obj->resv); lsdc_bo_unpin(lbo); if (lbo->sharing_count) lbo->sharing_count--; - - lsdc_bo_unreserve(lbo); } static struct sg_table *lsdc_gem_prime_get_sg_table(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 76cab28e010c..6caab8d4d4e0 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -22,11 +22,11 @@ config DRM_MEDIATEK config DRM_MEDIATEK_DP tristate "DRM DPTX Support for MediaTek SoCs" + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on DRM_MEDIATEK select PHY_MTK_DP - select DRM_DISPLAY_HELPER - select DRM_DISPLAY_DP_HELPER - select DRM_DP_AUX_BUS help DRM/KMS Display Port driver for MediaTek SoCs. diff --git a/drivers/gpu/drm/meson/Kconfig b/drivers/gpu/drm/meson/Kconfig index 615fdd0ce41b..5520b9e3f010 100644 --- a/drivers/gpu/drm/meson/Kconfig +++ b/drivers/gpu/drm/meson/Kconfig @@ -13,9 +13,9 @@ config DRM_MESON config DRM_MESON_DW_HDMI tristate "HDMI Synopsys Controller support for Amlogic Meson Display" + depends on DRM_DW_HDMI depends on DRM_MESON default y if DRM_MESON - select DRM_DW_HDMI imply DRM_DW_HDMI_I2S_AUDIO config DRM_MESON_DW_MIPI_DSI diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index f202f26adab2..f7708590583e 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -2,9 +2,12 @@ config DRM_MSM tristate "MSM DRM" - depends on DRM depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST depends on COMMON_CLK + depends on DRM + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on IOMMU_SUPPORT depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n depends on QCOM_OCMEM || QCOM_OCMEM=n @@ -14,9 +17,6 @@ config DRM_MSM select IOMMU_IO_PGTABLE select QCOM_MDT_LOADER if ARCH_QCOM select REGULATOR - select DRM_DP_AUX_BUS - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 175ee4ab8a6f..a5c6498a43f0 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -219,7 +219,7 @@ static void put_pages(struct drm_gem_object *obj) } } -static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj, +static struct page **msm_gem_get_pages_locked(struct drm_gem_object *obj, unsigned madv) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -257,24 +257,24 @@ static void pin_obj_locked(struct drm_gem_object *obj) mutex_unlock(&priv->lru.lock); } -struct page **msm_gem_pin_pages(struct drm_gem_object *obj) +struct page 
**msm_gem_pin_pages_locked(struct drm_gem_object *obj) { struct page **p; - msm_gem_lock(obj); - p = msm_gem_pin_pages_locked(obj, MSM_MADV_WILLNEED); + msm_gem_assert_locked(obj); + + p = msm_gem_get_pages_locked(obj, MSM_MADV_WILLNEED); if (!IS_ERR(p)) pin_obj_locked(obj); - msm_gem_unlock(obj); return p; } -void msm_gem_unpin_pages(struct drm_gem_object *obj) +void msm_gem_unpin_pages_locked(struct drm_gem_object *obj) { - msm_gem_lock(obj); + msm_gem_assert_locked(obj); + msm_gem_unpin_locked(obj); - msm_gem_unlock(obj); } static pgprot_t msm_gem_pgprot(struct msm_gem_object *msm_obj, pgprot_t prot) @@ -489,7 +489,7 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma) msm_gem_assert_locked(obj); - pages = msm_gem_pin_pages_locked(obj, MSM_MADV_WILLNEED); + pages = msm_gem_get_pages_locked(obj, MSM_MADV_WILLNEED); if (IS_ERR(pages)) return PTR_ERR(pages); @@ -703,7 +703,7 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) if (obj->import_attach) return ERR_PTR(-ENODEV); - pages = msm_gem_pin_pages_locked(obj, madv); + pages = msm_gem_get_pages_locked(obj, madv); if (IS_ERR(pages)) return ERR_CAST(pages); diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 8d414b072c29..85f0257e83da 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -140,8 +140,8 @@ int msm_gem_get_and_pin_iova(struct drm_gem_object *obj, void msm_gem_unpin_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace); void msm_gem_pin_obj_locked(struct drm_gem_object *obj); -struct page **msm_gem_pin_pages(struct drm_gem_object *obj); -void msm_gem_unpin_pages(struct drm_gem_object *obj); +struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj); +void msm_gem_unpin_pages_locked(struct drm_gem_object *obj); int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args); int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 0915f3b68752..ee267490c935 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -47,13 +47,23 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, int msm_gem_prime_pin(struct drm_gem_object *obj) { - if (!obj->import_attach) - msm_gem_pin_pages(obj); - return 0; + struct page **pages; + int ret = 0; + + if (obj->import_attach) + return 0; + + pages = msm_gem_pin_pages_locked(obj); + if (IS_ERR(pages)) + ret = PTR_ERR(pages); + + return ret; } void msm_gem_prime_unpin(struct drm_gem_object *obj) { - if (!obj->import_attach) - msm_gem_unpin_pages(obj); + if (obj->import_attach) + return; + + msm_gem_unpin_pages_locked(obj); } diff --git a/drivers/gpu/drm/mxsfb/lcdif_drv.c b/drivers/gpu/drm/mxsfb/lcdif_drv.c index ea10bf81582e..0f895b8a99d6 100644 --- a/drivers/gpu/drm/mxsfb/lcdif_drv.c +++ b/drivers/gpu/drm/mxsfb/lcdif_drv.c @@ -343,6 +343,9 @@ static int __maybe_unused lcdif_suspend(struct device *dev) if (ret) return ret; + if (pm_runtime_suspended(dev)) + return 0; + return lcdif_rpm_suspend(dev); } @@ -350,7 +353,8 @@ static int __maybe_unused lcdif_resume(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); - lcdif_rpm_resume(dev); + if (!pm_runtime_suspended(dev)) + lcdif_rpm_resume(dev); return drm_mode_config_helper_resume(drm); } diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 
ceef470c9fbf..4c10b400658c 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -1,12 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_NOUVEAU tristate "Nouveau (NVIDIA) cards" - depends on DRM && PCI && MMU + depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER + depends on PCI + depends on MMU select IOMMU_API select FW_LOADER - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 80f74ee0fc78..f465fe93b1f7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -312,11 +312,21 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { if (init->fb_ctxdma_handle == ~0) { switch (init->tt_ctxdma_handle) { - case 0x01: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR ; break; - case 0x02: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPDEC; break; - case 0x04: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPPP ; break; - case 0x08: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSVLD ; break; - case 0x30: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_CE ; break; + case NOUVEAU_FIFO_ENGINE_GR: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR; + break; + case NOUVEAU_FIFO_ENGINE_VP: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPDEC; + break; + case NOUVEAU_FIFO_ENGINE_PPP: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPPP; + break; + case NOUVEAU_FIFO_ENGINE_BSP: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSVLD; + break; + case NOUVEAU_FIFO_ENGINE_CE: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_CE; + break; default: return nouveau_abi16_put(abi16, -ENOSYS); } diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 11c8c4a80079..661b901d8ecc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -50,18 +50,6 @@ struct drm_nouveau_grobj_alloc { int class; }; -struct drm_nouveau_notifierobj_alloc { - uint32_t channel; - uint32_t handle; - uint32_t size; - uint32_t offset; -}; - -struct drm_nouveau_gpuobj_free { - int channel; - uint32_t handle; -}; - struct drm_nouveau_setparam { uint64_t param; uint64_t value; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index db8cbf615112..1e2d28fd10dc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -467,17 +467,14 @@ nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t domain, set_placement_range(nvbo, domain); } -int -nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t domain, bool contig) +int nouveau_bo_pin_locked(struct nouveau_bo *nvbo, uint32_t domain, bool contig) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; bool force = false, evict = false; - int ret; + int ret = 0; - ret = ttm_bo_reserve(bo, false, false, NULL); - if (ret) - return ret; + dma_resv_assert_held(bo->base.resv); if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && domain == NOUVEAU_GEM_DOMAIN_VRAM && contig) { @@ -540,20 +537,15 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t domain, bool contig) out: if (force && ret) nvbo->contig = false; - ttm_bo_unreserve(bo); return ret; } -int -nouveau_bo_unpin(struct nouveau_bo *nvbo) +void nouveau_bo_unpin_locked(struct nouveau_bo 
*nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; - int ret; - ret = ttm_bo_reserve(bo, false, false, NULL); - if (ret) - return ret; + dma_resv_assert_held(bo->base.resv); ttm_bo_unpin(&nvbo->bo); if (!nvbo->bo.pin_count) { @@ -568,8 +560,33 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo) break; } } +} + +int nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t domain, bool contig) +{ + struct ttm_buffer_object *bo = &nvbo->bo; + int ret; + ret = ttm_bo_reserve(bo, false, false, NULL); + if (ret) + return ret; + ret = nouveau_bo_pin_locked(nvbo, domain, contig); + ttm_bo_unreserve(bo); + + return ret; +} + +int nouveau_bo_unpin(struct nouveau_bo *nvbo) +{ + struct ttm_buffer_object *bo = &nvbo->bo; + int ret; + + ret = ttm_bo_reserve(bo, false, false, NULL); + if (ret) + return ret; + nouveau_bo_unpin_locked(nvbo); ttm_bo_unreserve(bo); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index e9dfab6a8156..4e891752c255 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -85,6 +85,8 @@ int nouveau_bo_new(struct nouveau_cli *, u64 size, int align, u32 domain, u32 tile_mode, u32 tile_flags, struct sg_table *sg, struct dma_resv *robj, struct nouveau_bo **); +int nouveau_bo_pin_locked(struct nouveau_bo *nvbo, uint32_t domain, bool contig); +void nouveau_bo_unpin_locked(struct nouveau_bo *nvbo); int nouveau_bo_pin(struct nouveau_bo *, u32 flags, bool contig); int nouveau_bo_unpin(struct nouveau_bo *); int nouveau_bo_map(struct nouveau_bo *); diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 1b2ff0c40fc1..b58ab595faf8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -89,18 +89,18 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj) int ret; /* pin buffer into GTT */ - ret = nouveau_bo_pin(nvbo, NOUVEAU_GEM_DOMAIN_GART, false); + ret = nouveau_bo_pin_locked(nvbo, NOUVEAU_GEM_DOMAIN_GART, false); if (ret) - return -EINVAL; + ret = -EINVAL; - return 0; + return ret; } void nouveau_gem_prime_unpin(struct drm_gem_object *obj) { struct nouveau_bo *nvbo = nouveau_gem_object(obj); - nouveau_bo_unpin(nvbo); + nouveau_bo_unpin_locked(nvbo); } struct dma_buf *nouveau_gem_prime_export(struct drm_gem_object *gobj, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c index 6a0a4d3b8902..027867c2a8c5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c @@ -1080,7 +1080,7 @@ r535_dp_aux_xfer(struct nvkm_outp *outp, u8 type, u32 addr, u8 *data, u8 *psize) ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); if (ret) { nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); - return PTR_ERR(ctrl); + return ret; } memcpy(data, ctrl->data, size); diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig index b715301ec79f..6c49270cb290 100644 --- a/drivers/gpu/drm/omapdrm/Kconfig +++ b/drivers/gpu/drm/omapdrm/Kconfig @@ -4,7 +4,7 @@ config DRM_OMAP depends on DRM && OF depends on ARCH_OMAP2PLUS select DRM_KMS_HELPER - select FB_DMAMEM_HELPERS if DRM_FBDEV_EMULATION + select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION select VIDEOMODE_HELPERS select HDMI default n diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 6b08b137af1a..523be34682ca 100644 --- 
a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -51,6 +51,10 @@ static void pan_worker(struct work_struct *work) omap_gem_roll(bo, fbi->var.yoffset * npages); } +FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(omap_fbdev, + drm_fb_helper_damage_range, + drm_fb_helper_damage_area) + static int omap_fbdev_pan_display(struct fb_var_screeninfo *var, struct fb_info *fbi) { @@ -78,11 +82,9 @@ fallback: static int omap_fbdev_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) { - struct drm_fb_helper *helper = info->par; - struct drm_framebuffer *fb = helper->fb; - struct drm_gem_object *bo = drm_gem_fb_get_obj(fb, 0); + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - return drm_gem_mmap_obj(bo, omap_gem_mmap_size(bo), vma); + return fb_deferred_io_mmap(info, vma); } static void omap_fbdev_fb_destroy(struct fb_info *info) @@ -94,6 +96,7 @@ static void omap_fbdev_fb_destroy(struct fb_info *info) DBG(); + fb_deferred_io_cleanup(info); drm_fb_helper_fini(helper); omap_gem_unpin(bo); @@ -104,15 +107,19 @@ static void omap_fbdev_fb_destroy(struct fb_info *info) kfree(fbdev); } +/* + * For now, we cannot use FB_DEFAULT_DEFERRED_OPS and fb_deferred_io_mmap() + * because we use write-combine. + */ static const struct fb_ops omap_fb_ops = { .owner = THIS_MODULE, - __FB_DEFAULT_DMAMEM_OPS_RDWR, + __FB_DEFAULT_DEFERRED_OPS_RDWR(omap_fbdev), .fb_check_var = drm_fb_helper_check_var, .fb_set_par = drm_fb_helper_set_par, .fb_setcmap = drm_fb_helper_setcmap, .fb_blank = drm_fb_helper_blank, .fb_pan_display = omap_fbdev_pan_display, - __FB_DEFAULT_DMAMEM_OPS_DRAW, + __FB_DEFAULT_DEFERRED_OPS_DRAW(omap_fbdev), .fb_ioctl = drm_fb_helper_ioctl, .fb_mmap = omap_fbdev_fb_mmap, .fb_destroy = omap_fbdev_fb_destroy, @@ -213,6 +220,15 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, fbi->fix.smem_start = dma_addr; fbi->fix.smem_len = bo->size; + /* deferred I/O */ + helper->fbdefio.delay = HZ / 20; + helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + fbi->fbdefio = &helper->fbdefio; + ret = fb_deferred_io_init(fbi); + if (ret) + goto fail; + /* if we have DMM, then we can use it for scrolling by just * shuffling pages around in DMM rather than doing sw blit. */ @@ -238,8 +254,20 @@ fail: return ret; } +static int omap_fbdev_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) +{ + if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) + return 0; + + if (helper->fb->funcs->dirty) + return helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); + + return 0; +} + static const struct drm_fb_helper_funcs omap_fb_helper_funcs = { .fb_probe = omap_fbdev_create, + .fb_dirty = omap_fbdev_dirty, }; static struct drm_fb_helper *get_fb(struct fb_info *fbi) diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d037b3b8b999..154f5bf82980 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -533,11 +533,11 @@ config DRM_PANEL_RAYDIUM_RM68200 config DRM_PANEL_RAYDIUM_RM692E5 tristate "Raydium RM692E5-based DSI panel" - depends on OF - depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER + depends on DRM_MIPI_DSI + depends on OF help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. 
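The panel Kconfig hunks above and below follow the same dependency rework applied across this series: DRM display-helper symbols are declared with "depends on" instead of "select", so the helper modules must be enabled explicitly in the configuration. As a rough sketch of the resulting shape (the symbol name below is purely illustrative and not part of this series):

config DRM_PANEL_EXAMPLE
	tristate "Hypothetical DSI panel"
	depends on BACKLIGHT_CLASS_DEVICE
	depends on DRM_DISPLAY_DP_HELPER
	depends on DRM_DISPLAY_HELPER
	depends on DRM_MIPI_DSI
	depends on OF
	help
	  Illustrative entry only: helper dependencies are expressed with
	  "depends on" rather than "select", matching the convention used
	  throughout this series.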
@@ -559,12 +559,12 @@ config DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 config DRM_PANEL_SAMSUNG_ATNA33XC20 tristate "Samsung ATNA33XC20 eDP panel" - depends on OF depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER + depends on OF depends on PM - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER - select DRM_DP_AUX_BUS help DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't be handled by the DRM_PANEL_SIMPLE driver because its power @@ -586,6 +586,15 @@ config DRM_PANEL_SAMSUNG_LD9040 depends on BACKLIGHT_CLASS_DEVICE select VIDEOMODE_HELPERS +config DRM_PANEL_SAMSUNG_S6E3FA7 + tristate "Samsung S6E3FA7 panel driver" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y here if you want to enable support for the Samsung S6E3FA7 + 1920x2220 panel. + config DRM_PANEL_SAMSUNG_S6D16D0 tristate "Samsung S6D16D0 DSI video mode panel" depends on OF @@ -790,13 +799,13 @@ config DRM_PANEL_STARTEK_KD070FHFID015 config DRM_PANEL_EDP tristate "support for simple Embedded DisplayPort panels" - depends on OF depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER + depends on OF depends on PM select VIDEOMODE_HELPERS - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER - select DRM_DP_AUX_BUS select DRM_KMS_HELPER help DRM panel driver for dumb eDP panels that need at most a regulator and @@ -870,11 +879,11 @@ config DRM_PANEL_TRULY_NT35597_WQXGA config DRM_PANEL_VISIONOX_R66451 tristate "Visionox R66451" - depends on OF - depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER + depends on DRM_MIPI_DSI + depends on OF help Say Y here if you want to enable support for Visionox R66451 1080x2340 AMOLED DSI panel. diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index f156d7fa0bcc..24a02655d726 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D27A1) += panel-samsung-s6d27a1.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0) += panel-samsung-s6d7aa0.o +obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3FA7) += panel-samsung-s6e3fa7.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03) += panel-samsung-s6e63j0x03.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63M0) += panel-samsung-s6e63m0.o diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index d58f90bc48fb..1a4a1ffea2c1 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -210,15 +210,12 @@ struct panel_desc { * struct edp_panel_entry - Maps panel ID to delay / panel name. */ struct edp_panel_entry { - /** @panel_id: 32-bit ID for panel, encoded with drm_edid_encode_panel_id(). */ - u32 panel_id; + /** @ident: edid identity used for panel matching. */ + const struct drm_edid_ident ident; /** @delay: The power sequencing delays needed for this panel. */ const struct panel_delay *delay; - /** @name: Name of this panel (for printing to logs). */ - const char *name; - /** @override_edid_mode: Override the mode obtained by edid. 
*/ const struct drm_display_mode *override_edid_mode; }; @@ -691,7 +688,7 @@ static int detected_panel_show(struct seq_file *s, void *data) else if (!p->detected_panel) seq_puts(s, "HARDCODED\n"); else - seq_printf(s, "%s\n", p->detected_panel->name); + seq_printf(s, "%s\n", p->detected_panel->ident.name); return 0; } @@ -761,11 +758,12 @@ static void panel_edp_parse_panel_timing_node(struct device *dev, dev_err(dev, "Reject override mode: No display_timing found\n"); } -static const struct edp_panel_entry *find_edp_panel(u32 panel_id); +static const struct edp_panel_entry *find_edp_panel(u32 panel_id, const struct drm_edid *edid); static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel) { struct panel_desc *desc; + const struct drm_edid *base_block; u32 panel_id; char vend[4]; u16 product_id; @@ -795,15 +793,19 @@ static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel) goto exit; } - panel_id = drm_edid_get_panel_id(panel->ddc); - if (!panel_id) { + base_block = drm_edid_read_base_block(panel->ddc); + if (base_block) { + panel_id = drm_edid_get_panel_id(base_block); + } else { dev_err(dev, "Couldn't identify panel via EDID\n"); ret = -EIO; goto exit; } drm_edid_decode_panel_id(panel_id, vend, &product_id); - panel->detected_panel = find_edp_panel(panel_id); + panel->detected_panel = find_edp_panel(panel_id, base_block); + + drm_edid_free(base_block); /* * We're using non-optimized timings and want it really obvious that @@ -836,7 +838,7 @@ static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel) panel->detected_panel = ERR_PTR(-EINVAL); } else { dev_info(dev, "Detected %s %s (%#06x)\n", - vend, panel->detected_panel->name, product_id); + vend, panel->detected_panel->ident.name, product_id); /* Update the delay; everything else comes from EDID */ desc->delay = *panel->detected_panel->delay; @@ -1005,6 +1007,19 @@ static const struct panel_desc auo_b101ean01 = { }, }; +static const struct drm_display_mode auo_b116xa3_mode = { + .clock = 70589, + .hdisplay = 1366, + .hsync_start = 1366 + 40, + .hsync_end = 1366 + 40 + 40, + .htotal = 1366 + 40 + 40 + 32, + .vdisplay = 768, + .vsync_start = 768 + 10, + .vsync_end = 768 + 10 + 12, + .vtotal = 768 + 10 + 12 + 6, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + static const struct drm_display_mode auo_b116xak01_mode = { .clock = 69300, .hdisplay = 1366, @@ -1865,6 +1880,13 @@ static const struct panel_delay delay_200_500_e50 = { .enable = 50, }; +static const struct panel_delay delay_200_500_e50_p2e200 = { + .hpd_absent = 200, + .unprepare = 500, + .enable = 50, + .prepare_to_enable = 200, +}; + static const struct panel_delay delay_200_500_e80 = { .hpd_absent = 200, .unprepare = 500, @@ -1919,17 +1941,21 @@ static const struct panel_delay delay_200_500_e50_po2e200 = { #define EDP_PANEL_ENTRY(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name) \ { \ - .name = _name, \ - .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ - product_id), \ + .ident = { \ + .name = _name, \ + .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ + product_id), \ + }, \ .delay = _delay \ } #define EDP_PANEL_ENTRY2(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name, _mode) \ { \ - .name = _name, \ - .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ - product_id), \ + .ident = { \ + .name = _name, \ + .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ + product_id), \ + }, 
\ .delay = _delay, \ .override_edid_mode = _mode \ } @@ -1953,7 +1979,9 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x239b, &delay_200_500_e50, "B116XAN06.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x255c, &delay_200_500_e50, "B116XTN02.5"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x403d, &delay_200_500_e50, "B140HAN04.0"), - EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAN04.0"), + EDP_PANEL_ENTRY2('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0", + &auo_b116xa3_mode), EDP_PANEL_ENTRY('A', 'U', 'O', 0x435c, &delay_200_500_e50, "Unknown"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"), @@ -1961,6 +1989,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x639c, &delay_200_500_e50, "B140HAK02.7"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x723c, &delay_200_500_e50, "B140XTN07.2"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0xd497, &delay_200_500_e50, "B120XAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0xf390, &delay_200_500_e50, "B140XTN07.7"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0607, &delay_200_500_e200, "Unknown"), @@ -2010,6 +2039,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b43, &delay_200_500_e200, "NV140FHM-T09"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b56, &delay_200_500_e80, "NT140FHM-N47"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0c20, &delay_200_500_e80, "NT140FHM-N47"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0cb6, &delay_200_500_e200, "NT116WHM-N44"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1130, &delay_200_500_e50, "N116BGE-EB2"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1132, &delay_200_500_e80_d50, "N116BGE-EA2"), @@ -2025,6 +2055,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('C', 'M', 'N', 0x1156, &delay_200_500_e80_d50, "Unknown"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1157, &delay_200_500_e80_d50, "N116BGE-EA2"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x115b, &delay_200_500_e80_d50, "N116BCN-EB1"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x115e, &delay_200_500_e80_d50, "N116BCA-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1247, &delay_200_500_e80_d50, "N120ACA-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x142b, &delay_200_500_e80_d50, "N140HCA-EAC"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x142e, &delay_200_500_e80_d50, "N140BGA-EA4"), @@ -2034,7 +2065,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d6, &delay_200_500_e80_d50, "N140BGA-EA4"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x14e5, &delay_200_500_e80_d50, "N140HGA-EA1"), - EDP_PANEL_ENTRY('C', 'S', 'O', 0x1200, &delay_200_500_e50, "MNC207QS1-1"), + EDP_PANEL_ENTRY('C', 'S', 'O', 0x1200, &delay_200_500_e50_p2e200, "MNC207QS1-1"), EDP_PANEL_ENTRY('H', 'K', 'C', 0x2d51, &delay_200_500_e200, "Unknown"), EDP_PANEL_ENTRY('H', 'K', 'C', 0x2d5b, &delay_200_500_e200, "Unknown"), @@ -2076,15 +2107,25 @@ static const struct edp_panel_entry edp_panels[] = { { /* sentinal */ } }; -static const struct edp_panel_entry *find_edp_panel(u32 panel_id) +static const struct edp_panel_entry *find_edp_panel(u32 panel_id, const struct drm_edid *edid) { const struct edp_panel_entry *panel; if (!panel_id) return NULL; - for (panel = edp_panels; panel->panel_id; panel++) - if (panel->panel_id == panel_id) + /* + * Match with identity first. 
This allows handling the case where + * vendors incorrectly reused the same panel ID for multiple panels that + * need different settings. If there's no match, try again with panel + * ID, which should be unique. + */ + for (panel = edp_panels; panel->ident.panel_id; panel++) + if (drm_edid_match(edid, &panel->ident)) + return panel; + + for (panel = edp_panels; panel->ident.panel_id; panel++) + if (panel->ident.panel_id == panel_id) return panel; return NULL; diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c index 2ffe5f68a890..084c37fa7348 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c @@ -455,6 +455,202 @@ static const struct ili9881c_instr k101_im2byl02_init[] = { ILI9881C_COMMAND_INSTR(0xD3, 0x3F), /* VN0 */ }; +static const struct ili9881c_instr kd050hdfia020_init[] = { + ILI9881C_SWITCH_PAGE_INSTR(3), + ILI9881C_COMMAND_INSTR(0x01, 0x00), + ILI9881C_COMMAND_INSTR(0x02, 0x00), + ILI9881C_COMMAND_INSTR(0x03, 0x72), + ILI9881C_COMMAND_INSTR(0x04, 0x00), + ILI9881C_COMMAND_INSTR(0x05, 0x00), + ILI9881C_COMMAND_INSTR(0x06, 0x09), + ILI9881C_COMMAND_INSTR(0x07, 0x00), + ILI9881C_COMMAND_INSTR(0x08, 0x00), + ILI9881C_COMMAND_INSTR(0x09, 0x01), + ILI9881C_COMMAND_INSTR(0x0a, 0x00), + ILI9881C_COMMAND_INSTR(0x0b, 0x00), + ILI9881C_COMMAND_INSTR(0x0c, 0x01), + ILI9881C_COMMAND_INSTR(0x0d, 0x00), + ILI9881C_COMMAND_INSTR(0x0e, 0x00), + ILI9881C_COMMAND_INSTR(0x0f, 0x00), + ILI9881C_COMMAND_INSTR(0x10, 0x00), + ILI9881C_COMMAND_INSTR(0x11, 0x00), + ILI9881C_COMMAND_INSTR(0x12, 0x00), + ILI9881C_COMMAND_INSTR(0x13, 0x00), + ILI9881C_COMMAND_INSTR(0x14, 0x00), + ILI9881C_COMMAND_INSTR(0x15, 0x00), + ILI9881C_COMMAND_INSTR(0x16, 0x00), + ILI9881C_COMMAND_INSTR(0x17, 0x00), + ILI9881C_COMMAND_INSTR(0x18, 0x00), + ILI9881C_COMMAND_INSTR(0x19, 0x00), + ILI9881C_COMMAND_INSTR(0x1a, 0x00), + ILI9881C_COMMAND_INSTR(0x1b, 0x00), + ILI9881C_COMMAND_INSTR(0x1c, 0x00), + ILI9881C_COMMAND_INSTR(0x1d, 0x00), + ILI9881C_COMMAND_INSTR(0x1e, 0x40), + ILI9881C_COMMAND_INSTR(0x1f, 0x80), + ILI9881C_COMMAND_INSTR(0x20, 0x05), + ILI9881C_COMMAND_INSTR(0x20, 0x05), + ILI9881C_COMMAND_INSTR(0x21, 0x02), + ILI9881C_COMMAND_INSTR(0x22, 0x00), + ILI9881C_COMMAND_INSTR(0x23, 0x00), + ILI9881C_COMMAND_INSTR(0x24, 0x00), + ILI9881C_COMMAND_INSTR(0x25, 0x00), + ILI9881C_COMMAND_INSTR(0x26, 0x00), + ILI9881C_COMMAND_INSTR(0x27, 0x00), + ILI9881C_COMMAND_INSTR(0x28, 0x33), + ILI9881C_COMMAND_INSTR(0x29, 0x02), + ILI9881C_COMMAND_INSTR(0x2a, 0x00), + ILI9881C_COMMAND_INSTR(0x2b, 0x00), + ILI9881C_COMMAND_INSTR(0x2c, 0x00), + ILI9881C_COMMAND_INSTR(0x2d, 0x00), + ILI9881C_COMMAND_INSTR(0x2e, 0x00), + ILI9881C_COMMAND_INSTR(0x2f, 0x00), + ILI9881C_COMMAND_INSTR(0x30, 0x00), + ILI9881C_COMMAND_INSTR(0x31, 0x00), + ILI9881C_COMMAND_INSTR(0x32, 0x00), + ILI9881C_COMMAND_INSTR(0x32, 0x00), + ILI9881C_COMMAND_INSTR(0x33, 0x00), + ILI9881C_COMMAND_INSTR(0x34, 0x04), + ILI9881C_COMMAND_INSTR(0x35, 0x00), + ILI9881C_COMMAND_INSTR(0x36, 0x00), + ILI9881C_COMMAND_INSTR(0x37, 0x00), + ILI9881C_COMMAND_INSTR(0x38, 0x3C), + ILI9881C_COMMAND_INSTR(0x39, 0x00), + ILI9881C_COMMAND_INSTR(0x3a, 0x40), + ILI9881C_COMMAND_INSTR(0x3b, 0x40), + ILI9881C_COMMAND_INSTR(0x3c, 0x00), + ILI9881C_COMMAND_INSTR(0x3d, 0x00), + ILI9881C_COMMAND_INSTR(0x3e, 0x00), + ILI9881C_COMMAND_INSTR(0x3f, 0x00), + ILI9881C_COMMAND_INSTR(0x40, 0x00), + ILI9881C_COMMAND_INSTR(0x41, 0x00), + ILI9881C_COMMAND_INSTR(0x42, 0x00), + 
ILI9881C_COMMAND_INSTR(0x43, 0x00), + ILI9881C_COMMAND_INSTR(0x44, 0x00), + ILI9881C_COMMAND_INSTR(0x50, 0x01), + ILI9881C_COMMAND_INSTR(0x51, 0x23), + ILI9881C_COMMAND_INSTR(0x52, 0x45), + ILI9881C_COMMAND_INSTR(0x53, 0x67), + ILI9881C_COMMAND_INSTR(0x54, 0x89), + ILI9881C_COMMAND_INSTR(0x55, 0xab), + ILI9881C_COMMAND_INSTR(0x56, 0x01), + ILI9881C_COMMAND_INSTR(0x57, 0x23), + ILI9881C_COMMAND_INSTR(0x58, 0x45), + ILI9881C_COMMAND_INSTR(0x59, 0x67), + ILI9881C_COMMAND_INSTR(0x5a, 0x89), + ILI9881C_COMMAND_INSTR(0x5b, 0xab), + ILI9881C_COMMAND_INSTR(0x5c, 0xcd), + ILI9881C_COMMAND_INSTR(0x5d, 0xef), + ILI9881C_COMMAND_INSTR(0x5e, 0x11), + ILI9881C_COMMAND_INSTR(0x5f, 0x01), + ILI9881C_COMMAND_INSTR(0x60, 0x00), + ILI9881C_COMMAND_INSTR(0x61, 0x15), + ILI9881C_COMMAND_INSTR(0x62, 0x14), + ILI9881C_COMMAND_INSTR(0x63, 0x0E), + ILI9881C_COMMAND_INSTR(0x64, 0x0F), + ILI9881C_COMMAND_INSTR(0x65, 0x0C), + ILI9881C_COMMAND_INSTR(0x66, 0x0D), + ILI9881C_COMMAND_INSTR(0x67, 0x06), + ILI9881C_COMMAND_INSTR(0x68, 0x02), + ILI9881C_COMMAND_INSTR(0x69, 0x07), + ILI9881C_COMMAND_INSTR(0x6a, 0x02), + ILI9881C_COMMAND_INSTR(0x6b, 0x02), + ILI9881C_COMMAND_INSTR(0x6c, 0x02), + ILI9881C_COMMAND_INSTR(0x6d, 0x02), + ILI9881C_COMMAND_INSTR(0x6e, 0x02), + ILI9881C_COMMAND_INSTR(0x6f, 0x02), + ILI9881C_COMMAND_INSTR(0x70, 0x02), + ILI9881C_COMMAND_INSTR(0x71, 0x02), + ILI9881C_COMMAND_INSTR(0x72, 0x02), + ILI9881C_COMMAND_INSTR(0x73, 0x02), + ILI9881C_COMMAND_INSTR(0x74, 0x02), + ILI9881C_COMMAND_INSTR(0x75, 0x01), + ILI9881C_COMMAND_INSTR(0x76, 0x00), + ILI9881C_COMMAND_INSTR(0x77, 0x14), + ILI9881C_COMMAND_INSTR(0x78, 0x15), + ILI9881C_COMMAND_INSTR(0x79, 0x0E), + ILI9881C_COMMAND_INSTR(0x7a, 0x0F), + ILI9881C_COMMAND_INSTR(0x7b, 0x0C), + ILI9881C_COMMAND_INSTR(0x7c, 0x0D), + ILI9881C_COMMAND_INSTR(0x7d, 0x06), + ILI9881C_COMMAND_INSTR(0x7e, 0x02), + ILI9881C_COMMAND_INSTR(0x7f, 0x07), + ILI9881C_COMMAND_INSTR(0x80, 0x02), + ILI9881C_COMMAND_INSTR(0x81, 0x02), + ILI9881C_COMMAND_INSTR(0x83, 0x02), + ILI9881C_COMMAND_INSTR(0x84, 0x02), + ILI9881C_COMMAND_INSTR(0x85, 0x02), + ILI9881C_COMMAND_INSTR(0x86, 0x02), + ILI9881C_COMMAND_INSTR(0x87, 0x02), + ILI9881C_COMMAND_INSTR(0x88, 0x02), + ILI9881C_COMMAND_INSTR(0x89, 0x02), + ILI9881C_COMMAND_INSTR(0x8A, 0x02), + ILI9881C_SWITCH_PAGE_INSTR(0x4), + ILI9881C_COMMAND_INSTR(0x6C, 0x15), + ILI9881C_COMMAND_INSTR(0x6E, 0x2A), + ILI9881C_COMMAND_INSTR(0x6F, 0x33), + ILI9881C_COMMAND_INSTR(0x3A, 0x94), + ILI9881C_COMMAND_INSTR(0x8D, 0x15), + ILI9881C_COMMAND_INSTR(0x87, 0xBA), + ILI9881C_COMMAND_INSTR(0x26, 0x76), + ILI9881C_COMMAND_INSTR(0xB2, 0xD1), + ILI9881C_COMMAND_INSTR(0xB5, 0x06), + ILI9881C_SWITCH_PAGE_INSTR(0x1), + ILI9881C_COMMAND_INSTR(0x22, 0x0A), + ILI9881C_COMMAND_INSTR(0x31, 0x00), + ILI9881C_COMMAND_INSTR(0x53, 0x90), + ILI9881C_COMMAND_INSTR(0x55, 0xA2), + ILI9881C_COMMAND_INSTR(0x50, 0xB7), + ILI9881C_COMMAND_INSTR(0x51, 0xB7), + ILI9881C_COMMAND_INSTR(0x60, 0x22), + ILI9881C_COMMAND_INSTR(0x61, 0x00), + ILI9881C_COMMAND_INSTR(0x62, 0x19), + ILI9881C_COMMAND_INSTR(0x63, 0x10), + ILI9881C_COMMAND_INSTR(0xA0, 0x08), + ILI9881C_COMMAND_INSTR(0xA1, 0x1A), + ILI9881C_COMMAND_INSTR(0xA2, 0x27), + ILI9881C_COMMAND_INSTR(0xA3, 0x15), + ILI9881C_COMMAND_INSTR(0xA4, 0x17), + ILI9881C_COMMAND_INSTR(0xA5, 0x2A), + ILI9881C_COMMAND_INSTR(0xA6, 0x1E), + ILI9881C_COMMAND_INSTR(0xA7, 0x1F), + ILI9881C_COMMAND_INSTR(0xA8, 0x8B), + ILI9881C_COMMAND_INSTR(0xA9, 0x1B), + ILI9881C_COMMAND_INSTR(0xAA, 0x27), + ILI9881C_COMMAND_INSTR(0xAB, 0x78), + 
ILI9881C_COMMAND_INSTR(0xAC, 0x18), + ILI9881C_COMMAND_INSTR(0xAD, 0x18), + ILI9881C_COMMAND_INSTR(0xAE, 0x4C), + ILI9881C_COMMAND_INSTR(0xAF, 0x21), + ILI9881C_COMMAND_INSTR(0xB0, 0x27), + ILI9881C_COMMAND_INSTR(0xB1, 0x54), + ILI9881C_COMMAND_INSTR(0xB2, 0x67), + ILI9881C_COMMAND_INSTR(0xB3, 0x39), + ILI9881C_COMMAND_INSTR(0xC0, 0x08), + ILI9881C_COMMAND_INSTR(0xC1, 0x1A), + ILI9881C_COMMAND_INSTR(0xC2, 0x27), + ILI9881C_COMMAND_INSTR(0xC3, 0x15), + ILI9881C_COMMAND_INSTR(0xC4, 0x17), + ILI9881C_COMMAND_INSTR(0xC5, 0x2A), + ILI9881C_COMMAND_INSTR(0xC6, 0x1E), + ILI9881C_COMMAND_INSTR(0xC7, 0x1F), + ILI9881C_COMMAND_INSTR(0xC8, 0x8B), + ILI9881C_COMMAND_INSTR(0xC9, 0x1B), + ILI9881C_COMMAND_INSTR(0xCA, 0x27), + ILI9881C_COMMAND_INSTR(0xCB, 0x78), + ILI9881C_COMMAND_INSTR(0xCC, 0x18), + ILI9881C_COMMAND_INSTR(0xCD, 0x18), + ILI9881C_COMMAND_INSTR(0xCE, 0x4C), + ILI9881C_COMMAND_INSTR(0xCF, 0x21), + ILI9881C_COMMAND_INSTR(0xD0, 0x27), + ILI9881C_COMMAND_INSTR(0xD1, 0x54), + ILI9881C_COMMAND_INSTR(0xD2, 0x67), + ILI9881C_COMMAND_INSTR(0xD3, 0x39), + ILI9881C_SWITCH_PAGE_INSTR(0), + ILI9881C_COMMAND_INSTR(0x35, 0x00), + ILI9881C_COMMAND_INSTR(0x3A, 0x7), +}; + static const struct ili9881c_instr tl050hdv35_init[] = { ILI9881C_SWITCH_PAGE_INSTR(3), ILI9881C_COMMAND_INSTR(0x01, 0x00), @@ -1080,10 +1276,10 @@ static int ili9881c_prepare(struct drm_panel *panel) msleep(5); /* And reset it */ - gpiod_set_value(ctx->reset, 1); + gpiod_set_value_cansleep(ctx->reset, 1); msleep(20); - gpiod_set_value(ctx->reset, 0); + gpiod_set_value_cansleep(ctx->reset, 0); msleep(20); for (i = 0; i < ctx->desc->init_length; i++) { @@ -1138,7 +1334,7 @@ static int ili9881c_unprepare(struct drm_panel *panel) mipi_dsi_dcs_enter_sleep_mode(ctx->dsi); regulator_disable(ctx->power); - gpiod_set_value(ctx->reset, 1); + gpiod_set_value_cansleep(ctx->reset, 1); return 0; } @@ -1177,6 +1373,23 @@ static const struct drm_display_mode k101_im2byl02_default_mode = { .height_mm = 217, }; +static const struct drm_display_mode kd050hdfia020_default_mode = { + .clock = 62000, + + .hdisplay = 720, + .hsync_start = 720 + 10, + .hsync_end = 720 + 10 + 20, + .htotal = 720 + 10 + 20 + 30, + + .vdisplay = 1280, + .vsync_start = 1280 + 10, + .vsync_end = 1280 + 10 + 10, + .vtotal = 1280 + 10 + 10 + 20, + + .width_mm = 62, + .height_mm = 110, +}; + static const struct drm_display_mode tl050hdv35_default_mode = { .clock = 59400, @@ -1345,6 +1558,14 @@ static const struct ili9881c_desc k101_im2byl02_desc = { .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE, }; +static const struct ili9881c_desc kd050hdfia020_desc = { + .init = kd050hdfia020_init, + .init_length = ARRAY_SIZE(kd050hdfia020_init), + .mode = &kd050hdfia020_default_mode, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | + MIPI_DSI_MODE_LPM, +}; + static const struct ili9881c_desc tl050hdv35_desc = { .init = tl050hdv35_init, .init_length = ARRAY_SIZE(tl050hdv35_init), @@ -1372,6 +1593,7 @@ static const struct ili9881c_desc am8001280g_desc = { static const struct of_device_id ili9881c_of_match[] = { { .compatible = "bananapi,lhr050h41", .data = &lhr050h41_desc }, { .compatible = "feixin,k101-im2byl02", .data = &k101_im2byl02_desc }, + { .compatible = "startek,kd050hdfia020", .data = &kd050hdfia020_desc }, { .compatible = "tdo,tl050hdv35", .data = &tl050hdv35_desc }, { .compatible = "wanchanglong,w552946aba", .data = &w552946aba_desc }, { .compatible = "ampire,am8001280g", .data = &am8001280g_desc }, diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c 
b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c index 9d87cc1a357e..1a26205701b5 100644 --- a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c +++ b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c @@ -295,8 +295,6 @@ static int ltk050h3148w_init_sequence(struct ltk050h3146w *ctx) mipi_dsi_dcs_write_seq(dsi, 0xbd, 0x00); mipi_dsi_dcs_write_seq(dsi, 0xc6, 0xef); mipi_dsi_dcs_write_seq(dsi, 0xd4, 0x02); - mipi_dsi_dcs_write_seq(dsi, 0x11); - mipi_dsi_dcs_write_seq(dsi, 0x29); ret = mipi_dsi_dcs_set_tear_on(dsi, 1); if (ret < 0) { @@ -326,7 +324,8 @@ static const struct drm_display_mode ltk050h3148w_mode = { static const struct ltk050h3146w_desc ltk050h3148w_data = { .mode = <k050h3148w_mode, .init = ltk050h3148w_init_sequence, - .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_VIDEO_BURST, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | + MIPI_DSI_MODE_VIDEO_BURST, }; static int ltk050h3146w_init_sequence(struct ltk050h3146w *ctx) diff --git a/drivers/gpu/drm/panel/panel-samsung-atna33xc20.c b/drivers/gpu/drm/panel/panel-samsung-atna33xc20.c index 76c2a8f6718c..9c336c71562b 100644 --- a/drivers/gpu/drm/panel/panel-samsung-atna33xc20.c +++ b/drivers/gpu/drm/panel/panel-samsung-atna33xc20.c @@ -109,19 +109,17 @@ static int atana33xc20_resume(struct device *dev) if (hpd_asserted < 0) ret = hpd_asserted; - if (ret) + if (ret) { dev_warn(dev, "Error waiting for HPD GPIO: %d\n", ret); - - return ret; - } - - if (p->aux->wait_hpd_asserted) { + goto error; + } + } else if (p->aux->wait_hpd_asserted) { ret = p->aux->wait_hpd_asserted(p->aux, HPD_MAX_US); - if (ret) + if (ret) { dev_warn(dev, "Controller error waiting for HPD: %d\n", ret); - - return ret; + goto error; + } } /* @@ -133,6 +131,12 @@ static int atana33xc20_resume(struct device *dev) * right times. */ return 0; + +error: + drm_dp_dpcd_set_powered(p->aux, false); + regulator_disable(p->supply); + + return ret; } static int atana33xc20_disable(struct drm_panel *panel) diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c b/drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c new file mode 100644 index 000000000000..10bc8fb5f1f9 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the Samsung S6E3FA7 panel. + * + * Copyright (c) 2022-2024, The Linux Foundation. All rights reserved. + * Generated with linux-mdss-dsi-panel-driver-generator from vendor device tree: + * Copyright (c) 2013, The Linux Foundation. All rights reserved. 
+ */ + +#include <linux/backlight.h> +#include <linux/delay.h> +#include <linux/gpio/consumer.h> +#include <linux/module.h> +#include <linux/of.h> + +#include <video/mipi_display.h> + +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> + +struct s6e3fa7_panel { + struct drm_panel panel; + struct mipi_dsi_device *dsi; + struct gpio_desc *reset_gpio; +}; + +static inline struct s6e3fa7_panel *to_s6e3fa7_panel(struct drm_panel *panel) +{ + return container_of(panel, struct s6e3fa7_panel, panel); +} + +static void s6e3fa7_panel_reset(struct s6e3fa7_panel *ctx) +{ + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + usleep_range(1000, 2000); + gpiod_set_value_cansleep(ctx->reset_gpio, 0); + usleep_range(10000, 11000); +} + +static int s6e3fa7_panel_on(struct s6e3fa7_panel *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + ret = mipi_dsi_dcs_exit_sleep_mode(dsi); + if (ret < 0) { + dev_err(dev, "Failed to exit sleep mode: %d\n", ret); + return ret; + } + msleep(120); + + ret = mipi_dsi_dcs_set_tear_on(dsi, MIPI_DSI_DCS_TEAR_MODE_VBLANK); + if (ret < 0) { + dev_err(dev, "Failed to set tear on: %d\n", ret); + return ret; + } + + mipi_dsi_dcs_write_seq(dsi, 0xf0, 0x5a, 0x5a); + mipi_dsi_dcs_write_seq(dsi, 0xf4, + 0xbb, 0x23, 0x19, 0x3a, 0x9f, 0x0f, 0x09, 0xc0, + 0x00, 0xb4, 0x37, 0x70, 0x79, 0x69); + mipi_dsi_dcs_write_seq(dsi, 0xf0, 0xa5, 0xa5); + mipi_dsi_dcs_write_seq(dsi, MIPI_DCS_WRITE_CONTROL_DISPLAY, 0x20); + + ret = mipi_dsi_dcs_set_display_on(dsi); + if (ret < 0) { + dev_err(dev, "Failed to set display on: %d\n", ret); + return ret; + } + + return 0; +} + +static int s6e3fa7_panel_prepare(struct drm_panel *panel) +{ + struct s6e3fa7_panel *ctx = to_s6e3fa7_panel(panel); + struct device *dev = &ctx->dsi->dev; + int ret; + + s6e3fa7_panel_reset(ctx); + + ret = s6e3fa7_panel_on(ctx); + if (ret < 0) { + dev_err(dev, "Failed to initialize panel: %d\n", ret); + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + return ret; + } + + return 0; +} + +static int s6e3fa7_panel_unprepare(struct drm_panel *panel) +{ + struct s6e3fa7_panel *ctx = to_s6e3fa7_panel(panel); + + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + + return 0; +} + +static int s6e3fa7_panel_disable(struct drm_panel *panel) +{ + struct s6e3fa7_panel *ctx = to_s6e3fa7_panel(panel); + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + ret = mipi_dsi_dcs_set_display_off(dsi); + if (ret < 0) { + dev_err(dev, "Failed to set display off: %d\n", ret); + return ret; + } + + ret = mipi_dsi_dcs_enter_sleep_mode(dsi); + if (ret < 0) { + dev_err(dev, "Failed to enter sleep mode: %d\n", ret); + return ret; + } + msleep(120); + + return 0; +} + +static const struct drm_display_mode s6e3fa7_panel_mode = { + .clock = (1080 + 32 + 32 + 78) * (2220 + 32 + 4 + 78) * 60 / 1000, + .hdisplay = 1080, + .hsync_start = 1080 + 32, + .hsync_end = 1080 + 32 + 32, + .htotal = 1080 + 32 + 32 + 78, + .vdisplay = 2220, + .vsync_start = 2220 + 32, + .vsync_end = 2220 + 32 + 4, + .vtotal = 2220 + 32 + 4 + 78, + .width_mm = 62, + .height_mm = 127, +}; + +static int s6e3fa7_panel_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(connector->dev, &s6e3fa7_panel_mode); + if (!mode) + return -ENOMEM; + + drm_mode_set_name(mode); + + mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + connector->display_info.width_mm = mode->width_mm; + 
connector->display_info.height_mm = mode->height_mm; + drm_mode_probed_add(connector, mode); + + return 1; +} + +static const struct drm_panel_funcs s6e3fa7_panel_funcs = { + .prepare = s6e3fa7_panel_prepare, + .unprepare = s6e3fa7_panel_unprepare, + .disable = s6e3fa7_panel_disable, + .get_modes = s6e3fa7_panel_get_modes, +}; + +static int s6e3fa7_panel_bl_update_status(struct backlight_device *bl) +{ + struct mipi_dsi_device *dsi = bl_get_data(bl); + u16 brightness = backlight_get_brightness(bl); + int ret; + + ret = mipi_dsi_dcs_set_display_brightness_large(dsi, brightness); + if (ret < 0) + return ret; + + return 0; +} + +static int s6e3fa7_panel_bl_get_brightness(struct backlight_device *bl) +{ + struct mipi_dsi_device *dsi = bl_get_data(bl); + u16 brightness; + int ret; + + ret = mipi_dsi_dcs_get_display_brightness_large(dsi, &brightness); + if (ret < 0) + return ret; + + return brightness; +} + +static const struct backlight_ops s6e3fa7_panel_bl_ops = { + .update_status = s6e3fa7_panel_bl_update_status, + .get_brightness = s6e3fa7_panel_bl_get_brightness, +}; + +static struct backlight_device * +s6e3fa7_panel_create_backlight(struct mipi_dsi_device *dsi) +{ + struct device *dev = &dsi->dev; + const struct backlight_properties props = { + .type = BACKLIGHT_RAW, + .brightness = 1023, + .max_brightness = 1023, + }; + + return devm_backlight_device_register(dev, dev_name(dev), dev, dsi, + &s6e3fa7_panel_bl_ops, &props); +} + +static int s6e3fa7_panel_probe(struct mipi_dsi_device *dsi) +{ + struct device *dev = &dsi->dev; + struct s6e3fa7_panel *ctx; + int ret; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(ctx->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(ctx->reset_gpio), + "Failed to get reset-gpios\n"); + + ctx->dsi = dsi; + mipi_dsi_set_drvdata(dsi, ctx); + + dsi->lanes = 4; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM; + + drm_panel_init(&ctx->panel, dev, &s6e3fa7_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + ctx->panel.prepare_prev_first = true; + + ctx->panel.backlight = s6e3fa7_panel_create_backlight(dsi); + if (IS_ERR(ctx->panel.backlight)) + return dev_err_probe(dev, PTR_ERR(ctx->panel.backlight), + "Failed to create backlight\n"); + + drm_panel_add(&ctx->panel); + + ret = mipi_dsi_attach(dsi); + if (ret < 0) { + dev_err(dev, "Failed to attach to DSI host: %d\n", ret); + drm_panel_remove(&ctx->panel); + return ret; + } + + return 0; +} + +static void s6e3fa7_panel_remove(struct mipi_dsi_device *dsi) +{ + struct s6e3fa7_panel *ctx = mipi_dsi_get_drvdata(dsi); + int ret; + + ret = mipi_dsi_detach(dsi); + if (ret < 0) + dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret); + + drm_panel_remove(&ctx->panel); +} + +static const struct of_device_id s6e3fa7_panel_of_match[] = { + { .compatible = "samsung,s6e3fa7-ams559nk06" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, s6e3fa7_panel_of_match); + +static struct mipi_dsi_driver s6e3fa7_panel_driver = { + .probe = s6e3fa7_panel_probe, + .remove = s6e3fa7_panel_remove, + .driver = { + .name = "panel-samsung-s6e3fa7", + .of_match_table = s6e3fa7_panel_of_match, + }, +}; +module_mipi_dsi_driver(s6e3fa7_panel_driver); + +MODULE_AUTHOR("Richard Acayan <mailingradian@gmail.com>"); +MODULE_DESCRIPTION("DRM driver for Samsung S6E3FA7 command mode DSI panel"); +MODULE_LICENSE("GPL"); diff --git 
a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 20e3df1c59d4..7215cf767898 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1457,6 +1457,32 @@ static const struct panel_desc boe_hv070wsa = { .connector_type = DRM_MODE_CONNECTOR_LVDS, }; +static const struct display_timing cct_cmt430b19n00_timing = { + .pixelclock = { 8000000, 9000000, 12000000 }, + .hactive = { 480, 480, 480 }, + .hfront_porch = { 2, 8, 75 }, + .hback_porch = { 3, 43, 43 }, + .hsync_len = { 2, 4, 75 }, + .vactive = { 272, 272, 272 }, + .vfront_porch = { 2, 8, 37 }, + .vback_porch = { 2, 12, 12 }, + .vsync_len = { 2, 4, 37 }, + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW +}; + +static const struct panel_desc cct_cmt430b19n00 = { + .timings = &cct_cmt430b19n00_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 95, + .height = 53, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, +}; + static const struct drm_display_mode cdtech_s043wq26h_ct7_mode = { .clock = 9000, .hdisplay = 480, @@ -3465,6 +3491,32 @@ static const struct panel_desc pda_91_00156_a0 = { .bus_format = MEDIA_BUS_FMT_RGB888_1X24, }; +static const struct drm_display_mode powertip_ph128800t006_zhc01_mode = { + .clock = 66500, + .hdisplay = 1280, + .hsync_start = 1280 + 12, + .hsync_end = 1280 + 12 + 20, + .htotal = 1280 + 12 + 20 + 56, + .vdisplay = 800, + .vsync_start = 800 + 1, + .vsync_end = 800 + 1 + 3, + .vtotal = 800 + 1 + 3 + 20, + .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC, +}; + +static const struct panel_desc powertip_ph128800t006_zhc01 = { + .modes = &powertip_ph128800t006_zhc01_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 216, + .height = 135, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct drm_display_mode powertip_ph800480t013_idf02_mode = { .clock = 24750, .hdisplay = 800, @@ -4403,6 +4455,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "boe,hv070wsa-100", .data = &boe_hv070wsa }, { + .compatible = "cct,cmt430b19n00", + .data = &cct_cmt430b19n00, + }, { .compatible = "cdtech,s043wq26h-ct7", .data = &cdtech_s043wq26h_ct7, }, { @@ -4640,6 +4695,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "pda,91-00156-a0", .data = &pda_91_00156_a0, }, { + .compatible = "powertip,ph128800t006-zhc01", + .data = &powertip_ph128800t006_zhc01, + }, { .compatible = "powertip,ph800480t013-idf02", .data = &powertip_ph800480t013_idf02, }, { diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile index 2c01c1e7523e..7da2b3f02ed9 100644 --- a/drivers/gpu/drm/panfrost/Makefile +++ b/drivers/gpu/drm/panfrost/Makefile @@ -12,6 +12,4 @@ panfrost-y := \ panfrost_perfcnt.o \ panfrost_dump.o -panfrost-$(CONFIG_DEBUG_FS) += panfrost_debugfs.o - obj-$(CONFIG_DRM_PANFROST) += panfrost.o diff --git a/drivers/gpu/drm/panfrost/panfrost_debugfs.c b/drivers/gpu/drm/panfrost/panfrost_debugfs.c deleted file mode 100644 index 72d4286a6bf7..000000000000 --- a/drivers/gpu/drm/panfrost/panfrost_debugfs.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright 2023 Collabora ltd. */ -/* Copyright 2023 Amazon.com, Inc. or its affiliates. 
*/ - -#include <linux/debugfs.h> -#include <linux/platform_device.h> -#include <drm/drm_debugfs.h> -#include <drm/drm_file.h> -#include <drm/panfrost_drm.h> - -#include "panfrost_device.h" -#include "panfrost_gpu.h" -#include "panfrost_debugfs.h" - -void panfrost_debugfs_init(struct drm_minor *minor) -{ - struct drm_device *dev = minor->dev; - struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev->dev)); - - debugfs_create_atomic_t("profile", 0600, minor->debugfs_root, &pfdev->profile_mode); -} diff --git a/drivers/gpu/drm/panfrost/panfrost_debugfs.h b/drivers/gpu/drm/panfrost/panfrost_debugfs.h deleted file mode 100644 index c5af5f35877f..000000000000 --- a/drivers/gpu/drm/panfrost/panfrost_debugfs.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 2023 Collabora ltd. - * Copyright 2023 Amazon.com, Inc. or its affiliates. - */ - -#ifndef PANFROST_DEBUGFS_H -#define PANFROST_DEBUGFS_H - -#ifdef CONFIG_DEBUG_FS -void panfrost_debugfs_init(struct drm_minor *minor); -#endif - -#endif /* PANFROST_DEBUGFS_H */ diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index 62f7e3527385..cffcb0ac7c11 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -130,7 +130,7 @@ struct panfrost_device { struct list_head scheduled_jobs; struct panfrost_perfcnt *perfcnt; - atomic_t profile_mode; + bool profile_mode; struct mutex sched_lock; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index a926d71e8131..ef9f6c0716d5 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -20,7 +20,6 @@ #include "panfrost_job.h" #include "panfrost_gpu.h" #include "panfrost_perfcnt.h" -#include "panfrost_debugfs.h" static bool unstable_ioctls; module_param_unsafe(unstable_ioctls, bool, 0600); @@ -551,10 +550,12 @@ static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS); for (i = 0; i < NUM_JOB_SLOTS - 1; i++) { - drm_printf(p, "drm-engine-%s:\t%llu ns\n", - engine_names[i], panfrost_priv->engine_usage.elapsed_ns[i]); - drm_printf(p, "drm-cycles-%s:\t%llu\n", - engine_names[i], panfrost_priv->engine_usage.cycles[i]); + if (pfdev->profile_mode) { + drm_printf(p, "drm-engine-%s:\t%llu ns\n", + engine_names[i], panfrost_priv->engine_usage.elapsed_ns[i]); + drm_printf(p, "drm-cycles-%s:\t%llu\n", + engine_names[i], panfrost_priv->engine_usage.cycles[i]); + } drm_printf(p, "drm-maxfreq-%s:\t%lu Hz\n", engine_names[i], pfdev->pfdevfreq.fast_rate); drm_printf(p, "drm-curfreq-%s:\t%lu Hz\n", @@ -600,10 +601,6 @@ static const struct drm_driver panfrost_drm_driver = { .gem_create_object = panfrost_gem_create_object, .gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table, - -#ifdef CONFIG_DEBUG_FS - .debugfs_init = panfrost_debugfs_init, -#endif }; static int panfrost_probe(struct platform_device *pdev) @@ -692,6 +689,40 @@ static void panfrost_remove(struct platform_device *pdev) drm_dev_put(ddev); } +static ssize_t profiling_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct panfrost_device *pfdev = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", pfdev->profile_mode); +} + +static ssize_t profiling_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct panfrost_device *pfdev = dev_get_drvdata(dev); + bool value; + int err; 
+ + err = kstrtobool(buf, &value); + if (err) + return err; + + pfdev->profile_mode = value; + + return len; +} + +static DEVICE_ATTR_RW(profiling); + +static struct attribute *panfrost_attrs[] = { + &dev_attr_profiling.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(panfrost); + /* * The OPP core wants the supply names to be NULL terminated, but we need the * correct num_supplies value for regulator core. Hence, we NULL terminate here @@ -789,6 +820,7 @@ static struct platform_driver panfrost_driver = { .name = "panfrost", .pm = pm_ptr(&panfrost_pm_ops), .of_match_table = dt_match, + .dev_groups = panfrost_groups, }, }; module_platform_driver(panfrost_driver); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 0c2dbf6ef2a5..a61ef0af9a4e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -243,7 +243,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) subslot = panfrost_enqueue_job(pfdev, js, job); /* Don't queue the job if a reset is in progress */ if (!atomic_read(&pfdev->reset.pending)) { - if (atomic_read(&pfdev->profile_mode)) { + if (pfdev->profile_mode) { panfrost_cycle_counter_get(pfdev); job->is_profiled = true; job->start_time = ktime_get(); diff --git a/drivers/gpu/drm/panthor/Kconfig b/drivers/gpu/drm/panthor/Kconfig new file mode 100644 index 000000000000..55b40ad07f3b --- /dev/null +++ b/drivers/gpu/drm/panthor/Kconfig @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +config DRM_PANTHOR + tristate "Panthor (DRM support for ARM Mali CSF-based GPUs)" + depends on DRM + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + depends on MMU + select DEVFREQ_GOV_SIMPLE_ONDEMAND + select DRM_EXEC + select DRM_GEM_SHMEM_HELPER + select DRM_GPUVM + select DRM_SCHED + select IOMMU_IO_PGTABLE_LPAE + select IOMMU_SUPPORT + select PM_DEVFREQ + help + DRM driver for ARM Mali CSF-based GPUs. + + This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. + + Note that the Mali-G68 and Mali-G78, while Valhall architecture, will + be supported with the panfrost driver as they are not CSF GPUs. diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile new file mode 100644 index 000000000000..15294719b09c --- /dev/null +++ b/drivers/gpu/drm/panthor/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +panthor-y := \ + panthor_devfreq.o \ + panthor_device.o \ + panthor_drv.o \ + panthor_fw.o \ + panthor_gem.o \ + panthor_gpu.o \ + panthor_heap.o \ + panthor_mmu.o \ + panthor_sched.o + +obj-$(CONFIG_DRM_PANTHOR) += panthor.o diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c new file mode 100644 index 000000000000..c6d3c327cc24 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_devfreq.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2019 Collabora ltd. */ + +#include <linux/clk.h> +#include <linux/devfreq.h> +#include <linux/devfreq_cooling.h> +#include <linux/platform_device.h> +#include <linux/pm_opp.h> + +#include <drm/drm_managed.h> + +#include "panthor_devfreq.h" +#include "panthor_device.h" + +/** + * struct panthor_devfreq - Device frequency management + */ +struct panthor_devfreq { + /** @devfreq: devfreq device. */ + struct devfreq *devfreq; + + /** @gov_data: Governor data. */ + struct devfreq_simple_ondemand_data gov_data; + + /** @busy_time: Busy time. 
*/ + ktime_t busy_time; + + /** @idle_time: Idle time. */ + ktime_t idle_time; + + /** @time_last_update: Last update time. */ + ktime_t time_last_update; + + /** @last_busy_state: True if the GPU was busy last time we updated the state. */ + bool last_busy_state; + + /** + * @lock: Lock used to protect busy_time, idle_time, time_last_update and + * last_busy_state. + * + * These fields can be accessed concurrently by panthor_devfreq_get_dev_status() + * and panthor_devfreq_record_{busy,idle}(). + */ + spinlock_t lock; +}; + +static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq) +{ + ktime_t now, last; + + now = ktime_get(); + last = pdevfreq->time_last_update; + + if (pdevfreq->last_busy_state) + pdevfreq->busy_time += ktime_sub(now, last); + else + pdevfreq->idle_time += ktime_sub(now, last); + + pdevfreq->time_last_update = now; +} + +static int panthor_devfreq_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct dev_pm_opp *opp; + + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) + return PTR_ERR(opp); + dev_pm_opp_put(opp); + + return dev_pm_opp_set_rate(dev, *freq); +} + +static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq) +{ + pdevfreq->busy_time = 0; + pdevfreq->idle_time = 0; + pdevfreq->time_last_update = ktime_get(); +} + +static int panthor_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *status) +{ + struct panthor_device *ptdev = dev_get_drvdata(dev); + struct panthor_devfreq *pdevfreq = ptdev->devfreq; + unsigned long irqflags; + + status->current_frequency = clk_get_rate(ptdev->clks.core); + + spin_lock_irqsave(&pdevfreq->lock, irqflags); + + panthor_devfreq_update_utilization(pdevfreq); + + status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time, + pdevfreq->idle_time)); + + status->busy_time = ktime_to_ns(pdevfreq->busy_time); + + panthor_devfreq_reset(pdevfreq); + + spin_unlock_irqrestore(&pdevfreq->lock, irqflags); + + drm_dbg(&ptdev->base, "busy %lu total %lu %lu %% freq %lu MHz\n", + status->busy_time, status->total_time, + status->busy_time / (status->total_time / 100), + status->current_frequency / 1000 / 1000); + + return 0; +} + +static struct devfreq_dev_profile panthor_devfreq_profile = { + .timer = DEVFREQ_TIMER_DELAYED, + .polling_ms = 50, /* ~3 frames */ + .target = panthor_devfreq_target, + .get_dev_status = panthor_devfreq_get_dev_status, +}; + +int panthor_devfreq_init(struct panthor_device *ptdev) +{ + /* There's actually 2 regulators (mali and sram), but the OPP core only + * supports one. + * + * We assume the sram regulator is coupled with the mali one and let + * the coupling logic deal with voltage updates. 
+ */ + static const char * const reg_names[] = { "mali", NULL }; + struct thermal_cooling_device *cooling; + struct device *dev = ptdev->base.dev; + struct panthor_devfreq *pdevfreq; + struct dev_pm_opp *opp; + unsigned long cur_freq; + int ret; + + pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL); + if (!pdevfreq) + return -ENOMEM; + + ptdev->devfreq = pdevfreq; + + ret = devm_pm_opp_set_regulators(dev, reg_names); + if (ret) { + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); + + return ret; + } + + ret = devm_pm_opp_of_add_table(dev); + if (ret) + return ret; + + spin_lock_init(&pdevfreq->lock); + + panthor_devfreq_reset(pdevfreq); + + cur_freq = clk_get_rate(ptdev->clks.core); + + opp = devfreq_recommended_opp(dev, &cur_freq, 0); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + panthor_devfreq_profile.initial_freq = cur_freq; + + /* Regulator coupling only takes care of synchronizing/balancing voltage + * updates, but the coupled regulator needs to be enabled manually. + * + * We use devm_regulator_get_enable_optional() and keep the sram supply + * enabled until the device is removed, just like we do for the mali + * supply, which is enabled when dev_pm_opp_set_opp(dev, opp) is called, + * and disabled when the opp_table is torn down, using the devm action. + * + * If we really care about disabling regulators on suspend, we should: + * - use devm_regulator_get_optional() here + * - call dev_pm_opp_set_opp(dev, NULL) before leaving this function + * (this disables the regulator passed to the OPP layer) + * - call dev_pm_opp_set_opp(dev, NULL) and + * regulator_disable(ptdev->regulators.sram) in + * panthor_devfreq_suspend() + * - call dev_pm_opp_set_opp(dev, default_opp) and + * regulator_enable(ptdev->regulators.sram) in + * panthor_devfreq_resume() + * + * But without knowing if it's beneficial or not (in term of power + * consumption), or how much it slows down the suspend/resume steps, + * let's just keep regulators enabled for the device lifetime. + */ + ret = devm_regulator_get_enable_optional(dev, "sram"); + if (ret && ret != -ENODEV) { + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(dev, "Couldn't retrieve/enable sram supply\n"); + return ret; + } + + /* + * Set the recommend OPP this will enable and configure the regulator + * if any and will avoid a switch off by regulator_late_cleanup() + */ + ret = dev_pm_opp_set_opp(dev, opp); + if (ret) { + DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n"); + return ret; + } + + dev_pm_opp_put(opp); + + /* + * Setup default thresholds for the simple_ondemand governor. + * The values are chosen based on experiments. 
+ */ + pdevfreq->gov_data.upthreshold = 45; + pdevfreq->gov_data.downdifferential = 5; + + pdevfreq->devfreq = devm_devfreq_add_device(dev, &panthor_devfreq_profile, + DEVFREQ_GOV_SIMPLE_ONDEMAND, + &pdevfreq->gov_data); + if (IS_ERR(pdevfreq->devfreq)) { + DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n"); + ret = PTR_ERR(pdevfreq->devfreq); + pdevfreq->devfreq = NULL; + return ret; + } + + cooling = devfreq_cooling_em_register(pdevfreq->devfreq, NULL); + if (IS_ERR(cooling)) + DRM_DEV_INFO(dev, "Failed to register cooling device\n"); + + return 0; +} + +int panthor_devfreq_resume(struct panthor_device *ptdev) +{ + struct panthor_devfreq *pdevfreq = ptdev->devfreq; + + if (!pdevfreq->devfreq) + return 0; + + panthor_devfreq_reset(pdevfreq); + + return devfreq_resume_device(pdevfreq->devfreq); +} + +int panthor_devfreq_suspend(struct panthor_device *ptdev) +{ + struct panthor_devfreq *pdevfreq = ptdev->devfreq; + + if (!pdevfreq->devfreq) + return 0; + + return devfreq_suspend_device(pdevfreq->devfreq); +} + +void panthor_devfreq_record_busy(struct panthor_device *ptdev) +{ + struct panthor_devfreq *pdevfreq = ptdev->devfreq; + unsigned long irqflags; + + if (!pdevfreq->devfreq) + return; + + spin_lock_irqsave(&pdevfreq->lock, irqflags); + + panthor_devfreq_update_utilization(pdevfreq); + pdevfreq->last_busy_state = true; + + spin_unlock_irqrestore(&pdevfreq->lock, irqflags); +} + +void panthor_devfreq_record_idle(struct panthor_device *ptdev) +{ + struct panthor_devfreq *pdevfreq = ptdev->devfreq; + unsigned long irqflags; + + if (!pdevfreq->devfreq) + return; + + spin_lock_irqsave(&pdevfreq->lock, irqflags); + + panthor_devfreq_update_utilization(pdevfreq); + pdevfreq->last_busy_state = false; + + spin_unlock_irqrestore(&pdevfreq->lock, irqflags); +} diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h new file mode 100644 index 000000000000..83a5c9522493 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_devfreq.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2019 Collabora ltd. */ + +#ifndef __PANTHOR_DEVFREQ_H__ +#define __PANTHOR_DEVFREQ_H__ + +struct devfreq; +struct thermal_cooling_device; + +struct panthor_device; +struct panthor_devfreq; + +int panthor_devfreq_init(struct panthor_device *ptdev); + +int panthor_devfreq_resume(struct panthor_device *ptdev); +int panthor_devfreq_suspend(struct panthor_device *ptdev); + +void panthor_devfreq_record_busy(struct panthor_device *ptdev); +void panthor_devfreq_record_idle(struct panthor_device *ptdev); + +#endif /* __PANTHOR_DEVFREQ_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c new file mode 100644 index 000000000000..8b00f342d1b1 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -0,0 +1,561 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2023 Collabora ltd. 
 */ + +#include <linux/clk.h> +#include <linux/mm.h> +#include <linux/platform_device.h> +#include <linux/pm_domain.h> +#include <linux/pm_runtime.h> +#include <linux/regulator/consumer.h> +#include <linux/reset.h> + +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> + +#include "panthor_devfreq.h" +#include "panthor_device.h" +#include "panthor_fw.h" +#include "panthor_gpu.h" +#include "panthor_mmu.h" +#include "panthor_regs.h" +#include "panthor_sched.h" + +static int panthor_clk_init(struct panthor_device *ptdev) +{ + ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL); + if (IS_ERR(ptdev->clks.core)) + return dev_err_probe(ptdev->base.dev, + PTR_ERR(ptdev->clks.core), + "get 'core' clock failed"); + + ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks"); + if (IS_ERR(ptdev->clks.stacks)) + return dev_err_probe(ptdev->base.dev, + PTR_ERR(ptdev->clks.stacks), + "get 'stacks' clock failed"); + + ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup"); + if (IS_ERR(ptdev->clks.coregroup)) + return dev_err_probe(ptdev->base.dev, + PTR_ERR(ptdev->clks.coregroup), + "get 'coregroup' clock failed"); + + drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core)); + return 0; +} + +void panthor_device_unplug(struct panthor_device *ptdev) +{ + /* This function can be called from two different paths: the reset work + * and the platform device remove callback. drm_dev_unplug() doesn't + * deal with concurrent callers, so we have to protect drm_dev_unplug() + * calls with our own lock, and bail out if the device is already + * unplugged. + */ + mutex_lock(&ptdev->unplug.lock); + if (drm_dev_is_unplugged(&ptdev->base)) { + /* Someone beat us to it; release the lock and wait for the unplug + * operation to be reported as done. + */ + mutex_unlock(&ptdev->unplug.lock); + wait_for_completion(&ptdev->unplug.done); + return; + } + + /* Call drm_dev_unplug() so any access to HW blocks happening after + * that point gets rejected. + */ + drm_dev_unplug(&ptdev->base); + + /* We do the rest of the unplug with the unplug lock released; + * future callers will wait on ptdev->unplug.done anyway. + */ + mutex_unlock(&ptdev->unplug.lock); + + drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0); + + /* Now, try to cleanly shut down the GPU before the device resources + * get reclaimed. + */ + panthor_sched_unplug(ptdev); + panthor_fw_unplug(ptdev); + panthor_mmu_unplug(ptdev); + panthor_gpu_unplug(ptdev); + + pm_runtime_dont_use_autosuspend(ptdev->base.dev); + pm_runtime_put_sync_suspend(ptdev->base.dev); + + /* If PM is disabled, we need to call the suspend handler manually. */ + if (!IS_ENABLED(CONFIG_PM)) + panthor_device_suspend(ptdev->base.dev); + + /* Report the unplug operation as done to unblock concurrent + * panthor_device_unplug() callers.
+ */ + complete_all(&ptdev->unplug.done); +} + +static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data) +{ + struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); + + cancel_work_sync(&ptdev->reset.work); + destroy_workqueue(ptdev->reset.wq); +} + +static void panthor_device_reset_work(struct work_struct *work) +{ + struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work); + int ret = 0, cookie; + + if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) { + /* + * No need for a reset as the device has been (or will be) + * powered down + */ + atomic_set(&ptdev->reset.pending, 0); + return; + } + + if (!drm_dev_enter(&ptdev->base, &cookie)) + return; + + panthor_sched_pre_reset(ptdev); + panthor_fw_pre_reset(ptdev, true); + panthor_mmu_pre_reset(ptdev); + panthor_gpu_soft_reset(ptdev); + panthor_gpu_l2_power_on(ptdev); + panthor_mmu_post_reset(ptdev); + ret = panthor_fw_post_reset(ptdev); + if (ret) + goto out_dev_exit; + + atomic_set(&ptdev->reset.pending, 0); + panthor_sched_post_reset(ptdev); + +out_dev_exit: + drm_dev_exit(cookie); + + if (ret) { + panthor_device_unplug(ptdev); + drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable."); + } +} + +static bool panthor_device_is_initialized(struct panthor_device *ptdev) +{ + return !!ptdev->scheduler; +} + +static void panthor_device_free_page(struct drm_device *ddev, void *data) +{ + __free_page(data); +} + +int panthor_device_init(struct panthor_device *ptdev) +{ + u32 *dummy_page_virt; + struct resource *res; + struct page *p; + int ret; + + ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; + + init_completion(&ptdev->unplug.done); + ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); + if (ret) + return ret; + + ret = drmm_mutex_init(&ptdev->base, &ptdev->pm.mmio_lock); + if (ret) + return ret; + + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); + p = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + ptdev->pm.dummy_latest_flush = p; + dummy_page_virt = page_address(p); + ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page, + ptdev->pm.dummy_latest_flush); + if (ret) + return ret; + + /* + * Set the dummy page holding the latest flush to 1. This will cause the + * flush to avoided as we know it isn't necessary if the submission + * happens while the dummy page is mapped. Zero cannot be used because + * that means 'always flush'. + */ + *dummy_page_virt = 1; + + INIT_WORK(&ptdev->reset.work, panthor_device_reset_work); + ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0); + if (!ptdev->reset.wq) + return -ENOMEM; + + ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL); + if (ret) + return ret; + + ret = panthor_clk_init(ptdev); + if (ret) + return ret; + + ret = panthor_devfreq_init(ptdev); + if (ret) + return ret; + + ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev), + 0, &res); + if (IS_ERR(ptdev->iomem)) + return PTR_ERR(ptdev->iomem); + + ptdev->phys_addr = res->start; + + ret = devm_pm_runtime_enable(ptdev->base.dev); + if (ret) + return ret; + + ret = pm_runtime_resume_and_get(ptdev->base.dev); + if (ret) + return ret; + + /* If PM is disabled, we need to call panthor_device_resume() manually. 
*/ + if (!IS_ENABLED(CONFIG_PM)) { + ret = panthor_device_resume(ptdev->base.dev); + if (ret) + return ret; + } + + ret = panthor_gpu_init(ptdev); + if (ret) + goto err_rpm_put; + + ret = panthor_mmu_init(ptdev); + if (ret) + goto err_unplug_gpu; + + ret = panthor_fw_init(ptdev); + if (ret) + goto err_unplug_mmu; + + ret = panthor_sched_init(ptdev); + if (ret) + goto err_unplug_fw; + + /* ~3 frames */ + pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50); + pm_runtime_use_autosuspend(ptdev->base.dev); + + ret = drm_dev_register(&ptdev->base, 0); + if (ret) + goto err_disable_autosuspend; + + pm_runtime_put_autosuspend(ptdev->base.dev); + return 0; + +err_disable_autosuspend: + pm_runtime_dont_use_autosuspend(ptdev->base.dev); + panthor_sched_unplug(ptdev); + +err_unplug_fw: + panthor_fw_unplug(ptdev); + +err_unplug_mmu: + panthor_mmu_unplug(ptdev); + +err_unplug_gpu: + panthor_gpu_unplug(ptdev); + +err_rpm_put: + pm_runtime_put_sync_suspend(ptdev->base.dev); + return ret; +} + +#define PANTHOR_EXCEPTION(id) \ + [DRM_PANTHOR_EXCEPTION_ ## id] = { \ + .name = #id, \ + } + +struct panthor_exception_info { + const char *name; +}; + +static const struct panthor_exception_info panthor_exception_infos[] = { + PANTHOR_EXCEPTION(OK), + PANTHOR_EXCEPTION(TERMINATED), + PANTHOR_EXCEPTION(KABOOM), + PANTHOR_EXCEPTION(EUREKA), + PANTHOR_EXCEPTION(ACTIVE), + PANTHOR_EXCEPTION(CS_RES_TERM), + PANTHOR_EXCEPTION(CS_CONFIG_FAULT), + PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), + PANTHOR_EXCEPTION(CS_BUS_FAULT), + PANTHOR_EXCEPTION(CS_INSTR_INVALID), + PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW), + PANTHOR_EXCEPTION(CS_INHERIT_FAULT), + PANTHOR_EXCEPTION(INSTR_INVALID_PC), + PANTHOR_EXCEPTION(INSTR_INVALID_ENC), + PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT), + PANTHOR_EXCEPTION(DATA_INVALID_FAULT), + PANTHOR_EXCEPTION(TILE_RANGE_FAULT), + PANTHOR_EXCEPTION(ADDR_RANGE_FAULT), + PANTHOR_EXCEPTION(IMPRECISE_FAULT), + PANTHOR_EXCEPTION(OOM), + PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR), + PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT), + PANTHOR_EXCEPTION(GPU_BUS_FAULT), + PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT), + PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT), + PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT), + PANTHOR_EXCEPTION(TRANSLATION_FAULT_0), + PANTHOR_EXCEPTION(TRANSLATION_FAULT_1), + PANTHOR_EXCEPTION(TRANSLATION_FAULT_2), + PANTHOR_EXCEPTION(TRANSLATION_FAULT_3), + PANTHOR_EXCEPTION(TRANSLATION_FAULT_4), + PANTHOR_EXCEPTION(PERM_FAULT_0), + PANTHOR_EXCEPTION(PERM_FAULT_1), + PANTHOR_EXCEPTION(PERM_FAULT_2), + PANTHOR_EXCEPTION(PERM_FAULT_3), + PANTHOR_EXCEPTION(ACCESS_FLAG_1), + PANTHOR_EXCEPTION(ACCESS_FLAG_2), + PANTHOR_EXCEPTION(ACCESS_FLAG_3), + PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN), + PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0), + PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1), + PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2), + PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3), + PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0), + PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1), + PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2), + PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3), +}; + +const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code) +{ + if (exception_code >= ARRAY_SIZE(panthor_exception_infos) || + !panthor_exception_infos[exception_code].name) + return "Unknown exception type"; + + return panthor_exception_infos[exception_code].name; +} + +static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct panthor_device *ptdev = vma->vm_private_data; + u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; + unsigned 
long pfn; + pgprot_t pgprot; + vm_fault_t ret; + bool active; + int cookie; + + if (!drm_dev_enter(&ptdev->base, &cookie)) + return VM_FAULT_SIGBUS; + + mutex_lock(&ptdev->pm.mmio_lock); + active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE; + + switch (panthor_device_mmio_offset(id)) { + case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: + if (active) + pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); + else + pfn = page_to_pfn(ptdev->pm.dummy_latest_flush); + break; + + default: + ret = VM_FAULT_SIGBUS; + goto out_unlock; + } + + pgprot = vma->vm_page_prot; + if (active) + pgprot = pgprot_noncached(pgprot); + + ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot); + +out_unlock: + mutex_unlock(&ptdev->pm.mmio_lock); + drm_dev_exit(cookie); + return ret; +} + +static const struct vm_operations_struct panthor_mmio_vm_ops = { + .fault = panthor_mmio_vm_fault, +}; + +int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma) +{ + u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; + + switch (panthor_device_mmio_offset(id)) { + case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: + if (vma->vm_end - vma->vm_start != PAGE_SIZE || + (vma->vm_flags & (VM_WRITE | VM_EXEC))) + return -EINVAL; + + break; + + default: + return -EINVAL; + } + + /* Defer actual mapping to the fault handler. */ + vma->vm_private_data = ptdev; + vma->vm_ops = &panthor_mmio_vm_ops; + vm_flags_set(vma, + VM_IO | VM_DONTCOPY | VM_DONTEXPAND | + VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP); + return 0; +} + +int panthor_device_resume(struct device *dev) +{ + struct panthor_device *ptdev = dev_get_drvdata(dev); + int ret, cookie; + + if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_SUSPENDED) + return -EINVAL; + + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING); + + ret = clk_prepare_enable(ptdev->clks.core); + if (ret) + goto err_set_suspended; + + ret = clk_prepare_enable(ptdev->clks.stacks); + if (ret) + goto err_disable_core_clk; + + ret = clk_prepare_enable(ptdev->clks.coregroup); + if (ret) + goto err_disable_stacks_clk; + + ret = panthor_devfreq_resume(ptdev); + if (ret) + goto err_disable_coregroup_clk; + + if (panthor_device_is_initialized(ptdev) && + drm_dev_enter(&ptdev->base, &cookie)) { + panthor_gpu_resume(ptdev); + panthor_mmu_resume(ptdev); + ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); + if (!ret) { + panthor_sched_resume(ptdev); + } else { + panthor_mmu_suspend(ptdev); + panthor_gpu_suspend(ptdev); + } + + drm_dev_exit(cookie); + + if (ret) + goto err_suspend_devfreq; + } + + if (atomic_read(&ptdev->reset.pending)) + queue_work(ptdev->reset.wq, &ptdev->reset.work); + + /* Clear all IOMEM mappings pointing to this device after we've + * resumed. This way the fake mappings pointing to the dummy pages + * are removed and the real iomem mapping will be restored on next + * access. 
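+ * The re-mapping itself is lazy: the next userspace access faults into
+ * panthor_mmio_vm_fault(), which now observes PANTHOR_DEVICE_PM_STATE_ACTIVE
+ * and inserts a PFN pointing at the real LATEST_FLUSH register instead of
+ * the dummy page.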
+ */ + mutex_lock(&ptdev->pm.mmio_lock); + unmap_mapping_range(ptdev->base.anon_inode->i_mapping, + DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); + mutex_unlock(&ptdev->pm.mmio_lock); + return 0; + +err_suspend_devfreq: + panthor_devfreq_suspend(ptdev); + +err_disable_coregroup_clk: + clk_disable_unprepare(ptdev->clks.coregroup); + +err_disable_stacks_clk: + clk_disable_unprepare(ptdev->clks.stacks); + +err_disable_core_clk: + clk_disable_unprepare(ptdev->clks.core); + +err_set_suspended: + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); + return ret; +} + +int panthor_device_suspend(struct device *dev) +{ + struct panthor_device *ptdev = dev_get_drvdata(dev); + int ret, cookie; + + if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) + return -EINVAL; + + /* Clear all IOMEM mappings pointing to this device before we + * shutdown the power-domain and clocks. Failing to do that results + * in external aborts when the process accesses the iomem region. + * We change the state and call unmap_mapping_range() with the + * mmio_lock held to make sure the vm_fault handler won't set up + * invalid mappings. + */ + mutex_lock(&ptdev->pm.mmio_lock); + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING); + unmap_mapping_range(ptdev->base.anon_inode->i_mapping, + DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); + mutex_unlock(&ptdev->pm.mmio_lock); + + if (panthor_device_is_initialized(ptdev) && + drm_dev_enter(&ptdev->base, &cookie)) { + cancel_work_sync(&ptdev->reset.work); + + /* We prepare everything as if we were resetting the GPU. + * The end of the reset will happen in the resume path though. + */ + panthor_sched_suspend(ptdev); + panthor_fw_suspend(ptdev); + panthor_mmu_suspend(ptdev); + panthor_gpu_suspend(ptdev); + drm_dev_exit(cookie); + } + + ret = panthor_devfreq_suspend(ptdev); + if (ret) { + if (panthor_device_is_initialized(ptdev) && + drm_dev_enter(&ptdev->base, &cookie)) { + panthor_gpu_resume(ptdev); + panthor_mmu_resume(ptdev); + drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); + panthor_sched_resume(ptdev); + drm_dev_exit(cookie); + } + + goto err_set_active; + } + + clk_disable_unprepare(ptdev->clks.coregroup); + clk_disable_unprepare(ptdev->clks.stacks); + clk_disable_unprepare(ptdev->clks.core); + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); + return 0; + +err_set_active: + /* If something failed and we have to revert back to an + * active state, we also need to clear the MMIO userspace + * mappings, so any dumb pages that were mapped while we + * were trying to suspend gets invalidated. + */ + mutex_lock(&ptdev->pm.mmio_lock); + atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); + unmap_mapping_range(ptdev->base.anon_inode->i_mapping, + DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); + mutex_unlock(&ptdev->pm.mmio_lock); + return ret; +} diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h new file mode 100644 index 000000000000..c84c27dcc92c --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_device.h @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2023 Collabora ltd. 
*/ + +#ifndef __PANTHOR_DEVICE_H__ +#define __PANTHOR_DEVICE_H__ + +#include <linux/atomic.h> +#include <linux/io-pgtable.h> +#include <linux/regulator/consumer.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +#include <drm/drm_device.h> +#include <drm/drm_mm.h> +#include <drm/gpu_scheduler.h> +#include <drm/panthor_drm.h> + +struct panthor_csf; +struct panthor_csf_ctx; +struct panthor_device; +struct panthor_gpu; +struct panthor_group_pool; +struct panthor_heap_pool; +struct panthor_job; +struct panthor_mmu; +struct panthor_fw; +struct panthor_perfcnt; +struct panthor_vm; +struct panthor_vm_pool; + +/** + * enum panthor_device_pm_state - PM state + */ +enum panthor_device_pm_state { + /** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */ + PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0, + + /** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */ + PANTHOR_DEVICE_PM_STATE_RESUMING, + + /** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */ + PANTHOR_DEVICE_PM_STATE_ACTIVE, + + /** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */ + PANTHOR_DEVICE_PM_STATE_SUSPENDING, +}; + +/** + * struct panthor_irq - IRQ data + * + * Used to automate IRQ handling for the 3 different IRQs we have in this driver. + */ +struct panthor_irq { + /** @ptdev: Panthor device */ + struct panthor_device *ptdev; + + /** @irq: IRQ number. */ + int irq; + + /** @mask: Current mask being applied to xxx_INT_MASK. */ + u32 mask; + + /** @suspended: Set to true when the IRQ is suspended. */ + atomic_t suspended; +}; + +/** + * struct panthor_device - Panthor device + */ +struct panthor_device { + /** @base: Base drm_device. */ + struct drm_device base; + + /** @phys_addr: Physical address of the iomem region. */ + phys_addr_t phys_addr; + + /** @iomem: CPU mapping of the IOMEM region. */ + void __iomem *iomem; + + /** @clks: GPU clocks. */ + struct { + /** @core: Core clock. */ + struct clk *core; + + /** @stacks: Stacks clock. This clock is optional. */ + struct clk *stacks; + + /** @coregroup: Core group clock. This clock is optional. */ + struct clk *coregroup; + } clks; + + /** @coherent: True if the CPU/GPU are memory coherent. */ + bool coherent; + + /** @gpu_info: GPU information. */ + struct drm_panthor_gpu_info gpu_info; + + /** @csif_info: Command stream interface information. */ + struct drm_panthor_csif_info csif_info; + + /** @gpu: GPU management data. */ + struct panthor_gpu *gpu; + + /** @fw: FW management data. */ + struct panthor_fw *fw; + + /** @mmu: MMU management data. */ + struct panthor_mmu *mmu; + + /** @scheduler: Scheduler management data. */ + struct panthor_scheduler *scheduler; + + /** @devfreq: Device frequency scaling management data. */ + struct panthor_devfreq *devfreq; + + /** @unplug: Device unplug related fields. */ + struct { + /** @lock: Lock used to serialize unplug operations. */ + struct mutex lock; + + /** + * @done: Completion object signaled when the unplug + * operation is done. + */ + struct completion done; + } unplug; + + /** @reset: Reset related fields. */ + struct { + /** @wq: Ordered worqueud used to schedule reset operations. */ + struct workqueue_struct *wq; + + /** @work: Reset work. */ + struct work_struct work; + + /** @pending: Set to true if a reset is pending. */ + atomic_t pending; + } reset; + + /** @pm: Power management related data. */ + struct { + /** @state: Power state. */ + atomic_t state; + + /** + * @mmio_lock: Lock protecting MMIO userspace CPU mappings. 
+ * + * This is needed to ensure we map the dummy IO pages when + * the device is being suspended, and the real IO pages when + * the device is being resumed. We can't just do with the + * state atomicity to deal with this race. + */ + struct mutex mmio_lock; + + /** + * @dummy_latest_flush: Dummy LATEST_FLUSH page. + * + * Used to replace the real LATEST_FLUSH page when the GPU + * is suspended. + */ + struct page *dummy_latest_flush; + } pm; +}; + +/** + * struct panthor_file - Panthor file + */ +struct panthor_file { + /** @ptdev: Device attached to this file. */ + struct panthor_device *ptdev; + + /** @vms: VM pool attached to this file. */ + struct panthor_vm_pool *vms; + + /** @groups: Scheduling group pool attached to this file. */ + struct panthor_group_pool *groups; +}; + +int panthor_device_init(struct panthor_device *ptdev); +void panthor_device_unplug(struct panthor_device *ptdev); + +/** + * panthor_device_schedule_reset() - Schedules a reset operation + */ +static inline void panthor_device_schedule_reset(struct panthor_device *ptdev) +{ + if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) && + atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE) + queue_work(ptdev->reset.wq, &ptdev->reset.work); +} + +/** + * panthor_device_reset_is_pending() - Checks if a reset is pending. + * + * Return: true if a reset is pending, false otherwise. + */ +static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev) +{ + return atomic_read(&ptdev->reset.pending) != 0; +} + +int panthor_device_mmap_io(struct panthor_device *ptdev, + struct vm_area_struct *vma); + +int panthor_device_resume(struct device *dev); +int panthor_device_suspend(struct device *dev); + +enum drm_panthor_exception_type { + DRM_PANTHOR_EXCEPTION_OK = 0x00, + DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, + DRM_PANTHOR_EXCEPTION_KABOOM = 0x05, + DRM_PANTHOR_EXCEPTION_EUREKA = 0x06, + DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08, + DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, + DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, + DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, + DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, + DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, + DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, + DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a, + DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b, + DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50, + DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51, + DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, + DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58, + DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59, + DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, + DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b, + DRM_PANTHOR_EXCEPTION_OOM = 0x60, + DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68, + DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69, + DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80, + DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, + DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, + DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, + DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, + DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, + DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, + DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, + DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, + DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8, + DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9, + DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca, + DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb, + DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9, + 
DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda, + DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb, + DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0, + DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, + DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, + DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, + DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, + DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, + DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, + DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, + DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, +}; + +/** + * panthor_exception_is_fault() - Checks if an exception is a fault. + * + * Return: true if the exception is a fault, false otherwise. + */ +static inline bool +panthor_exception_is_fault(u32 exception_code) +{ + return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT; +} + +const char *panthor_exception_name(struct panthor_device *ptdev, + u32 exception_code); + +/** + * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt + * registration function. + * + * The boiler-plate to gracefully deal with shared interrupts is + * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER() + * just after the actual handler. The handler prototype is: + * + * void (*handler)(struct panthor_device *, u32 status); + */ +#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler) \ +static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \ +{ \ + struct panthor_irq *pirq = data; \ + struct panthor_device *ptdev = pirq->ptdev; \ + \ + if (atomic_read(&pirq->suspended)) \ + return IRQ_NONE; \ + if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT)) \ + return IRQ_NONE; \ + \ + gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0); \ + return IRQ_WAKE_THREAD; \ +} \ + \ +static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \ +{ \ + struct panthor_irq *pirq = data; \ + struct panthor_device *ptdev = pirq->ptdev; \ + irqreturn_t ret = IRQ_NONE; \ + \ + while (true) { \ + u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask; \ + \ + if (!status) \ + break; \ + \ + gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \ + \ + __handler(ptdev, status); \ + ret = IRQ_HANDLED; \ + } \ + \ + if (!atomic_read(&pirq->suspended)) \ + gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask); \ + \ + return ret; \ +} \ + \ +static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ +{ \ + int cookie; \ + \ + atomic_set(&pirq->suspended, true); \ + \ + if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ + synchronize_irq(pirq->irq); \ + drm_dev_exit(cookie); \ + } \ + \ + pirq->mask = 0; \ +} \ + \ +static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ +{ \ + int cookie; \ + \ + atomic_set(&pirq->suspended, false); \ + pirq->mask = mask; \ + \ + if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ + drm_dev_exit(cookie); \ + } \ +} \ + \ +static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ + struct panthor_irq *pirq, \ + int irq, u32 mask) \ +{ \ + pirq->ptdev = ptdev; \ + pirq->irq = irq; \ + panthor_ ## __name ## _irq_resume(pirq, mask); \ + \ + return devm_request_threaded_irq(ptdev->base.dev, irq, \ + panthor_ ## __name ## _irq_raw_handler, \ + panthor_ ## __name ## _irq_threaded_handler, \ + IRQF_SHARED, KBUILD_MODNAME 
"-" # __name, \ + pirq); \ +} + +/** + * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one + * @offset: Offset to convert. + * + * With 32-bit systems being limited by the 32-bit representation of mmap2's + * pgoffset field, we need to make the MMIO offset arch specific. This function + * converts a user MMIO offset into something the kernel driver understands. + * + * If the kernel and userspace architecture match, the offset is unchanged. If + * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match + * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible. + * + * Return: Adjusted offset. + */ +static inline u64 panthor_device_mmio_offset(u64 offset) +{ +#ifdef CONFIG_ARM64 + if (test_tsk_thread_flag(current, TIF_32BIT)) + offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; +#endif + + return offset; +} + +extern struct workqueue_struct *panthor_cleanup_wq; + +#endif diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c new file mode 100644 index 000000000000..11b3ccd58f85 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -0,0 +1,1473 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */ +/* Copyright 2019 Collabora ltd. */ + +#include <linux/list.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/pagemap.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_debugfs.h> +#include <drm/drm_drv.h> +#include <drm/drm_exec.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_syncobj.h> +#include <drm/drm_utils.h> +#include <drm/gpu_scheduler.h> +#include <drm/panthor_drm.h> + +#include "panthor_device.h" +#include "panthor_fw.h" +#include "panthor_gem.h" +#include "panthor_gpu.h" +#include "panthor_heap.h" +#include "panthor_mmu.h" +#include "panthor_regs.h" +#include "panthor_sched.h" + +/** + * DOC: user <-> kernel object copy helpers. + */ + +/** + * panthor_set_uobj() - Copy kernel object to user object. + * @usr_ptr: Users pointer. + * @usr_size: Size of the user object. + * @min_size: Minimum size for this object. + * @kern_size: Size of the kernel object. + * @in: Address of the kernel object to copy. + * + * Helper automating kernel -> user object copies. + * + * Don't use this function directly, use PANTHOR_UOBJ_SET() instead. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int +panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const void *in) +{ + /* User size shouldn't be smaller than the minimal object size. */ + if (usr_size < min_size) + return -EINVAL; + + if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_size, kern_size))) + return -EFAULT; + + /* When the kernel object is smaller than the user object, we fill the gap with + * zeros. + */ + if (usr_size > kern_size && + clear_user(u64_to_user_ptr(usr_ptr + kern_size), usr_size - kern_size)) { + return -EFAULT; + } + + return 0; +} + +/** + * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array. + * @in: The object array to copy. + * @min_stride: Minimum array stride. + * @obj_size: Kernel object size. + * + * Helper automating user -> kernel object copies. + * + * Don't use this function directly, use PANTHOR_UOBJ_GET_ARRAY() instead. + * + * Return: newly allocated object array or an ERR_PTR on error. 
+ */ +static void * +panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, + u32 obj_size) +{ + int ret = 0; + void *out_alloc; + + /* User stride must be at least the minimum object size, otherwise it might + * lack useful information. + */ + if (in->stride < min_stride) + return ERR_PTR(-EINVAL); + + if (!in->count) + return NULL; + + out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); + if (!out_alloc) + return ERR_PTR(-ENOMEM); + + if (obj_size == in->stride) { + /* Fast path when user/kernel have the same uAPI header version. */ + if (copy_from_user(out_alloc, u64_to_user_ptr(in->array), + (unsigned long)obj_size * in->count)) + ret = -EFAULT; + } else { + void __user *in_ptr = u64_to_user_ptr(in->array); + void *out_ptr = out_alloc; + + /* If the sizes differ, we need to copy elements one by one. */ + for (u32 i = 0; i < in->count; i++) { + ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride); + if (ret) + break; + + out_ptr += obj_size; + in_ptr += in->stride; + } + } + + if (ret) { + kvfree(out_alloc); + return ERR_PTR(ret); + } + + return out_alloc; +} + +/** + * PANTHOR_UOBJ_MIN_SIZE_INTERNAL() - Get the minimum user object size + * @_typename: Object type. + * @_last_mandatory_field: Last mandatory field. + * + * Get the minimum user object size based on the last mandatory field name, + * A.K.A, the name of the last field of the structure at the time this + * structure was added to the uAPI. + * + * Don't use directly, use PANTHOR_UOBJ_DECL() instead. + */ +#define PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \ + (offsetof(_typename, _last_mandatory_field) + \ + sizeof(((_typename *)NULL)->_last_mandatory_field)) + +/** + * PANTHOR_UOBJ_DECL() - Declare a new uAPI object whose subject to + * evolutions. + * @_typename: Object type. + * @_last_mandatory_field: Last mandatory field. + * + * Should be used to extend the PANTHOR_UOBJ_MIN_SIZE() list. + */ +#define PANTHOR_UOBJ_DECL(_typename, _last_mandatory_field) \ + _typename : PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) + +/** + * PANTHOR_UOBJ_MIN_SIZE() - Get the minimum size of a given uAPI object + * @_obj_name: Object to get the minimum size of. + * + * Don't use this macro directly, it's automatically called by + * PANTHOR_UOBJ_{SET,GET_ARRAY}(). + */ +#define PANTHOR_UOBJ_MIN_SIZE(_obj_name) \ + _Generic(_obj_name, \ + PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) + +/** + * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object. + * @_dest_usr_ptr: User pointer to copy to. + * @_usr_size: Size of the user object. + * @_src_obj: Kernel object to copy (not a pointer). + * + * Return: 0 on success, a negative error code otherwise. + */ +#define PANTHOR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \ + panthor_set_uobj(_dest_usr_ptr, _usr_size, \ + PANTHOR_UOBJ_MIN_SIZE(_src_obj), \ + sizeof(_src_obj), &(_src_obj)) + +/** + * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible + * object array. + * @_dest_array: Local variable that will hold the newly allocated kernel + * object array. + * @_uobj_array: The drm_panthor_obj_array object describing the user object + * array. 
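+ * The per-element copy path is what allows uAPI structs to grow: an older
+ * userspace passing a smaller (but still valid) stride has its missing
+ * trailing fields zero-filled by copy_struct_from_user(), while a larger
+ * stride is only accepted if the extra bytes are zero.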
+ * + * Return: 0 on success, a negative error code otherwise. + */ +#define PANTHOR_UOBJ_GET_ARRAY(_dest_array, _uobj_array) \ + ({ \ + typeof(_dest_array) _tmp; \ + _tmp = panthor_get_uobj_array(_uobj_array, \ + PANTHOR_UOBJ_MIN_SIZE((_dest_array)[0]), \ + sizeof((_dest_array)[0])); \ + if (!IS_ERR(_tmp)) \ + _dest_array = _tmp; \ + PTR_ERR_OR_ZERO(_tmp); \ + }) + +/** + * struct panthor_sync_signal - Represent a synchronization object point to attach + * our job fence to. + * + * This structure is here to keep track of fences that are currently bound to + * a specific syncobj point. + * + * At the beginning of a job submission, the fence + * is retrieved from the syncobj itself, and can be NULL if no fence was attached + * to this point. + * + * At the end, it points to the fence of the last job that had a + * %DRM_PANTHOR_SYNC_OP_SIGNAL on this syncobj. + * + * With jobs being submitted in batches, the fence might change several times during + * the process, allowing one job to wait on a job that's part of the same submission + * but appears earlier in the drm_panthor_group_submit::queue_submits array. + */ +struct panthor_sync_signal { + /** @node: list_head to track signal ops within a submit operation */ + struct list_head node; + + /** @handle: The syncobj handle. */ + u32 handle; + + /** + * @point: The syncobj point. + * + * Zero for regular syncobjs, and non-zero for timeline syncobjs. + */ + u64 point; + + /** + * @syncobj: The sync object pointed by @handle. + */ + struct drm_syncobj *syncobj; + + /** + * @chain: Chain object used to link the new fence to an existing + * timeline syncobj. + * + * NULL for regular syncobj, non-NULL for timeline syncobjs. + */ + struct dma_fence_chain *chain; + + /** + * @fence: The fence to assign to the syncobj or syncobj-point. + */ + struct dma_fence *fence; +}; + +/** + * struct panthor_job_ctx - Job context + */ +struct panthor_job_ctx { + /** @job: The job that is about to be submitted to drm_sched. */ + struct drm_sched_job *job; + + /** @syncops: Array of sync operations. */ + struct drm_panthor_sync_op *syncops; + + /** @syncop_count: Number of sync operations. */ + u32 syncop_count; +}; + +/** + * struct panthor_submit_ctx - Submission context + * + * Anything that's related to a submission (%DRM_IOCTL_PANTHOR_VM_BIND or + * %DRM_IOCTL_PANTHOR_GROUP_SUBMIT) is kept here, so we can automate the + * initialization and cleanup steps. + */ +struct panthor_submit_ctx { + /** @file: DRM file this submission happens on. */ + struct drm_file *file; + + /** + * @signals: List of struct panthor_sync_signal. + * + * %DRM_PANTHOR_SYNC_OP_SIGNAL operations will be recorded here, + * and %DRM_PANTHOR_SYNC_OP_WAIT will first check if an entry + * matching the syncobj+point exists before calling + * drm_syncobj_find_fence(). This allows us to describe dependencies + * existing between jobs that are part of the same batch. + */ + struct list_head signals; + + /** @jobs: Array of jobs. */ + struct panthor_job_ctx *jobs; + + /** @job_count: Number of entries in the @jobs array. */ + u32 job_count; + + /** @exec: drm_exec context used to acquire and prepare resv objects. 
*/ + struct drm_exec exec; +}; + +#define PANTHOR_SYNC_OP_FLAGS_MASK \ + (DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK | DRM_PANTHOR_SYNC_OP_SIGNAL) + +static bool sync_op_is_signal(const struct drm_panthor_sync_op *sync_op) +{ + return !!(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); +} + +static bool sync_op_is_wait(const struct drm_panthor_sync_op *sync_op) +{ + /* Note that DRM_PANTHOR_SYNC_OP_WAIT == 0 */ + return !(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); +} + +/** + * panthor_check_sync_op() - Check drm_panthor_sync_op fields + * @sync_op: The sync operation to check. + * + * Return: 0 on success, -EINVAL otherwise. + */ +static int +panthor_check_sync_op(const struct drm_panthor_sync_op *sync_op) +{ + u8 handle_type; + + if (sync_op->flags & ~PANTHOR_SYNC_OP_FLAGS_MASK) + return -EINVAL; + + handle_type = sync_op->flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK; + if (handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && + handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ) + return -EINVAL; + + if (handle_type == DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && + sync_op->timeline_value != 0) + return -EINVAL; + + return 0; +} + +/** + * panthor_sync_signal_free() - Release resources and free a panthor_sync_signal object + * @sig_sync: Signal object to free. + */ +static void +panthor_sync_signal_free(struct panthor_sync_signal *sig_sync) +{ + if (!sig_sync) + return; + + drm_syncobj_put(sig_sync->syncobj); + dma_fence_chain_free(sig_sync->chain); + dma_fence_put(sig_sync->fence); + kfree(sig_sync); +} + +/** + * panthor_submit_ctx_add_sync_signal() - Add a signal operation to a submit context + * @ctx: Context to add the signal operation to. + * @handle: Syncobj handle. + * @point: Syncobj point. + * + * Return: 0 on success, otherwise negative error value. + */ +static int +panthor_submit_ctx_add_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) +{ + struct panthor_sync_signal *sig_sync; + struct dma_fence *cur_fence; + int ret; + + sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL); + if (!sig_sync) + return -ENOMEM; + + sig_sync->handle = handle; + sig_sync->point = point; + + if (point > 0) { + sig_sync->chain = dma_fence_chain_alloc(); + if (!sig_sync->chain) { + ret = -ENOMEM; + goto err_free_sig_sync; + } + } + + sig_sync->syncobj = drm_syncobj_find(ctx->file, handle); + if (!sig_sync->syncobj) { + ret = -EINVAL; + goto err_free_sig_sync; + } + + /* Retrieve the current fence attached to that point. It's + * perfectly fine to get a NULL fence here, it just means there's + * no fence attached to that point yet. + */ + if (!drm_syncobj_find_fence(ctx->file, handle, point, 0, &cur_fence)) + sig_sync->fence = cur_fence; + + list_add_tail(&sig_sync->node, &ctx->signals); + + return 0; + +err_free_sig_sync: + panthor_sync_signal_free(sig_sync); + return ret; +} + +/** + * panthor_submit_ctx_search_sync_signal() - Search an existing signal operation in a + * submit context. + * @ctx: Context to search the signal operation in. + * @handle: Syncobj handle. + * @point: Syncobj point. + * + * Return: A valid panthor_sync_signal object if found, NULL otherwise. 
+ */ +static struct panthor_sync_signal * +panthor_submit_ctx_search_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) +{ + struct panthor_sync_signal *sig_sync; + + list_for_each_entry(sig_sync, &ctx->signals, node) { + if (handle == sig_sync->handle && point == sig_sync->point) + return sig_sync; + } + + return NULL; +} + +/** + * panthor_submit_ctx_add_job() - Add a job to a submit context + * @ctx: Context to search the signal operation in. + * @idx: Index of the job in the context. + * @job: Job to add. + * @syncs: Sync operations provided by userspace. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int +panthor_submit_ctx_add_job(struct panthor_submit_ctx *ctx, u32 idx, + struct drm_sched_job *job, + const struct drm_panthor_obj_array *syncs) +{ + int ret; + + ctx->jobs[idx].job = job; + + ret = PANTHOR_UOBJ_GET_ARRAY(ctx->jobs[idx].syncops, syncs); + if (ret) + return ret; + + ctx->jobs[idx].syncop_count = syncs->count; + return 0; +} + +/** + * panthor_submit_ctx_get_sync_signal() - Search signal operation and add one if none was found. + * @ctx: Context to search the signal operation in. + * @handle: Syncobj handle. + * @point: Syncobj point. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int +panthor_submit_ctx_get_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) +{ + struct panthor_sync_signal *sig_sync; + + sig_sync = panthor_submit_ctx_search_sync_signal(ctx, handle, point); + if (sig_sync) + return 0; + + return panthor_submit_ctx_add_sync_signal(ctx, handle, point); +} + +/** + * panthor_submit_ctx_update_job_sync_signal_fences() - Update fences + * on the signal operations specified by a job. + * @ctx: Context to search the signal operation in. + * @job_idx: Index of the job to operate on. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int +panthor_submit_ctx_update_job_sync_signal_fences(struct panthor_submit_ctx *ctx, + u32 job_idx) +{ + struct panthor_device *ptdev = container_of(ctx->file->minor->dev, + struct panthor_device, + base); + struct dma_fence *done_fence = &ctx->jobs[job_idx].job->s_fence->finished; + const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; + u32 sync_op_count = ctx->jobs[job_idx].syncop_count; + + for (u32 i = 0; i < sync_op_count; i++) { + struct dma_fence *old_fence; + struct panthor_sync_signal *sig_sync; + + if (!sync_op_is_signal(&sync_ops[i])) + continue; + + sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, + sync_ops[i].timeline_value); + if (drm_WARN_ON(&ptdev->base, !sig_sync)) + return -EINVAL; + + old_fence = sig_sync->fence; + sig_sync->fence = dma_fence_get(done_fence); + dma_fence_put(old_fence); + + if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) + return -EINVAL; + } + + return 0; +} + +/** + * panthor_submit_ctx_collect_job_signal_ops() - Iterate over all job signal operations + * and add them to the context. + * @ctx: Context to search the signal operation in. + * @job_idx: Index of the job to operate on. + * + * Return: 0 on success, a negative error code otherwise. 
+ */ +static int +panthor_submit_ctx_collect_job_signal_ops(struct panthor_submit_ctx *ctx, + u32 job_idx) +{ + const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; + u32 sync_op_count = ctx->jobs[job_idx].syncop_count; + + for (u32 i = 0; i < sync_op_count; i++) { + int ret; + + if (!sync_op_is_signal(&sync_ops[i])) + continue; + + ret = panthor_check_sync_op(&sync_ops[i]); + if (ret) + return ret; + + ret = panthor_submit_ctx_get_sync_signal(ctx, + sync_ops[i].handle, + sync_ops[i].timeline_value); + if (ret) + return ret; + } + + return 0; +} + +/** + * panthor_submit_ctx_push_fences() - Iterate over the signal array, and for each entry, push + * the currently assigned fence to the associated syncobj. + * @ctx: Context to push fences on. + * + * This is the last step of a submission procedure, and is done once we know the submission + * is effective and job fences are guaranteed to be signaled in finite time. + */ +static void +panthor_submit_ctx_push_fences(struct panthor_submit_ctx *ctx) +{ + struct panthor_sync_signal *sig_sync; + + list_for_each_entry(sig_sync, &ctx->signals, node) { + if (sig_sync->chain) { + drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain, + sig_sync->fence, sig_sync->point); + sig_sync->chain = NULL; + } else { + drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence); + } + } +} + +/** + * panthor_submit_ctx_add_sync_deps_to_job() - Add sync wait operations as + * job dependencies. + * @ctx: Submit context. + * @job_idx: Index of the job to operate on. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int +panthor_submit_ctx_add_sync_deps_to_job(struct panthor_submit_ctx *ctx, + u32 job_idx) +{ + struct panthor_device *ptdev = container_of(ctx->file->minor->dev, + struct panthor_device, + base); + const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; + struct drm_sched_job *job = ctx->jobs[job_idx].job; + u32 sync_op_count = ctx->jobs[job_idx].syncop_count; + int ret = 0; + + for (u32 i = 0; i < sync_op_count; i++) { + struct panthor_sync_signal *sig_sync; + struct dma_fence *fence; + + if (!sync_op_is_wait(&sync_ops[i])) + continue; + + ret = panthor_check_sync_op(&sync_ops[i]); + if (ret) + return ret; + + sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, + sync_ops[i].timeline_value); + if (sig_sync) { + if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) + return -EINVAL; + + fence = dma_fence_get(sig_sync->fence); + } else { + ret = drm_syncobj_find_fence(ctx->file, sync_ops[i].handle, + sync_ops[i].timeline_value, + 0, &fence); + if (ret) + return ret; + } + + ret = drm_sched_job_add_dependency(job, fence); + if (ret) + return ret; + } + + return 0; +} + +/** + * panthor_submit_ctx_collect_jobs_signal_ops() - Collect all signal operations + * and add them to the submit context. + * @ctx: Submit context. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int +panthor_submit_ctx_collect_jobs_signal_ops(struct panthor_submit_ctx *ctx) +{ + for (u32 i = 0; i < ctx->job_count; i++) { + int ret; + + ret = panthor_submit_ctx_collect_job_signal_ops(ctx, i); + if (ret) + return ret; + } + + return 0; +} + +/** + * panthor_submit_ctx_add_deps_and_arm_jobs() - Add jobs dependencies and arm jobs + * @ctx: Submit context. + * + * Must be called after the resv preparation has been taken care of. + * + * Return: 0 on success, a negative error code otherwise. 
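+ * For each job, the wait dependencies are registered before
+ * drm_sched_job_arm(), and the job's finished fence is recorded in the
+ * signal array right after arming, so later jobs in the same batch can
+ * depend on it.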
+ */ +static int +panthor_submit_ctx_add_deps_and_arm_jobs(struct panthor_submit_ctx *ctx) +{ + for (u32 i = 0; i < ctx->job_count; i++) { + int ret; + + ret = panthor_submit_ctx_add_sync_deps_to_job(ctx, i); + if (ret) + return ret; + + drm_sched_job_arm(ctx->jobs[i].job); + + ret = panthor_submit_ctx_update_job_sync_signal_fences(ctx, i); + if (ret) + return ret; + } + + return 0; +} + +/** + * panthor_submit_ctx_push_jobs() - Push jobs to their scheduling entities. + * @ctx: Submit context. + * @upd_resvs: Callback used to update reservation objects that were previously + * prepared. + */ +static void +panthor_submit_ctx_push_jobs(struct panthor_submit_ctx *ctx, + void (*upd_resvs)(struct drm_exec *, struct drm_sched_job *)) +{ + for (u32 i = 0; i < ctx->job_count; i++) { + upd_resvs(&ctx->exec, ctx->jobs[i].job); + drm_sched_entity_push_job(ctx->jobs[i].job); + + /* Job is owned by the scheduler now. */ + ctx->jobs[i].job = NULL; + } + + panthor_submit_ctx_push_fences(ctx); +} + +/** + * panthor_submit_ctx_init() - Initialize a submission context + * @ctx: Submit context to initialize. + * @file: drm_file this submission happens on. + * @job_count: Number of jobs that will be submitted. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int panthor_submit_ctx_init(struct panthor_submit_ctx *ctx, + struct drm_file *file, u32 job_count) +{ + ctx->jobs = kvmalloc_array(job_count, sizeof(*ctx->jobs), + GFP_KERNEL | __GFP_ZERO); + if (!ctx->jobs) + return -ENOMEM; + + ctx->file = file; + ctx->job_count = job_count; + INIT_LIST_HEAD(&ctx->signals); + drm_exec_init(&ctx->exec, + DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, + 0); + return 0; +} + +/** + * panthor_submit_ctx_cleanup() - Clean up a submission context + * @ctx: Submit context to clean up. + * @job_put: Job put callback.
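+ * (In this driver the callback is panthor_job_put() for group submissions
+ * and panthor_vm_bind_job_put() for asynchronous VM binds, see the ioctl
+ * handlers below.)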
+ */ +static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx, + void (*job_put)(struct drm_sched_job *)) +{ + struct panthor_sync_signal *sig_sync, *tmp; + unsigned long i; + + drm_exec_fini(&ctx->exec); + + list_for_each_entry_safe(sig_sync, tmp, &ctx->signals, node) + panthor_sync_signal_free(sig_sync); + + for (i = 0; i < ctx->job_count; i++) { + job_put(ctx->jobs[i].job); + kvfree(ctx->jobs[i].syncops); + } + + kvfree(ctx->jobs); +} + +static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file) +{ + struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); + struct drm_panthor_dev_query *args = data; + + if (!args->pointer) { + switch (args->type) { + case DRM_PANTHOR_DEV_QUERY_GPU_INFO: + args->size = sizeof(ptdev->gpu_info); + return 0; + + case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: + args->size = sizeof(ptdev->csif_info); + return 0; + + default: + return -EINVAL; + } + } + + switch (args->type) { + case DRM_PANTHOR_DEV_QUERY_GPU_INFO: + return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->gpu_info); + + case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: + return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info); + + default: + return -EINVAL; + } +} + +#define PANTHOR_VM_CREATE_FLAGS 0 + +static int panthor_ioctl_vm_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_vm_create *args = data; + int cookie, ret; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + ret = panthor_vm_pool_create_vm(ptdev, pfile->vms, args); + if (ret >= 0) { + args->id = ret; + ret = 0; + } + + drm_dev_exit(cookie); + return ret; +} + +static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_vm_destroy *args = data; + + if (args->pad) + return -EINVAL; + + return panthor_vm_pool_destroy_vm(pfile->vms, args->id); +} + +#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP + +static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_bo_create *args = data; + struct panthor_vm *vm = NULL; + int cookie, ret; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + if (!args->size || args->pad || + (args->flags & ~PANTHOR_BO_FLAGS)) { + ret = -EINVAL; + goto out_dev_exit; + } + + if (args->exclusive_vm_id) { + vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); + if (!vm) { + ret = -EINVAL; + goto out_dev_exit; + } + } + + ret = panthor_gem_create_with_handle(file, ddev, vm, &args->size, + args->flags, &args->handle); + + panthor_vm_put(vm); + +out_dev_exit: + drm_dev_exit(cookie); + return ret; +} + +static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_mmap_offset *args = data; + struct drm_gem_object *obj; + int ret; + + if (args->pad) + return -EINVAL; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + ret = drm_gem_create_mmap_offset(obj); + if (ret) + goto out; + + args->offset = drm_vma_node_offset_addr(&obj->vma_node); + +out: + drm_gem_object_put(obj); + return ret; +} + +static int panthor_ioctl_group_submit(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = 
file->driver_priv; + struct drm_panthor_group_submit *args = data; + struct drm_panthor_queue_submit *jobs_args; + struct panthor_submit_ctx ctx; + int ret = 0, cookie; + + if (args->pad) + return -EINVAL; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->queue_submits); + if (ret) + goto out_dev_exit; + + ret = panthor_submit_ctx_init(&ctx, file, args->queue_submits.count); + if (ret) + goto out_free_jobs_args; + + /* Create jobs and attach sync operations */ + for (u32 i = 0; i < args->queue_submits.count; i++) { + const struct drm_panthor_queue_submit *qsubmit = &jobs_args[i]; + struct drm_sched_job *job; + + job = panthor_job_create(pfile, args->group_handle, qsubmit); + if (IS_ERR(job)) { + ret = PTR_ERR(job); + goto out_cleanup_submit_ctx; + } + + ret = panthor_submit_ctx_add_job(&ctx, i, job, &qsubmit->syncs); + if (ret) + goto out_cleanup_submit_ctx; + } + + /* + * Collect signal operations on all jobs, so that each job can pick its + * dependencies from this set and update the fence to signal when the + * job is submitted. + */ + ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); + if (ret) + goto out_cleanup_submit_ctx; + + /* + * We acquire/prepare resvs on all jobs before proceeding with the + * dependency registration. + * + * This solves two problems: + * 1. drm_sched_job_arm() and drm_sched_entity_push_job() must be + * protected by a lock to make sure no concurrent accesses to the same + * entity get interleaved, which would mess up the fence seqno + * ordering. Luckily, one of the resvs being acquired is the VM resv, + * and a scheduling entity is only bound to a single VM. As soon as + * we acquire the VM resv, we should be safe. + * 2. Jobs might depend on fences that were issued by previous jobs in + * the same batch, so we can't add dependencies on all jobs before + * arming previous jobs and registering the fence to the signal + * array, otherwise we might miss dependencies, or point to an + * outdated fence. + */ + if (args->queue_submits.count > 0) { + /* All jobs target the same group, so they also point to the same VM. */ + struct panthor_vm *vm = panthor_job_vm(ctx.jobs[0].job); + + drm_exec_until_all_locked(&ctx.exec) { + ret = panthor_vm_prepare_mapped_bos_resvs(&ctx.exec, vm, + args->queue_submits.count); + } + + if (ret) + goto out_cleanup_submit_ctx; + } + + /* + * Now that resvs are locked/prepared, we can iterate over each job to + * add the dependencies, arm the job fence, register the job fence to + * the signal array. + */ + ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); + if (ret) + goto out_cleanup_submit_ctx; + + /* Nothing can fail after that point, so we can make our job fences + * visible to the outside world. Push jobs and set the job fences to + * the resv slots we reserved. This also pushes the fences to the + * syncobjs that are part of the signal array.
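+ *
+ * Hypothetical two-job example: queue_submits[0] signals syncobj S and
+ * queue_submits[1] waits on S. The signal collection pass records S in the
+ * signal array, job 0 is then armed and its finished fence stored there, so
+ * when job 1's wait is processed it picks up job 0's fence rather than
+ * whatever fence S carried before this submission.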
+ */ + panthor_submit_ctx_push_jobs(&ctx, panthor_job_update_resvs); + +out_cleanup_submit_ctx: + panthor_submit_ctx_cleanup(&ctx, panthor_job_put); + +out_free_jobs_args: + kvfree(jobs_args); + +out_dev_exit: + drm_dev_exit(cookie); + return ret; +} + +static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_group_destroy *args = data; + + if (args->pad) + return -EINVAL; + + return panthor_group_destroy(pfile, args->group_handle); +} + +static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_group_create *args = data; + struct drm_panthor_queue_create *queue_args; + int ret; + + if (!args->queues.count) + return -EINVAL; + + ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); + if (ret) + return ret; + + ret = panthor_group_create(pfile, args, queue_args); + if (ret >= 0) { + args->group_handle = ret; + ret = 0; + } + + kvfree(queue_args); + return ret; +} + +static int panthor_ioctl_group_get_state(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_group_get_state *args = data; + + return panthor_group_get_state(pfile, args); +} + +static int panthor_ioctl_tiler_heap_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_tiler_heap_create *args = data; + struct panthor_heap_pool *pool; + struct panthor_vm *vm; + int ret; + + vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); + if (!vm) + return -EINVAL; + + pool = panthor_vm_get_heap_pool(vm, true); + if (IS_ERR(pool)) { + ret = PTR_ERR(pool); + goto out_put_vm; + } + + ret = panthor_heap_create(pool, + args->initial_chunk_count, + args->chunk_size, + args->max_chunks, + args->target_in_flight, + &args->tiler_heap_ctx_gpu_va, + &args->first_heap_chunk_gpu_va); + if (ret < 0) + goto out_put_heap_pool; + + /* Heap pools are per-VM. We combine the VM and HEAP id to make + * a unique heap handle. 
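+	 *
+	 * For example (illustrative values): heap id 3 created on VM 2 gives
+	 * handle (2 << 16) | 3 = 0x20003, and the destroy path splits it back
+	 * with handle >> 16 and handle & GENMASK(15, 0).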
+ */ + args->handle = (args->vm_id << 16) | ret; + ret = 0; + +out_put_heap_pool: + panthor_heap_pool_put(pool); + +out_put_vm: + panthor_vm_put(vm); + return ret; +} + +static int panthor_ioctl_tiler_heap_destroy(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_tiler_heap_destroy *args = data; + struct panthor_heap_pool *pool; + struct panthor_vm *vm; + int ret; + + if (args->pad) + return -EINVAL; + + vm = panthor_vm_pool_get_vm(pfile->vms, args->handle >> 16); + if (!vm) + return -EINVAL; + + pool = panthor_vm_get_heap_pool(vm, false); + if (!pool) { + ret = -EINVAL; + goto out_put_vm; + } + + ret = panthor_heap_destroy(pool, args->handle & GENMASK(15, 0)); + panthor_heap_pool_put(pool); + +out_put_vm: + panthor_vm_put(vm); + return ret; +} + +static int panthor_ioctl_vm_bind_async(struct drm_device *ddev, + struct drm_panthor_vm_bind *args, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_vm_bind_op *jobs_args; + struct panthor_submit_ctx ctx; + struct panthor_vm *vm; + int ret = 0; + + vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); + if (!vm) + return -EINVAL; + + ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); + if (ret) + goto out_put_vm; + + ret = panthor_submit_ctx_init(&ctx, file, args->ops.count); + if (ret) + goto out_free_jobs_args; + + for (u32 i = 0; i < args->ops.count; i++) { + struct drm_panthor_vm_bind_op *op = &jobs_args[i]; + struct drm_sched_job *job; + + job = panthor_vm_bind_job_create(file, vm, op); + if (IS_ERR(job)) { + ret = PTR_ERR(job); + goto out_cleanup_submit_ctx; + } + + ret = panthor_submit_ctx_add_job(&ctx, i, job, &op->syncs); + if (ret) + goto out_cleanup_submit_ctx; + } + + ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); + if (ret) + goto out_cleanup_submit_ctx; + + /* Prepare reservation objects for each VM_BIND job. */ + drm_exec_until_all_locked(&ctx.exec) { + for (u32 i = 0; i < ctx.job_count; i++) { + ret = panthor_vm_bind_job_prepare_resvs(&ctx.exec, ctx.jobs[i].job); + drm_exec_retry_on_contention(&ctx.exec); + if (ret) + goto out_cleanup_submit_ctx; + } + } + + ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); + if (ret) + goto out_cleanup_submit_ctx; + + /* Nothing can fail after that point. */ + panthor_submit_ctx_push_jobs(&ctx, panthor_vm_bind_job_update_resvs); + +out_cleanup_submit_ctx: + panthor_submit_ctx_cleanup(&ctx, panthor_vm_bind_job_put); + +out_free_jobs_args: + kvfree(jobs_args); + +out_put_vm: + panthor_vm_put(vm); + return ret; +} + +static int panthor_ioctl_vm_bind_sync(struct drm_device *ddev, + struct drm_panthor_vm_bind *args, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_vm_bind_op *jobs_args; + struct panthor_vm *vm; + int ret; + + vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); + if (!vm) + return -EINVAL; + + ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); + if (ret) + goto out_put_vm; + + for (u32 i = 0; i < args->ops.count; i++) { + ret = panthor_vm_bind_exec_sync_op(file, vm, &jobs_args[i]); + if (ret) { + /* Update ops.count so the user knows where things failed. 
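+			 * Userspace may then inspect or retry the remaining
+			 * operations starting at index args->ops.count.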
*/ + args->ops.count = i; + break; + } + } + + kvfree(jobs_args); + +out_put_vm: + panthor_vm_put(vm); + return ret; +} + +#define PANTHOR_VM_BIND_FLAGS DRM_PANTHOR_VM_BIND_ASYNC + +static int panthor_ioctl_vm_bind(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_vm_bind *args = data; + int cookie, ret; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + if (args->flags & DRM_PANTHOR_VM_BIND_ASYNC) + ret = panthor_ioctl_vm_bind_async(ddev, args, file); + else + ret = panthor_ioctl_vm_bind_sync(ddev, args, file); + + drm_dev_exit(cookie); + return ret; +} + +static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + struct drm_panthor_vm_get_state *args = data; + struct panthor_vm *vm; + + vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); + if (!vm) + return -EINVAL; + + if (panthor_vm_is_unusable(vm)) + args->state = DRM_PANTHOR_VM_STATE_UNUSABLE; + else + args->state = DRM_PANTHOR_VM_STATE_USABLE; + + panthor_vm_put(vm); + return 0; +} + +static int +panthor_open(struct drm_device *ddev, struct drm_file *file) +{ + struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); + struct panthor_file *pfile; + int ret; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); + if (!pfile) { + ret = -ENOMEM; + goto err_put_mod; + } + + pfile->ptdev = ptdev; + + ret = panthor_vm_pool_create(pfile); + if (ret) + goto err_free_file; + + ret = panthor_group_pool_create(pfile); + if (ret) + goto err_destroy_vm_pool; + + file->driver_priv = pfile; + return 0; + +err_destroy_vm_pool: + panthor_vm_pool_destroy(pfile); + +err_free_file: + kfree(pfile); + +err_put_mod: + module_put(THIS_MODULE); + return ret; +} + +static void +panthor_postclose(struct drm_device *ddev, struct drm_file *file) +{ + struct panthor_file *pfile = file->driver_priv; + + panthor_group_pool_destroy(pfile); + panthor_vm_pool_destroy(pfile); + + kfree(pfile); + module_put(THIS_MODULE); +} + +static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { +#define PANTHOR_IOCTL(n, func, flags) \ + DRM_IOCTL_DEF_DRV(PANTHOR_##n, panthor_ioctl_##func, flags) + + PANTHOR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(VM_CREATE, vm_create, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(VM_DESTROY, vm_destroy, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(VM_BIND, vm_bind, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(VM_GET_STATE, vm_get_state, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_CREATE, bo_create, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(GROUP_CREATE, group_create, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(GROUP_DESTROY, group_destroy, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(GROUP_GET_STATE, group_get_state, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), +}; + +static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *file = filp->private_data; + struct panthor_file *pfile = file->driver_priv; + struct panthor_device *ptdev = pfile->ptdev; + u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; + int ret, cookie; + + if (!drm_dev_enter(file->minor->dev, &cookie)) + return -ENODEV; + + if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET) + ret = 
panthor_device_mmap_io(ptdev, vma); + else + ret = drm_gem_mmap(filp, vma); + + drm_dev_exit(cookie); + return ret; +} + +static const struct file_operations panthor_drm_driver_fops = { + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .compat_ioctl = drm_compat_ioctl, + .poll = drm_poll, + .read = drm_read, + .llseek = noop_llseek, + .mmap = panthor_mmap, +}; + +#ifdef CONFIG_DEBUG_FS +static void panthor_debugfs_init(struct drm_minor *minor) +{ + panthor_mmu_debugfs_init(minor); +} +#endif + +/* + * PanCSF driver version: + * - 1.0 - initial interface + */ +static const struct drm_driver panthor_drm_driver = { + .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, + .open = panthor_open, + .postclose = panthor_postclose, + .ioctls = panthor_drm_driver_ioctls, + .num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls), + .fops = &panthor_drm_driver_fops, + .name = "panthor", + .desc = "Panthor DRM driver", + .date = "20230801", + .major = 1, + .minor = 0, + + .gem_create_object = panthor_gem_create_object, + .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, +#ifdef CONFIG_DEBUG_FS + .debugfs_init = panthor_debugfs_init, +#endif +}; + +static int panthor_probe(struct platform_device *pdev) +{ + struct panthor_device *ptdev; + + ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver, + struct panthor_device, base); + if (!ptdev) + return -ENOMEM; + + platform_set_drvdata(pdev, ptdev); + + return panthor_device_init(ptdev); +} + +static void panthor_remove(struct platform_device *pdev) +{ + struct panthor_device *ptdev = platform_get_drvdata(pdev); + + panthor_device_unplug(ptdev); +} + +static const struct of_device_id dt_match[] = { + { .compatible = "rockchip,rk3588-mali" }, + { .compatible = "arm,mali-valhall-csf" }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops, + panthor_device_suspend, + panthor_device_resume, + NULL); + +static struct platform_driver panthor_driver = { + .probe = panthor_probe, + .remove_new = panthor_remove, + .driver = { + .name = "panthor", + .pm = pm_ptr(&panthor_pm_ops), + .of_match_table = dt_match, + }, +}; + +/* + * Workqueue used to cleanup stuff. + * + * We create a dedicated workqueue so we can drain on unplug and + * make sure all resources are freed before the module is unloaded. 
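+ *
+ * (Deferred-release work from other panthor components is expected to be
+ * queued here rather than on the system workqueue, so that draining this
+ * queue is enough to know nothing is still running when the module goes
+ * away.)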
+ */ +struct workqueue_struct *panthor_cleanup_wq; + +static int __init panthor_init(void) +{ + int ret; + + ret = panthor_mmu_pt_cache_init(); + if (ret) + return ret; + + panthor_cleanup_wq = alloc_workqueue("panthor-cleanup", WQ_UNBOUND, 0); + if (!panthor_cleanup_wq) { + pr_err("panthor: Failed to allocate the workqueues"); + ret = -ENOMEM; + goto err_mmu_pt_cache_fini; + } + + ret = platform_driver_register(&panthor_driver); + if (ret) + goto err_destroy_cleanup_wq; + + return 0; + +err_destroy_cleanup_wq: + destroy_workqueue(panthor_cleanup_wq); + +err_mmu_pt_cache_fini: + panthor_mmu_pt_cache_fini(); + return ret; +} +module_init(panthor_init); + +static void __exit panthor_exit(void) +{ + platform_driver_unregister(&panthor_driver); + destroy_workqueue(panthor_cleanup_wq); + panthor_mmu_pt_cache_fini(); +} +module_exit(panthor_exit); + +MODULE_AUTHOR("Panthor Project Developers"); +MODULE_DESCRIPTION("Panthor DRM Driver"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c new file mode 100644 index 000000000000..33c87a59834e --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -0,0 +1,1362 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2023 Collabora ltd. */ + +#ifdef CONFIG_ARM_ARCH_TIMER +#include <asm/arch_timer.h> +#endif + +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/firmware.h> +#include <linux/iopoll.h> +#include <linux/iosys-map.h> +#include <linux/mutex.h> +#include <linux/platform_device.h> + +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> + +#include "panthor_device.h" +#include "panthor_fw.h" +#include "panthor_gem.h" +#include "panthor_gpu.h" +#include "panthor_mmu.h" +#include "panthor_regs.h" +#include "panthor_sched.h" + +#define CSF_FW_NAME "mali_csffw.bin" + +#define PING_INTERVAL_MS 12000 +#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024) +#define PROGRESS_TIMEOUT_SCALE_SHIFT 10 +#define IDLE_HYSTERESIS_US 800 +#define PWROFF_HYSTERESIS_US 10000 + +/** + * struct panthor_fw_binary_hdr - Firmware binary header. + */ +struct panthor_fw_binary_hdr { + /** @magic: Magic value to check binary validity. */ + u32 magic; +#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e + + /** @minor: Minor FW version. */ + u8 minor; + + /** @major: Major FW version. */ + u8 major; +#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0 + + /** @padding1: MBZ. */ + u16 padding1; + + /** @version_hash: FW version hash. */ + u32 version_hash; + + /** @padding2: MBZ. */ + u32 padding2; + + /** @size: FW binary size. */ + u32 size; +}; + +/** + * enum panthor_fw_binary_entry_type - Firmware binary entry type + */ +enum panthor_fw_binary_entry_type { + /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */ + CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0, + + /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */ + CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1, + + /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */ + CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2, + + /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */ + CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3, + + /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. 
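+	 *
+	 * Currently ignored by the driver, like the CONFIG, FUTF_TEST and
+	 * TRACE_BUFFER entries (see the FIXME in panthor_fw_load_entry()).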
*/ + CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4, +}; + +#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff) +#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff) +#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30) +#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31) + +#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30) +#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31) + +#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \ + (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \ + CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \ + CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \ + CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \ + CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \ + CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \ + CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) + +/** + * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary + */ +struct panthor_fw_binary_section_entry_hdr { + /** @flags: Section flags. */ + u32 flags; + + /** @va: MCU virtual range to map this binary section to. */ + struct { + /** @start: Start address. */ + u32 start; + + /** @end: End address. */ + u32 end; + } va; + + /** @data: Data to initialize the FW section with. */ + struct { + /** @start: Start offset in the FW binary. */ + u32 start; + + /** @end: End offset in the FW binary. */ + u32 end; + } data; +}; + +/** + * struct panthor_fw_binary_iter - Firmware binary iterator + * + * Used to parse a firmware binary. + */ +struct panthor_fw_binary_iter { + /** @data: FW binary data. */ + const void *data; + + /** @size: FW binary size. */ + size_t size; + + /** @offset: Iterator offset. */ + size_t offset; +}; + +/** + * struct panthor_fw_section - FW section + */ +struct panthor_fw_section { + /** @node: Used to keep track of FW sections. */ + struct list_head node; + + /** @flags: Section flags, as encoded in the FW binary. */ + u32 flags; + + /** @mem: Section memory. */ + struct panthor_kernel_bo *mem; + + /** + * @name: Name of the section, as specified in the binary. + * + * Can be NULL. + */ + const char *name; + + /** + * @data: Initial data copied to the FW memory. + * + * We keep data around so we can reload sections after a reset. + */ + struct { + /** @buf: Buffed used to store init data. */ + const void *buf; + + /** @size: Size of @buf in bytes. */ + size_t size; + } data; +}; + +#define CSF_MCU_SHARED_REGION_START 0x04000000ULL +#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL + +#define MIN_CS_PER_CSG 8 +#define MIN_CSGS 3 +#define MAX_CSG_PRIO 0xf + +#define CSF_IFACE_VERSION(major, minor, patch) \ + (((major) << 24) | ((minor) << 16) | (patch)) +#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24) +#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff) +#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff) + +#define CSF_GROUP_CONTROL_OFFSET 0x1000 +#define CSF_STREAM_CONTROL_OFFSET 0x40 +#define CSF_UNPRESERVED_REG_COUNT 4 + +/** + * struct panthor_fw_iface - FW interfaces + */ +struct panthor_fw_iface { + /** @global: Global interface. 
*/ + struct panthor_fw_global_iface global; + + /** @groups: Group slot interfaces. */ + struct panthor_fw_csg_iface groups[MAX_CSGS]; + + /** @streams: Command stream slot interfaces. */ + struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG]; +}; + +/** + * struct panthor_fw - Firmware management + */ +struct panthor_fw { + /** @vm: MCU VM. */ + struct panthor_vm *vm; + + /** @sections: List of FW sections. */ + struct list_head sections; + + /** @shared_section: The section containing the FW interfaces. */ + struct panthor_fw_section *shared_section; + + /** @iface: FW interfaces. */ + struct panthor_fw_iface iface; + + /** @watchdog: Collection of fields relating to the FW watchdog. */ + struct { + /** @ping_work: Delayed work used to ping the FW. */ + struct delayed_work ping_work; + } watchdog; + + /** + * @req_waitqueue: FW request waitqueue. + * + * Everytime a request is sent to a command stream group or the global + * interface, the caller will first busy wait for the request to be + * acknowledged, and then fallback to a sleeping wait. + * + * This wait queue is here to support the sleeping wait flavor. + */ + wait_queue_head_t req_waitqueue; + + /** @booted: True is the FW is booted */ + bool booted; + + /** + * @fast_reset: True if the post_reset logic can proceed with a fast reset. + * + * A fast reset is just a reset where the driver doesn't reload the FW sections. + * + * Any time the firmware is properly suspended, a fast reset can take place. + * On the other hand, if the halt operation failed, the driver will reload + * all sections to make sure we start from a fresh state. + */ + bool fast_reset; + + /** @irq: Job irq data. */ + struct panthor_irq irq; +}; + +struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev) +{ + return ptdev->fw->vm; +} + +/** + * panthor_fw_get_glb_iface() - Get the global interface + * @ptdev: Device. + * + * Return: The global interface. + */ +struct panthor_fw_global_iface * +panthor_fw_get_glb_iface(struct panthor_device *ptdev) +{ + return &ptdev->fw->iface.global; +} + +/** + * panthor_fw_get_csg_iface() - Get a command stream group slot interface + * @ptdev: Device. + * @csg_slot: Index of the command stream group slot. + * + * Return: The command stream group slot interface. + */ +struct panthor_fw_csg_iface * +panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot) +{ + if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS)) + return NULL; + + return &ptdev->fw->iface.groups[csg_slot]; +} + +/** + * panthor_fw_get_cs_iface() - Get a command stream slot interface + * @ptdev: Device. + * @csg_slot: Index of the command stream group slot. + * @cs_slot: Index of the command stream slot. + * + * Return: The command stream slot interface. + */ +struct panthor_fw_cs_iface * +panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) +{ + if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG)) + return NULL; + + return &ptdev->fw->iface.streams[csg_slot][cs_slot]; +} + +/** + * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count + * @ptdev: Device. + * @timeout_us: Timeout expressed in micro-seconds. + * + * The FW has two timer sources: the GPU counter or arch-timer. We need + * to express timeouts in term of number of cycles and specify which + * timer source should be used. + * + * Return: A value suitable for timeout fields in the global interface. 
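+ *
+ * Illustrative example, assuming a 24 MHz arch timer: a 10000 us timeout
+ * becomes DIV_ROUND_UP(10000 * 24000000, 1000000 << 10) = 235 units of
+ * 1024 timer cycles, i.e. GLB_TIMER_VAL(235) with GLB_TIMER_SOURCE_GPU_COUNTER
+ * left clear.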
+ */ +static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us) +{ + bool use_cycle_counter = false; + u32 timer_rate = 0; + u64 mod_cycles; + +#ifdef CONFIG_ARM_ARCH_TIMER + timer_rate = arch_timer_get_cntfrq(); +#endif + + if (!timer_rate) { + use_cycle_counter = true; + timer_rate = clk_get_rate(ptdev->clks.core); + } + + if (drm_WARN_ON(&ptdev->base, !timer_rate)) { + /* We couldn't get a valid clock rate, let's just pick the + * maximum value so the FW still handles the core + * power on/off requests. + */ + return GLB_TIMER_VAL(~0) | + GLB_TIMER_SOURCE_GPU_COUNTER; + } + + mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate, + 1000000ull << 10); + if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0))) + mod_cycles = GLB_TIMER_VAL(~0); + + return GLB_TIMER_VAL(mod_cycles) | + (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0); +} + +static int panthor_fw_binary_iter_read(struct panthor_device *ptdev, + struct panthor_fw_binary_iter *iter, + void *out, size_t size) +{ + size_t new_offset = iter->offset + size; + + if (new_offset > iter->size || new_offset < iter->offset) { + drm_err(&ptdev->base, "Firmware too small\n"); + return -EINVAL; + } + + memcpy(out, iter->data + iter->offset, size); + iter->offset = new_offset; + return 0; +} + +static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev, + struct panthor_fw_binary_iter *iter, + struct panthor_fw_binary_iter *sub_iter, + size_t size) +{ + size_t new_offset = iter->offset + size; + + if (new_offset > iter->size || new_offset < iter->offset) { + drm_err(&ptdev->base, "Firmware entry too long\n"); + return -EINVAL; + } + + sub_iter->offset = 0; + sub_iter->data = iter->data + iter->offset; + sub_iter->size = size; + iter->offset = new_offset; + return 0; +} + +static void panthor_fw_init_section_mem(struct panthor_device *ptdev, + struct panthor_fw_section *section) +{ + bool was_mapped = !!section->mem->kmap; + int ret; + + if (!section->data.size && + !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)) + return; + + ret = panthor_kernel_bo_vmap(section->mem); + if (drm_WARN_ON(&ptdev->base, ret)) + return; + + memcpy(section->mem->kmap, section->data.buf, section->data.size); + if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) { + memset(section->mem->kmap + section->data.size, 0, + panthor_kernel_bo_size(section->mem) - section->data.size); + } + + if (!was_mapped) + panthor_kernel_bo_vunmap(section->mem); +} + +/** + * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces. + * @ptdev: Device. + * @input: Pointer holding the input interface on success. + * Should be ignored on failure. + * @output: Pointer holding the output interface on success. + * Should be ignored on failure. + * @input_fw_va: Pointer holding the input interface FW VA on success. + * Should be ignored on failure. + * @output_fw_va: Pointer holding the output interface FW VA on success. + * Should be ignored on failure. + * + * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input + * interface is at offset 0, and the output interface at offset 4096. + * + * Return: A valid pointer in case of success, an ERR_PTR() otherwise. 
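+ *
+ * The backing buffer is mapped uncached on the GPU side and zero-initialized
+ * before the FW VAs are handed back to the caller.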
+ */ +struct panthor_kernel_bo * +panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, + struct panthor_fw_ringbuf_input_iface **input, + const struct panthor_fw_ringbuf_output_iface **output, + u32 *input_fw_va, u32 *output_fw_va) +{ + struct panthor_kernel_bo *mem; + int ret; + + mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K, + DRM_PANTHOR_BO_NO_MMAP, + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, + PANTHOR_VM_KERNEL_AUTO_VA); + if (IS_ERR(mem)) + return mem; + + ret = panthor_kernel_bo_vmap(mem); + if (ret) { + panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem); + return ERR_PTR(ret); + } + + memset(mem->kmap, 0, panthor_kernel_bo_size(mem)); + *input = mem->kmap; + *output = mem->kmap + SZ_4K; + *input_fw_va = panthor_kernel_bo_gpuva(mem); + *output_fw_va = *input_fw_va + SZ_4K; + + return mem; +} + +/** + * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group. + * @ptdev: Device. + * @size: Size of the suspend buffer. + * + * Return: A valid pointer in case of success, an ERR_PTR() otherwise. + */ +struct panthor_kernel_bo * +panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) +{ + return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, + DRM_PANTHOR_BO_NO_MMAP, + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, + PANTHOR_VM_KERNEL_AUTO_VA); +} + +static int panthor_fw_load_section_entry(struct panthor_device *ptdev, + const struct firmware *fw, + struct panthor_fw_binary_iter *iter, + u32 ehdr) +{ + struct panthor_fw_binary_section_entry_hdr hdr; + struct panthor_fw_section *section; + u32 section_size; + u32 name_len; + int ret; + + ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); + if (ret) + return ret; + + if (hdr.data.end < hdr.data.start) { + drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", + hdr.data.end, hdr.data.start); + return -EINVAL; + } + + if (hdr.va.end < hdr.va.start) { + drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", + hdr.va.end, hdr.va.start); + return -EINVAL; + } + + if (hdr.data.end > fw->size) { + drm_err(&ptdev->base, "Firmware corrupted, file truncated? 
data_end=0x%x > fw size=0x%zx\n", + hdr.data.end, fw->size); + return -EINVAL; + } + + if ((hdr.va.start & ~PAGE_MASK) != 0 || + (hdr.va.end & ~PAGE_MASK) != 0) { + drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", + hdr.va.start, hdr.va.end); + return -EINVAL; + } + + if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { + drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", + hdr.flags); + return -EINVAL; + } + + if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { + drm_warn(&ptdev->base, + "Firmware protected mode entry not be supported, ignoring"); + return 0; + } + + if (hdr.va.start == CSF_MCU_SHARED_REGION_START && + !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { + drm_err(&ptdev->base, + "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); + return -EINVAL; + } + + name_len = iter->size - iter->offset; + + section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); + if (!section) + return -ENOMEM; + + list_add_tail(§ion->node, &ptdev->fw->sections); + section->flags = hdr.flags; + section->data.size = hdr.data.end - hdr.data.start; + + if (section->data.size > 0) { + void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); + + if (!data) + return -ENOMEM; + + memcpy(data, fw->data + hdr.data.start, section->data.size); + section->data.buf = data; + } + + if (name_len > 0) { + char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); + + if (!name) + return -ENOMEM; + + memcpy(name, iter->data + iter->offset, name_len); + name[name_len] = '\0'; + section->name = name; + } + + section_size = hdr.va.end - hdr.va.start; + if (section_size) { + u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; + struct panthor_gem_object *bo; + u32 vm_map_flags = 0; + struct sg_table *sgt; + u64 va = hdr.va.start; + + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) + vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; + + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) + vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; + + /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to + * non-cacheable for now. We might want to introduce a new + * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device + * memory and is currently not used by our driver) for + * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit + * of IO-coherent systems. 
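+		 * In practice this means both *_COHERENT cache modes currently
+		 * end up with DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED below, since
+		 * only the plain CACHED mode escapes the check.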
+ */ + if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) + vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; + + section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), + section_size, + DRM_PANTHOR_BO_NO_MMAP, + vm_map_flags, va); + if (IS_ERR(section->mem)) + return PTR_ERR(section->mem); + + if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) + return -EINVAL; + + if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { + ret = panthor_kernel_bo_vmap(section->mem); + if (ret) + return ret; + } + + panthor_fw_init_section_mem(ptdev, section); + + bo = to_panthor_bo(section->mem->obj); + sgt = drm_gem_shmem_get_pages_sgt(&bo->base); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); + } + + if (hdr.va.start == CSF_MCU_SHARED_REGION_START) + ptdev->fw->shared_section = section; + + return 0; +} + +static void +panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) +{ + struct panthor_fw_section *section; + + list_for_each_entry(section, &ptdev->fw->sections, node) { + struct sg_table *sgt; + + if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) + continue; + + panthor_fw_init_section_mem(ptdev, section); + sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base); + if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt))) + dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); + } +} + +static int panthor_fw_load_entry(struct panthor_device *ptdev, + const struct firmware *fw, + struct panthor_fw_binary_iter *iter) +{ + struct panthor_fw_binary_iter eiter; + u32 ehdr; + int ret; + + ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); + if (ret) + return ret; + + if ((iter->offset % sizeof(u32)) || + (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { + drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", + (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); + return -EINVAL; + } + + if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, + CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) + return -EINVAL; + + switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { + case CSF_FW_BINARY_ENTRY_TYPE_IFACE: + return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); + + /* FIXME: handle those entry types? 
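+	 * For now they are silently skipped; entries of an unknown type that
+	 * are not marked CSF_FW_BINARY_ENTRY_OPTIONAL are still rejected
+	 * below.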
*/ + case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: + case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: + case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: + case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: + return 0; + default: + break; + } + + if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) + return 0; + + drm_err(&ptdev->base, + "Unsupported non-optional entry type %u in firmware\n", + CSF_FW_BINARY_ENTRY_TYPE(ehdr)); + return -EINVAL; +} + +static int panthor_fw_load(struct panthor_device *ptdev) +{ + const struct firmware *fw = NULL; + struct panthor_fw_binary_iter iter = {}; + struct panthor_fw_binary_hdr hdr; + char fw_path[128]; + int ret; + + snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", + (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), + (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), + CSF_FW_NAME); + + ret = request_firmware(&fw, fw_path, ptdev->base.dev); + if (ret) { + drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", + CSF_FW_NAME); + return ret; + } + + iter.data = fw->data; + iter.size = fw->size; + ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); + if (ret) + goto out; + + if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { + ret = -EINVAL; + drm_err(&ptdev->base, "Invalid firmware magic\n"); + goto out; + } + + if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { + ret = -EINVAL; + drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", + hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); + goto out; + } + + if (hdr.size > iter.size) { + drm_err(&ptdev->base, "Firmware image is truncated\n"); + goto out; + } + + iter.size = hdr.size; + + while (iter.offset < hdr.size) { + ret = panthor_fw_load_entry(ptdev, fw, &iter); + if (ret) + goto out; + } + + if (!ptdev->fw->shared_section) { + drm_err(&ptdev->base, "Shared interface region not found\n"); + ret = -EINVAL; + goto out; + } + +out: + release_firmware(fw); + return ret; +} + +/** + * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address + * @ptdev: Device. + * @mcu_va: MCU address. + * + * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 
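+ *
+ * Only MCU VAs that fall inside the shared section can be translated;
+ * anything else yields NULL, which callers treat as an invalid interface
+ * descriptor coming from the FW.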
+ */ +static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) +{ + u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); + u64 shared_mem_end = shared_mem_start + + panthor_kernel_bo_size(ptdev->fw->shared_section->mem); + if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) + return NULL; + + return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); +} + +static int panthor_init_cs_iface(struct panthor_device *ptdev, + unsigned int csg_idx, unsigned int cs_idx) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); + struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; + u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); + u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + + (csg_idx * glb_iface->control->group_stride) + + CSF_STREAM_CONTROL_OFFSET + + (cs_idx * csg_iface->control->stream_stride); + struct panthor_fw_cs_iface *first_cs_iface = + panthor_fw_get_cs_iface(ptdev, 0, 0); + + if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) + return -EINVAL; + + spin_lock_init(&cs_iface->lock); + cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; + cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); + cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); + + if (!cs_iface->input || !cs_iface->output) { + drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); + return -EINVAL; + } + + if (cs_iface != first_cs_iface) { + if (cs_iface->control->features != first_cs_iface->control->features) { + drm_err(&ptdev->base, "Expecting identical CS slots"); + return -EINVAL; + } + } else { + u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); + + ptdev->csif_info.cs_reg_count = reg_count; + ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; + } + + return 0; +} + +static bool compare_csg(const struct panthor_fw_csg_control_iface *a, + const struct panthor_fw_csg_control_iface *b) +{ + if (a->features != b->features) + return false; + if (a->suspend_size != b->suspend_size) + return false; + if (a->protm_suspend_size != b->protm_suspend_size) + return false; + if (a->stream_num != b->stream_num) + return false; + return true; +} + +static int panthor_init_csg_iface(struct panthor_device *ptdev, + unsigned int csg_idx) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; + u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); + u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); + unsigned int i; + + if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) + return -EINVAL; + + spin_lock_init(&csg_iface->lock); + csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; + csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); + csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); + + if (csg_iface->control->stream_num < MIN_CS_PER_CSG || + csg_iface->control->stream_num > MAX_CS_PER_CSG) + return -EINVAL; + + if (!csg_iface->input || !csg_iface->output) { + drm_err(&ptdev->base, "Invalid group control interface input/output VA"); + return -EINVAL; + } + + if (csg_idx > 0) { + struct panthor_fw_csg_iface 
*first_csg_iface = + panthor_fw_get_csg_iface(ptdev, 0); + + if (!compare_csg(first_csg_iface->control, csg_iface->control)) { + drm_err(&ptdev->base, "Expecting identical CSG slots"); + return -EINVAL; + } + } + + for (i = 0; i < csg_iface->control->stream_num; i++) { + int ret = panthor_init_cs_iface(ptdev, csg_idx, i); + + if (ret) + return ret; + } + + return 0; +} + +static u32 panthor_get_instr_features(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) + return 0; + + return glb_iface->control->instr_features; +} + +static int panthor_fw_init_ifaces(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; + unsigned int i; + + if (!ptdev->fw->shared_section->mem->kmap) + return -EINVAL; + + spin_lock_init(&glb_iface->lock); + glb_iface->control = ptdev->fw->shared_section->mem->kmap; + + if (!glb_iface->control->version) { + drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); + return -EINVAL; + } + + glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); + glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); + if (!glb_iface->input || !glb_iface->output) { + drm_err(&ptdev->base, "Invalid global control interface input/output VA"); + return -EINVAL; + } + + if (glb_iface->control->group_num > MAX_CSGS || + glb_iface->control->group_num < MIN_CSGS) { + drm_err(&ptdev->base, "Invalid number of control groups"); + return -EINVAL; + } + + for (i = 0; i < glb_iface->control->group_num; i++) { + int ret = panthor_init_csg_iface(ptdev, i); + + if (ret) + return ret; + } + + drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x", + CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), + CSF_IFACE_VERSION_MINOR(glb_iface->control->version), + CSF_IFACE_VERSION_PATCH(glb_iface->control->version), + glb_iface->control->features, + panthor_get_instr_features(ptdev)); + return 0; +} + +static void panthor_fw_init_global_iface(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + /* Enable all cores. */ + glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; + + /* Setup timers. */ + glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); + glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; + glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); + + /* Enable interrupts we care about. */ + glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | + GLB_PING | + GLB_CFG_PROGRESS_TIMER | + GLB_CFG_POWEROFF_TIMER | + GLB_IDLE_EN | + GLB_IDLE; + + panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); + panthor_fw_toggle_reqs(glb_iface, req, ack, + GLB_CFG_ALLOC_EN | + GLB_CFG_POWEROFF_TIMER | + GLB_CFG_PROGRESS_TIMER); + + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); + + /* Kick the watchdog. */ + mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, + msecs_to_jiffies(PING_INTERVAL_MS)); +} + +static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) +{ + if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) + ptdev->fw->booted = true; + + wake_up_all(&ptdev->fw->req_waitqueue); + + /* If the FW is not booted, don't process IRQs, just flag the FW as booted. 
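+	 * The wake_up_all() above is all the boot path needs:
+	 * panthor_fw_start() sleeps on req_waitqueue until ->booted flips to
+	 * true.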
*/ + if (!ptdev->fw->booted) + return; + + panthor_sched_report_fw_events(ptdev, status); +} +PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler); + +static int panthor_fw_start(struct panthor_device *ptdev) +{ + bool timedout = false; + + ptdev->fw->booted = false; + panthor_job_irq_resume(&ptdev->fw->irq, ~0); + gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO); + + if (!wait_event_timeout(ptdev->fw->req_waitqueue, + ptdev->fw->booted, + msecs_to_jiffies(1000))) { + if (!ptdev->fw->booted && + !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF)) + timedout = true; + } + + if (timedout) { + static const char * const status_str[] = { + [MCU_STATUS_DISABLED] = "disabled", + [MCU_STATUS_ENABLED] = "enabled", + [MCU_STATUS_HALT] = "halt", + [MCU_STATUS_FATAL] = "fatal", + }; + u32 status = gpu_read(ptdev, MCU_STATUS); + + drm_err(&ptdev->base, "Failed to boot MCU (status=%s)", + status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown"); + return -ETIMEDOUT; + } + + return 0; +} + +static void panthor_fw_stop(struct panthor_device *ptdev) +{ + u32 status; + + gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); + if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, + status == MCU_STATUS_DISABLED, 10, 100000)) + drm_err(&ptdev->base, "Failed to stop MCU"); +} + +/** + * panthor_fw_pre_reset() - Call before a reset. + * @ptdev: Device. + * @on_hang: true if the reset was triggered on a GPU hang. + * + * If the reset is not triggered on a hang, we try to gracefully halt the + * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. + */ +void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) +{ + /* Make sure we won't be woken up by a ping. */ + cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); + + ptdev->fw->fast_reset = false; + + if (!on_hang) { + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + u32 status; + + panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); + if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, + status == MCU_STATUS_HALT, 10, 100000) && + glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { + ptdev->fw->fast_reset = true; + } else { + drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); + } + + /* The FW detects 0 -> 1 transitions. Make sure we reset + * the HALT bit before the FW is rebooted. + */ + panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); + } + + panthor_job_irq_suspend(&ptdev->fw->irq); +} + +/** + * panthor_fw_post_reset() - Call after a reset. + * @ptdev: Device. + * + * Start the FW. If this is not a fast reset, all FW sections are reloaded to + * make sure we can recover from a memory corruption. + */ +int panthor_fw_post_reset(struct panthor_device *ptdev) +{ + int ret; + + /* Make the MCU VM active. */ + ret = panthor_vm_active(ptdev->fw->vm); + if (ret) + return ret; + + /* If this is a fast reset, try to start the MCU without reloading + * the FW sections. If it fails, go for a full reset. + */ + if (ptdev->fw->fast_reset) { + ret = panthor_fw_start(ptdev); + if (!ret) + goto out; + + /* Force a disable, so we get a fresh boot on the next + * panthor_fw_start() call. + */ + gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); + drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset"); + } + + /* Reload all sections, including RO ones. We're not supposed + * to end up here anyway, let's just assume the overhead of + * reloading everything is acceptable. 
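+	 * (full_reload is set to true below, so even sections without the
+	 * RD_WR flag get their initial contents restored; a partial reload
+	 * would skip them, see panthor_reload_fw_sections().)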
+ */ + panthor_reload_fw_sections(ptdev, true); + + ret = panthor_fw_start(ptdev); + if (ret) { + drm_err(&ptdev->base, "FW slow reset failed"); + return ret; + } + +out: + /* We must re-initialize the global interface even on fast-reset. */ + panthor_fw_init_global_iface(ptdev); + return 0; +} + +/** + * panthor_fw_unplug() - Called when the device is unplugged. + * @ptdev: Device. + * + * This function must make sure all pending operations are flushed before + * will release device resources, thus preventing any interaction with + * the HW. + * + * If there is still FW-related work running after this function returns, + * they must use drm_dev_{enter,exit}() and skip any HW access when + * drm_dev_enter() returns false. + */ +void panthor_fw_unplug(struct panthor_device *ptdev) +{ + struct panthor_fw_section *section; + + cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); + + /* Make sure the IRQ handler can be called after that point. */ + if (ptdev->fw->irq.irq) + panthor_job_irq_suspend(&ptdev->fw->irq); + + panthor_fw_stop(ptdev); + + list_for_each_entry(section, &ptdev->fw->sections, node) + panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem); + + /* We intentionally don't call panthor_vm_idle() and let + * panthor_mmu_unplug() release the AS we acquired with + * panthor_vm_active() so we don't have to track the VM active/idle + * state to keep the active_refcnt balanced. + */ + panthor_vm_put(ptdev->fw->vm); + + panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); +} + +/** + * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW. + * @req_ptr: Pointer to the req register. + * @ack_ptr: Pointer to the ack register. + * @wq: Wait queue to use for the sleeping wait. + * @req_mask: Mask of requests to wait for. + * @acked: Pointer to field that's updated with the acked requests. + * If the function returns 0, *acked == req_mask. + * @timeout_ms: Timeout expressed in milliseconds. + * + * Return: 0 on success, -ETIMEDOUT otherwise. + */ +static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr, + wait_queue_head_t *wq, + u32 req_mask, u32 *acked, + u32 timeout_ms) +{ + u32 ack, req = READ_ONCE(*req_ptr) & req_mask; + int ret; + + /* Busy wait for a few µsecs before falling back to a sleeping wait. */ + *acked = req_mask; + ret = read_poll_timeout_atomic(READ_ONCE, ack, + (ack & req_mask) == req, + 0, 10, 0, + *ack_ptr); + if (!ret) + return 0; + + if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req, + msecs_to_jiffies(timeout_ms))) + return 0; + + /* Check one last time, in case we were not woken up for some reason. */ + ack = READ_ONCE(*ack_ptr); + if ((ack & req_mask) == req) + return 0; + + *acked = ~(req ^ ack) & req_mask; + return -ETIMEDOUT; +} + +/** + * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged. + * @ptdev: Device. + * @req_mask: Mask of requests to wait for. + * @acked: Pointer to field that's updated with the acked requests. + * If the function returns 0, *acked == req_mask. + * @timeout_ms: Timeout expressed in milliseconds. + * + * Return: 0 on success, -ETIMEDOUT otherwise. + */ +int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, + u32 req_mask, u32 *acked, + u32 timeout_ms) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + /* GLB_HALT doesn't get acked through the FW interface. 
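+	 * A halt completion is observed through MCU_STATUS and
+	 * glb_iface->output->halt_status instead, see panthor_fw_pre_reset().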
*/ + if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) + return -EINVAL; + + return panthor_fw_wait_acks(&glb_iface->input->req, + &glb_iface->output->ack, + &ptdev->fw->req_waitqueue, + req_mask, acked, timeout_ms); +} + +/** + * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. + * @ptdev: Device. + * @csg_slot: CSG slot ID. + * @req_mask: Mask of requests to wait for. + * @acked: Pointer to field that's updated with the acked requests. + * If the function returns 0, *acked == req_mask. + * @timeout_ms: Timeout expressed in milliseconds. + * + * Return: 0 on success, -ETIMEDOUT otherwise. + */ +int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, + u32 req_mask, u32 *acked, u32 timeout_ms) +{ + struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); + int ret; + + if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) + return -EINVAL; + + ret = panthor_fw_wait_acks(&csg_iface->input->req, + &csg_iface->output->ack, + &ptdev->fw->req_waitqueue, + req_mask, acked, timeout_ms); + + /* + * Check that all bits in the state field were updated, if any mismatch + * then clear all bits in the state field. This allows code to do + * (acked & CSG_STATE_MASK) and get the right value. + */ + + if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) + *acked &= ~CSG_STATE_MASK; + + return ret; +} + +/** + * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. + * @ptdev: Device. + * @csg_mask: Bitmask encoding the command stream group doorbells to ring. + * + * This function is toggling bits in the doorbell_req and ringing the + * global doorbell. It doesn't require a user doorbell to be attached to + * the group. + */ +void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); +} + +static void panthor_fw_ping_work(struct work_struct *work) +{ + struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); + struct panthor_device *ptdev = fw->irq.ptdev; + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + u32 acked; + int ret; + + if (panthor_device_reset_is_pending(ptdev)) + return; + + panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); + + ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); + if (ret) { + panthor_device_schedule_reset(ptdev); + drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); + } else { + mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, + msecs_to_jiffies(PING_INTERVAL_MS)); + } +} + +/** + * panthor_fw_init() - Initialize FW related data. + * @ptdev: Device. + * + * Return: 0 on success, a negative error code otherwise. 
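+ *
+ * On failure, everything set up so far is torn down through
+ * panthor_fw_unplug() (see the err_unplug_fw label), so the caller is not
+ * expected to perform any FW-specific cleanup.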
+ */ +int panthor_fw_init(struct panthor_device *ptdev) +{ + struct panthor_fw *fw; + int ret, irq; + + fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); + if (!fw) + return -ENOMEM; + + ptdev->fw = fw; + init_waitqueue_head(&fw->req_waitqueue); + INIT_LIST_HEAD(&fw->sections); + INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); + + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); + if (irq <= 0) + return -ENODEV; + + ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0); + if (ret) { + drm_err(&ptdev->base, "failed to request job irq"); + return ret; + } + + ret = panthor_gpu_l2_power_on(ptdev); + if (ret) + return ret; + + fw->vm = panthor_vm_create(ptdev, true, + 0, SZ_4G, + CSF_MCU_SHARED_REGION_START, + CSF_MCU_SHARED_REGION_SIZE); + if (IS_ERR(fw->vm)) { + ret = PTR_ERR(fw->vm); + fw->vm = NULL; + goto err_unplug_fw; + } + + ret = panthor_fw_load(ptdev); + if (ret) + goto err_unplug_fw; + + ret = panthor_vm_active(fw->vm); + if (ret) + goto err_unplug_fw; + + ret = panthor_fw_start(ptdev); + if (ret) + goto err_unplug_fw; + + ret = panthor_fw_init_ifaces(ptdev); + if (ret) + goto err_unplug_fw; + + panthor_fw_init_global_iface(ptdev); + return 0; + +err_unplug_fw: + panthor_fw_unplug(ptdev); + return ret; +} + +MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h new file mode 100644 index 000000000000..22448abde992 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_fw.h @@ -0,0 +1,503 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2023 Collabora ltd. */ + +#ifndef __PANTHOR_MCU_H__ +#define __PANTHOR_MCU_H__ + +#include <linux/types.h> + +struct panthor_device; +struct panthor_kernel_bo; + +#define MAX_CSGS 31 +#define MAX_CS_PER_CSG 32 + +struct panthor_fw_ringbuf_input_iface { + u64 insert; + u64 extract; +}; + +struct panthor_fw_ringbuf_output_iface { + u64 extract; + u32 active; +}; + +struct panthor_fw_cs_control_iface { +#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1) +#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8) +#define CS_FEATURES_COMPUTE BIT(16) +#define CS_FEATURES_FRAGMENT BIT(17) +#define CS_FEATURES_TILER BIT(18) + u32 features; + u32 input_va; + u32 output_va; +}; + +struct panthor_fw_cs_input_iface { +#define CS_STATE_MASK GENMASK(2, 0) +#define CS_STATE_STOP 0 +#define CS_STATE_START 1 +#define CS_EXTRACT_EVENT BIT(4) +#define CS_IDLE_SYNC_WAIT BIT(8) +#define CS_IDLE_PROTM_PENDING BIT(9) +#define CS_IDLE_EMPTY BIT(10) +#define CS_IDLE_RESOURCE_REQ BIT(11) +#define CS_TILER_OOM BIT(26) +#define CS_PROTM_PENDING BIT(27) +#define CS_FATAL BIT(30) +#define CS_FAULT BIT(31) +#define CS_REQ_MASK (CS_STATE_MASK | \ + CS_EXTRACT_EVENT | \ + CS_IDLE_SYNC_WAIT | \ + CS_IDLE_PROTM_PENDING | \ + CS_IDLE_EMPTY | \ + CS_IDLE_RESOURCE_REQ) +#define CS_EVT_MASK (CS_TILER_OOM | \ + CS_PROTM_PENDING | \ + CS_FATAL | \ + CS_FAULT) + u32 req; + +#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0)) +#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8)) + u32 config; + u32 reserved1; + u32 ack_irq_mask; + u64 ringbuf_base; + u32 ringbuf_size; + u32 reserved2; + u64 heap_start; + u64 heap_end; + u64 ringbuf_input; + u64 ringbuf_output; + u32 instr_config; + u32 instrbuf_size; + u64 instrbuf_base; + u64 instrbuf_offset_ptr; +}; + +struct panthor_fw_cs_output_iface { + u32 ack; + u32 reserved1[15]; + u64 status_cmd_ptr; + +#define CS_STATUS_WAIT_SB_MASK GENMASK(15, 0) +#define 
CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16) +#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16) +#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16) +#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24) +#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24) +#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24) +#define CS_STATUS_WAIT_PROGRESS BIT(28) +#define CS_STATUS_WAIT_PROTM BIT(29) +#define CS_STATUS_WAIT_SYNC_64B BIT(30) +#define CS_STATUS_WAIT_SYNC BIT(31) + u32 status_wait; + u32 status_req_resource; + u64 status_wait_sync_ptr; + u32 status_wait_sync_value; + u32 status_scoreboards; + +#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0 +#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 +#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 +#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 +#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 +#define CS_STATUS_BLOCKED_REASON_RES 6 +#define CS_STATUS_BLOCKED_REASON_FLUSH 7 +#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) + u32 status_blocked_reason; + u32 status_wait_sync_value_hi; + u32 reserved2[6]; + +#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0)) +#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0)) + u32 fault; + u32 fatal; + u64 fault_info; + u64 fatal_info; + u32 reserved3[10]; + u32 heap_vt_start; + u32 heap_vt_end; + u32 reserved4; + u32 heap_frag_end; + u64 heap_address; +}; + +struct panthor_fw_csg_control_iface { + u32 features; + u32 input_va; + u32 output_va; + u32 suspend_size; + u32 protm_suspend_size; + u32 stream_num; + u32 stream_stride; +}; + +struct panthor_fw_csg_input_iface { +#define CSG_STATE_MASK GENMASK(2, 0) +#define CSG_STATE_TERMINATE 0 +#define CSG_STATE_START 1 +#define CSG_STATE_SUSPEND 2 +#define CSG_STATE_RESUME 3 +#define CSG_ENDPOINT_CONFIG BIT(4) +#define CSG_STATUS_UPDATE BIT(5) +#define CSG_SYNC_UPDATE BIT(28) +#define CSG_IDLE BIT(29) +#define CSG_DOORBELL BIT(30) +#define CSG_PROGRESS_TIMER_EVENT BIT(31) +#define CSG_REQ_MASK (CSG_STATE_MASK | \ + CSG_ENDPOINT_CONFIG | \ + CSG_STATUS_UPDATE) +#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \ + CSG_IDLE | \ + CSG_PROGRESS_TIMER_EVENT) + u32 req; + u32 ack_irq_mask; + + u32 doorbell_req; + u32 cs_irq_ack; + u32 reserved1[4]; + u64 allow_compute; + u64 allow_fragment; + u32 allow_other; + +#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0)) +#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8)) +#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) +#define CSG_EP_REQ_EXCL_COMPUTE BIT(20) +#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) +#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) +#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) + u32 endpoint_req; + u32 reserved2[2]; + u64 suspend_buf; + u64 protm_suspend_buf; + u32 config; + u32 iter_trace_config; +}; + +struct panthor_fw_csg_output_iface { + u32 ack; + u32 reserved1; + u32 doorbell_ack; + u32 cs_irq_req; + u32 status_endpoint_current; + u32 status_endpoint_req; + +#define CSG_STATUS_STATE_IS_IDLE BIT(0) + u32 status_state; + u32 resource_dep; +}; + +struct panthor_fw_global_control_iface { + u32 version; + u32 features; + u32 input_va; + u32 output_va; + u32 group_num; + u32 group_stride; + u32 perfcnt_size; + u32 instr_features; +}; + +struct panthor_fw_global_input_iface { +#define GLB_HALT BIT(0) +#define GLB_CFG_PROGRESS_TIMER BIT(1) +#define GLB_CFG_ALLOC_EN BIT(2) +#define GLB_CFG_POWEROFF_TIMER BIT(3) +#define GLB_PROTM_ENTER BIT(4) +#define GLB_PERFCNT_EN BIT(5) +#define GLB_PERFCNT_SAMPLE BIT(6) +#define GLB_COUNTER_EN BIT(7) +#define GLB_PING BIT(8) +#define GLB_FWCFG_UPDATE BIT(9) +#define GLB_IDLE_EN 
BIT(10) +#define GLB_SLEEP BIT(12) +#define GLB_INACTIVE_COMPUTE BIT(20) +#define GLB_INACTIVE_FRAGMENT BIT(21) +#define GLB_INACTIVE_TILER BIT(22) +#define GLB_PROTM_EXIT BIT(23) +#define GLB_PERFCNT_THRESHOLD BIT(24) +#define GLB_PERFCNT_OVERFLOW BIT(25) +#define GLB_IDLE BIT(26) +#define GLB_DBG_CSF BIT(30) +#define GLB_DBG_HOST BIT(31) +#define GLB_REQ_MASK GENMASK(10, 0) +#define GLB_EVT_MASK GENMASK(26, 20) + u32 req; + u32 ack_irq_mask; + u32 doorbell_req; + u32 reserved1; + u32 progress_timer; + +#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0)) +#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31) + u32 poweroff_timer; + u64 core_en_mask; + u32 reserved2; + u32 perfcnt_as; + u64 perfcnt_base; + u32 perfcnt_extract; + u32 reserved3[3]; + u32 perfcnt_config; + u32 perfcnt_csg_select; + u32 perfcnt_fw_enable; + u32 perfcnt_csg_enable; + u32 perfcnt_csf_enable; + u32 perfcnt_shader_enable; + u32 perfcnt_tiler_enable; + u32 perfcnt_mmu_l2_enable; + u32 reserved4[8]; + u32 idle_timer; +}; + +enum panthor_fw_halt_status { + PANTHOR_FW_HALT_OK = 0, + PANTHOR_FW_HALT_ON_PANIC = 0x4e, + PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f, +}; + +struct panthor_fw_global_output_iface { + u32 ack; + u32 reserved1; + u32 doorbell_ack; + u32 reserved2; + u32 halt_status; + u32 perfcnt_status; + u32 perfcnt_insert; +}; + +/** + * struct panthor_fw_cs_iface - Firmware command stream slot interface + */ +struct panthor_fw_cs_iface { + /** + * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req + * field. + * + * Needed so we can update the req field concurrently from the interrupt + * handler and the scheduler logic. + * + * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW + * interface sections are mapped uncached/write-combined right now, and + * using cmpxchg() on such mappings leads to SError faults. Revisit when + * we have 'SHARED' GPU mappings hooked up. + */ + spinlock_t lock; + + /** + * @control: Command stream slot control interface. + * + * Used to expose command stream slot properties. + * + * This interface is read-only. + */ + struct panthor_fw_cs_control_iface *control; + + /** + * @input: Command stream slot input interface. + * + * Used for host updates/events. + */ + struct panthor_fw_cs_input_iface *input; + + /** + * @output: Command stream slot output interface. + * + * Used for FW updates/events. + * + * This interface is read-only. + */ + const struct panthor_fw_cs_output_iface *output; +}; + +/** + * struct panthor_fw_csg_iface - Firmware command stream group slot interface + */ +struct panthor_fw_csg_iface { + /** + * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req + * field. + * + * Needed so we can update the req field concurrently from the interrupt + * handler and the scheduler logic. + * + * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW + * interface sections are mapped uncached/write-combined right now, and + * using cmpxchg() on such mappings leads to SError faults. Revisit when + * we have 'SHARED' GPU mappings hooked up. + */ + spinlock_t lock; + + /** + * @control: Command stream group slot control interface. + * + * Used to expose command stream group slot properties. + * + * This interface is read-only. + */ + const struct panthor_fw_csg_control_iface *control; + + /** + * @input: Command stream slot input interface. + * + * Used for host updates/events. + */ + struct panthor_fw_csg_input_iface *input; + + /** + * @output: Command stream group slot output interface. 
+ * + * Used for FW updates/events. + * + * This interface is read-only. + */ + const struct panthor_fw_csg_output_iface *output; +}; + +/** + * struct panthor_fw_global_iface - Firmware global interface + */ +struct panthor_fw_global_iface { + /** + * @lock: Lock protecting access to the panthor_fw_global_input_iface::req + * field. + * + * Needed so we can update the req field concurrently from the interrupt + * handler and the scheduler/FW management logic. + * + * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW + * interface sections are mapped uncached/write-combined right now, and + * using cmpxchg() on such mappings leads to SError faults. Revisit when + * we have 'SHARED' GPU mappings hooked up. + */ + spinlock_t lock; + + /** + * @control: Command stream group slot control interface. + * + * Used to expose global FW properties. + * + * This interface is read-only. + */ + const struct panthor_fw_global_control_iface *control; + + /** + * @input: Global input interface. + * + * Used for host updates/events. + */ + struct panthor_fw_global_input_iface *input; + + /** + * @output: Global output interface. + * + * Used for FW updates/events. + * + * This interface is read-only. + */ + const struct panthor_fw_global_output_iface *output; +}; + +/** + * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW + * @__iface: The interface to operate on. + * @__in_reg: Name of the register to update in the input section of the interface. + * @__out_reg: Name of the register to take as a reference in the output section of the + * interface. + * @__mask: Mask to apply to the update. + * + * The Host -> FW event/message passing was designed to be lockless, with each side of + * the channel having its writeable section. Events are signaled as a difference between + * the host and FW side in the req/ack registers (when a bit differs, there's an event + * pending, when they are the same, nothing needs attention). + * + * This helper allows one to update the req register based on the current value of the + * ack register managed by the FW. Toggling a specific bit will flag an event. In order + * for events to be re-evaluated, the interface doorbell needs to be rung. + * + * Concurrent accesses to the same req register is covered. + * + * Anything requiring atomic updates to multiple registers requires a dedicated lock. + */ +#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \ + do { \ + u32 __cur_val, __new_val, __out_val; \ + spin_lock(&(__iface)->lock); \ + __cur_val = READ_ONCE((__iface)->input->__in_reg); \ + __out_val = READ_ONCE((__iface)->output->__out_reg); \ + __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \ + WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ + spin_unlock(&(__iface)->lock); \ + } while (0) + +/** + * panthor_fw_update_reqs() - Update bits to reflect a configuration change + * @__iface: The interface to operate on. + * @__in_reg: Name of the register to update in the input section of the interface. + * @__val: Value to set. + * @__mask: Mask to apply to the update. + * + * Some configuration get passed through req registers that are also used to + * send events to the FW. Those req registers being updated from the interrupt + * handler, they require special helpers to update the configuration part as well. + * + * Concurrent accesses to the same req register is covered. + * + * Anything requiring atomic updates to multiple registers requires a dedicated lock. 
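+ *
+ * Illustrative usage only (a sketch, not lifted from the driver code below):
+ * set the GLB_IDLE_EN configuration bit on the global interface without
+ * disturbing the event bits carried by the same req register:
+ *
+ *   struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+ *
+ *   panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);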
+ */ +#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \ + do { \ + u32 __cur_val, __new_val; \ + spin_lock(&(__iface)->lock); \ + __cur_val = READ_ONCE((__iface)->input->__in_reg); \ + __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ + WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ + spin_unlock(&(__iface)->lock); \ + } while (0) + +struct panthor_fw_global_iface * +panthor_fw_get_glb_iface(struct panthor_device *ptdev); + +struct panthor_fw_csg_iface * +panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); + +struct panthor_fw_cs_iface * +panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); + +int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, + u32 *acked, u32 timeout_ms); + +int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked, + u32 timeout_ms); + +void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot); + +struct panthor_kernel_bo * +panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, + struct panthor_fw_ringbuf_input_iface **input, + const struct panthor_fw_ringbuf_output_iface **output, + u32 *input_fw_va, u32 *output_fw_va); +struct panthor_kernel_bo * +panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size); + +struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev); + +void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang); +int panthor_fw_post_reset(struct panthor_device *ptdev); + +static inline void panthor_fw_suspend(struct panthor_device *ptdev) +{ + panthor_fw_pre_reset(ptdev, false); +} + +static inline int panthor_fw_resume(struct panthor_device *ptdev) +{ + return panthor_fw_post_reset(ptdev); +} + +int panthor_fw_init(struct panthor_device *ptdev); +void panthor_fw_unplug(struct panthor_device *ptdev); + +#endif diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c new file mode 100644 index 000000000000..d6483266d0c2 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2023 Collabora ltd. */ + +#include <linux/dma-buf.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/slab.h> + +#include <drm/panthor_drm.h> + +#include "panthor_device.h" +#include "panthor_gem.h" +#include "panthor_mmu.h" + +static void panthor_gem_free_object(struct drm_gem_object *obj) +{ + struct panthor_gem_object *bo = to_panthor_bo(obj); + struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem; + + drm_gem_free_mmap_offset(&bo->base.base); + mutex_destroy(&bo->gpuva_list_lock); + drm_gem_shmem_free(&bo->base); + drm_gem_object_put(vm_root_gem); +} + +/** + * panthor_kernel_bo_destroy() - Destroy a kernel buffer object + * @vm: The VM this BO was mapped to. + * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction + * is skipped. 
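+ *
+ * Minimal pairing sketch (illustrative only; the size and flags below are
+ * arbitrary examples, not values mandated by this API):
+ *
+ *   bo = panthor_kernel_bo_create(ptdev, vm, SZ_4K,
+ *                                 DRM_PANTHOR_BO_NO_MMAP,
+ *                                 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+ *                                 PANTHOR_VM_KERNEL_AUTO_VA);
+ *   if (IS_ERR(bo))
+ *           return PTR_ERR(bo);
+ *   ...
+ *   panthor_kernel_bo_destroy(vm, bo);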
+ */ +void panthor_kernel_bo_destroy(struct panthor_vm *vm, + struct panthor_kernel_bo *bo) +{ + int ret; + + if (IS_ERR_OR_NULL(bo)) + return; + + panthor_kernel_bo_vunmap(bo); + + if (drm_WARN_ON(bo->obj->dev, + to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm))) + goto out_free_bo; + + ret = panthor_vm_unmap_range(vm, bo->va_node.start, + panthor_kernel_bo_size(bo)); + if (ret) + goto out_free_bo; + + panthor_vm_free_va(vm, &bo->va_node); + drm_gem_object_put(bo->obj); + +out_free_bo: + kfree(bo); +} + +/** + * panthor_kernel_bo_create() - Create and map a GEM object to a VM + * @ptdev: Device. + * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped. + * @size: Size of the buffer object. + * @bo_flags: Combination of drm_panthor_bo_flags flags. + * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those + * that are related to map operations). + * @gpu_va: GPU address assigned when mapping to the VM. + * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be + * automatically allocated. + * + * Return: A valid pointer in case of success, an ERR_PTR() otherwise. + */ +struct panthor_kernel_bo * +panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, + size_t size, u32 bo_flags, u32 vm_map_flags, + u64 gpu_va) +{ + struct drm_gem_shmem_object *obj; + struct panthor_kernel_bo *kbo; + struct panthor_gem_object *bo; + int ret; + + if (drm_WARN_ON(&ptdev->base, !vm)) + return ERR_PTR(-EINVAL); + + kbo = kzalloc(sizeof(*kbo), GFP_KERNEL); + if (!kbo) + return ERR_PTR(-ENOMEM); + + obj = drm_gem_shmem_create(&ptdev->base, size); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto err_free_bo; + } + + bo = to_panthor_bo(&obj->base); + size = obj->base.size; + kbo->obj = &obj->base; + bo->flags = bo_flags; + + ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node); + if (ret) + goto err_put_obj; + + ret = panthor_vm_map_bo_range(vm, bo, 0, size, kbo->va_node.start, vm_map_flags); + if (ret) + goto err_free_va; + + bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); + drm_gem_object_get(bo->exclusive_vm_root_gem); + bo->base.base.resv = bo->exclusive_vm_root_gem->resv; + return kbo; + +err_free_va: + panthor_vm_free_va(vm, &kbo->va_node); + +err_put_obj: + drm_gem_object_put(&obj->base); + +err_free_bo: + kfree(kbo); + return ERR_PTR(ret); +} + +static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct panthor_gem_object *bo = to_panthor_bo(obj); + + /* Don't allow mmap on objects that have the NO_MMAP flag set. */ + if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) + return -EINVAL; + + return drm_gem_shmem_object_mmap(obj, vma); +} + +static struct dma_buf * +panthor_gem_prime_export(struct drm_gem_object *obj, int flags) +{ + /* We can't export GEMs that have an exclusive VM. */ + if (to_panthor_bo(obj)->exclusive_vm_root_gem) + return ERR_PTR(-EINVAL); + + return drm_gem_prime_export(obj, flags); +} + +static const struct drm_gem_object_funcs panthor_gem_funcs = { + .free = panthor_gem_free_object, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = panthor_gem_mmap, + .export = panthor_gem_prime_export, + .vm_ops = &drm_gem_shmem_vm_ops, +}; + +/** + * panthor_gem_create_object - Implementation of driver->gem_create_object. 
+ * @ddev: DRM device + * @size: Size in bytes of the memory the object will reference + * + * This lets the GEM helpers allocate object structs for us, and keep + * our BO stats correct. + */ +struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) +{ + struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); + struct panthor_gem_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + obj->base.base.funcs = &panthor_gem_funcs; + obj->base.map_wc = !ptdev->coherent; + mutex_init(&obj->gpuva_list_lock); + drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); + + return &obj->base.base; +} + +/** + * panthor_gem_create_with_handle() - Create a GEM object and attach it to a handle. + * @file: DRM file. + * @ddev: DRM device. + * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared. + * @size: Size of the GEM object to allocate. + * @flags: Combination of drm_panthor_bo_flags flags. + * @handle: Pointer holding the handle pointing to the new GEM object. + * + * Return: Zero on success + */ +int +panthor_gem_create_with_handle(struct drm_file *file, + struct drm_device *ddev, + struct panthor_vm *exclusive_vm, + u64 *size, u32 flags, u32 *handle) +{ + int ret; + struct drm_gem_shmem_object *shmem; + struct panthor_gem_object *bo; + + shmem = drm_gem_shmem_create(ddev, *size); + if (IS_ERR(shmem)) + return PTR_ERR(shmem); + + bo = to_panthor_bo(&shmem->base); + bo->flags = flags; + + if (exclusive_vm) { + bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); + drm_gem_object_get(bo->exclusive_vm_root_gem); + bo->base.base.resv = bo->exclusive_vm_root_gem->resv; + } + + /* + * Allocate an id of idr table where the obj is registered + * and handle has the id what user can see. + */ + ret = drm_gem_handle_create(file, &shmem->base, handle); + if (!ret) + *size = bo->base.base.size; + + /* drop reference from allocate - handle holds it now. */ + drm_gem_object_put(&shmem->base); + + return ret; +} diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h new file mode 100644 index 000000000000..3bccba394d00 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2023 Collabora ltd. */ + +#ifndef __PANTHOR_GEM_H__ +#define __PANTHOR_GEM_H__ + +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_mm.h> + +#include <linux/iosys-map.h> +#include <linux/rwsem.h> + +struct panthor_vm; + +/** + * struct panthor_gem_object - Driver specific GEM object. + */ +struct panthor_gem_object { + /** @base: Inherit from drm_gem_shmem_object. */ + struct drm_gem_shmem_object base; + + /** + * @exclusive_vm_root_gem: Root GEM of the exclusive VM this GEM object + * is attached to. + * + * If @exclusive_vm_root_gem != NULL, any attempt to bind the GEM to a + * different VM will fail. + * + * All FW memory objects have this field set to the root GEM of the MCU + * VM. + */ + struct drm_gem_object *exclusive_vm_root_gem; + + /** + * @gpuva_list_lock: Custom GPUVA lock. + * + * Used to protect insertion of drm_gpuva elements to the + * drm_gem_object.gpuva.list list. + * + * We can't use the GEM resv for that, because drm_gpuva_link() is + * called in a dma-signaling path, where we're not allowed to take + * resv locks. 
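+ *
+ * Expected usage pattern (a sketch, assuming the mapping code links GPU VAs
+ * with drm_gpuva_link()/drm_gpuva_unlink()):
+ *
+ *   mutex_lock(&bo->gpuva_list_lock);
+ *   drm_gpuva_link(&vma->base, vm_bo);
+ *   mutex_unlock(&bo->gpuva_list_lock);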
+ */ + struct mutex gpuva_list_lock; + + /** @flags: Combination of drm_panthor_bo_flags flags. */ + u32 flags; +}; + +/** + * struct panthor_kernel_bo - Kernel buffer object. + * + * These objects are only manipulated by the kernel driver and not + * directly exposed to the userspace. The GPU address of a kernel + * BO might be passed to userspace though. + */ +struct panthor_kernel_bo { + /** + * @obj: The GEM object backing this kernel buffer object. + */ + struct drm_gem_object *obj; + + /** + * @va_node: VA space allocated to this GEM. + */ + struct drm_mm_node va_node; + + /** + * @kmap: Kernel CPU mapping of @gem. + */ + void *kmap; +}; + +static inline +struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj) +{ + return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base); +} + +struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size); + +struct drm_gem_object * +panthor_gem_prime_import_sg_table(struct drm_device *ddev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +int +panthor_gem_create_with_handle(struct drm_file *file, + struct drm_device *ddev, + struct panthor_vm *exclusive_vm, + u64 *size, u32 flags, uint32_t *handle); + +static inline u64 +panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo) +{ + return bo->va_node.start; +} + +static inline size_t +panthor_kernel_bo_size(struct panthor_kernel_bo *bo) +{ + return bo->obj->size; +} + +static inline int +panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo) +{ + struct iosys_map map; + int ret; + + if (bo->kmap) + return 0; + + ret = drm_gem_vmap_unlocked(bo->obj, &map); + if (ret) + return ret; + + bo->kmap = map.vaddr; + return 0; +} + +static inline void +panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo) +{ + if (bo->kmap) { + struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap); + + drm_gem_vunmap_unlocked(bo->obj, &map); + bo->kmap = NULL; + } +} + +struct panthor_kernel_bo * +panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, + size_t size, u32 bo_flags, u32 vm_map_flags, + u64 gpu_va); + +void panthor_kernel_bo_destroy(struct panthor_vm *vm, + struct panthor_kernel_bo *bo); + +#endif /* __PANTHOR_GEM_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c new file mode 100644 index 000000000000..0f7c962440d3 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */ +/* Copyright 2019 Collabora ltd. */ + +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> + +#include "panthor_device.h" +#include "panthor_gpu.h" +#include "panthor_regs.h" + +/** + * struct panthor_gpu - GPU block management data. + */ +struct panthor_gpu { + /** @irq: GPU irq. */ + struct panthor_irq irq; + + /** @reqs_lock: Lock protecting access to pending_reqs. */ + spinlock_t reqs_lock; + + /** @pending_reqs: Pending GPU requests. */ + u32 pending_reqs; + + /** @reqs_acked: GPU request wait queue. */ + wait_queue_head_t reqs_acked; +}; + +/** + * struct panthor_model - GPU model description + */ +struct panthor_model { + /** @name: Model name. 
*/ + const char *name; + + /** @arch_major: Major version number of architecture. */ + u8 arch_major; + + /** @product_major: Major version number of product. */ + u8 product_major; +}; + +/** + * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified + * by a combination of the major architecture version and the major product + * version. + * @_name: Name for the GPU model. + * @_arch_major: Architecture major. + * @_product_major: Product major. + */ +#define GPU_MODEL(_name, _arch_major, _product_major) \ +{\ + .name = __stringify(_name), \ + .arch_major = _arch_major, \ + .product_major = _product_major, \ +} + +static const struct panthor_model gpu_models[] = { + GPU_MODEL(g610, 10, 7), + {}, +}; + +#define GPU_INTERRUPTS_MASK \ + (GPU_IRQ_FAULT | \ + GPU_IRQ_PROTM_FAULT | \ + GPU_IRQ_RESET_COMPLETED | \ + GPU_IRQ_CLEAN_CACHES_COMPLETED) + +static void panthor_gpu_init_info(struct panthor_device *ptdev) +{ + const struct panthor_model *model; + u32 arch_major, product_major; + u32 major, minor, status; + unsigned int i; + + ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); + ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); + ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); + ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); + ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES); + ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES); + ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES); + ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES); + ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES); + ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS); + ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE); + ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE); + ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES); + for (i = 0; i < 4; i++) + ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i)); + + ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); + + ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO); + ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32; + + ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO); + ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32; + + ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO); + ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32; + + arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); + product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id); + major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id); + minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id); + status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id); + + for (model = gpu_models; model->name; model++) { + if (model->arch_major == arch_major && + model->product_major == product_major) + break; + } + + drm_info(&ptdev->base, + "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", + model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16, + major, minor, status); + + drm_info(&ptdev->base, + "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x", + ptdev->gpu_info.l2_features, + ptdev->gpu_info.tiler_features, + ptdev->gpu_info.mem_features, + ptdev->gpu_info.mmu_features, + ptdev->gpu_info.as_present); + + drm_info(&ptdev->base, + "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx", + 
ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present, + ptdev->gpu_info.tiler_present); +} + +static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status) +{ + if (status & GPU_IRQ_FAULT) { + u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS); + u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) | + gpu_read(ptdev, GPU_FAULT_ADDR_LO); + + drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n", + fault_status, panthor_exception_name(ptdev, fault_status & 0xFF), + address); + } + if (status & GPU_IRQ_PROTM_FAULT) + drm_warn(&ptdev->base, "GPU Fault in protected mode\n"); + + spin_lock(&ptdev->gpu->reqs_lock); + if (status & ptdev->gpu->pending_reqs) { + ptdev->gpu->pending_reqs &= ~status; + wake_up_all(&ptdev->gpu->reqs_acked); + } + spin_unlock(&ptdev->gpu->reqs_lock); +} +PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler); + +/** + * panthor_gpu_unplug() - Called when the GPU is unplugged. + * @ptdev: Device to unplug. + */ +void panthor_gpu_unplug(struct panthor_device *ptdev) +{ + unsigned long flags; + + /* Make sure the IRQ handler is not running after that point. */ + panthor_gpu_irq_suspend(&ptdev->gpu->irq); + + /* Wake-up all waiters. */ + spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); + ptdev->gpu->pending_reqs = 0; + wake_up_all(&ptdev->gpu->reqs_acked); + spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); +} + +/** + * panthor_gpu_init() - Initialize the GPU block + * @ptdev: Device. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_gpu_init(struct panthor_device *ptdev) +{ + struct panthor_gpu *gpu; + u32 pa_bits; + int ret, irq; + + gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL); + if (!gpu) + return -ENOMEM; + + spin_lock_init(&gpu->reqs_lock); + init_waitqueue_head(&gpu->reqs_acked); + ptdev->gpu = gpu; + panthor_gpu_init_info(ptdev); + + dma_set_max_seg_size(ptdev->base.dev, UINT_MAX); + pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); + ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits)); + if (ret) + return ret; + + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); + if (irq <= 0) + return ret; + + ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK); + if (ret) + return ret; + + return 0; +} + +/** + * panthor_gpu_block_power_off() - Power-off a specific block of the GPU + * @ptdev: Device. + * @blk_name: Block name. + * @pwroff_reg: Power-off register for this block. + * @pwrtrans_reg: Power transition register for this block. + * @mask: Sub-elements to power-off. + * @timeout_us: Timeout in microseconds. + * + * Return: 0 on success, a negative error code otherwise. 
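+ *
+ * Callers normally go through the panthor_gpu_power_off() wrapper macro
+ * declared in panthor_gpu.h, which derives the register offsets from the
+ * block name. Illustrative call (mask/timeout mirror the L2 power-on path,
+ * not a requirement of this helper):
+ *
+ *   ret = panthor_gpu_power_off(ptdev, L2, 1, 20000);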
+ */ +int panthor_gpu_block_power_off(struct panthor_device *ptdev, + const char *blk_name, + u32 pwroff_reg, u32 pwrtrans_reg, + u64 mask, u32 timeout_us) +{ + u32 val, i; + int ret; + + for (i = 0; i < 2; i++) { + u32 mask32 = mask >> (i * 32); + + if (!mask32) + continue; + + ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), + val, !(mask32 & val), + 100, timeout_us); + if (ret) { + drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", + blk_name, mask); + return ret; + } + } + + if (mask & GENMASK(31, 0)) + gpu_write(ptdev, pwroff_reg, mask); + + if (mask >> 32) + gpu_write(ptdev, pwroff_reg + 4, mask >> 32); + + for (i = 0; i < 2; i++) { + u32 mask32 = mask >> (i * 32); + + if (!mask32) + continue; + + ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), + val, !(mask32 & val), + 100, timeout_us); + if (ret) { + drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", + blk_name, mask); + return ret; + } + } + + return 0; +} + +/** + * panthor_gpu_block_power_on() - Power-on a specific block of the GPU + * @ptdev: Device. + * @blk_name: Block name. + * @pwron_reg: Power-on register for this block. + * @pwrtrans_reg: Power transition register for this block. + * @rdy_reg: Power transition ready register. + * @mask: Sub-elements to power-on. + * @timeout_us: Timeout in microseconds. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_gpu_block_power_on(struct panthor_device *ptdev, + const char *blk_name, + u32 pwron_reg, u32 pwrtrans_reg, + u32 rdy_reg, u64 mask, u32 timeout_us) +{ + u32 val, i; + int ret; + + for (i = 0; i < 2; i++) { + u32 mask32 = mask >> (i * 32); + + if (!mask32) + continue; + + ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), + val, !(mask32 & val), + 100, timeout_us); + if (ret) { + drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", + blk_name, mask); + return ret; + } + } + + if (mask & GENMASK(31, 0)) + gpu_write(ptdev, pwron_reg, mask); + + if (mask >> 32) + gpu_write(ptdev, pwron_reg + 4, mask >> 32); + + for (i = 0; i < 2; i++) { + u32 mask32 = mask >> (i * 32); + + if (!mask32) + continue; + + ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4), + val, (mask32 & val) == mask32, + 100, timeout_us); + if (ret) { + drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness", + blk_name, mask); + return ret; + } + } + + return 0; +} + +/** + * panthor_gpu_l2_power_on() - Power-on the L2-cache + * @ptdev: Device. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_gpu_l2_power_on(struct panthor_device *ptdev) +{ + if (ptdev->gpu_info.l2_present != 1) { + /* + * Only support one core group now. + * ~(l2_present - 1) unsets all bits in l2_present except + * the bottom bit. (l2_present - 2) has all the bits in + * the first core group set. AND them together to generate + * a mask of cores in the first core group. + */ + u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) & + (ptdev->gpu_info.l2_present - 2); + drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n", + hweight64(core_mask), + hweight64(ptdev->gpu_info.shader_present)); + } + + return panthor_gpu_power_on(ptdev, L2, 1, 20000); +} + +/** + * panthor_gpu_flush_caches() - Flush caches + * @ptdev: Device. + * @l2: L2 flush type. + * @lsc: LSC flush type. + * @other: Other flush type. + * + * Return: 0 on success, a negative error code otherwise. 
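+ *
+ * The three values are packed into a single GPU_CMD write through
+ * GPU_FLUSH_CACHES(), and the call then waits (up to 100ms) for the
+ * GPU_IRQ_CLEAN_CACHES_COMPLETED interrupt. Illustrative call, with
+ * CACHE_CLEAN/CACHE_INV assumed to be the flush-type encodings provided by
+ * panthor_regs.h:
+ *
+ *   ret = panthor_gpu_flush_caches(ptdev, CACHE_CLEAN | CACHE_INV,
+ *                                  CACHE_CLEAN | CACHE_INV, 0);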
+ */ +int panthor_gpu_flush_caches(struct panthor_device *ptdev, + u32 l2, u32 lsc, u32 other) +{ + bool timedout = false; + unsigned long flags; + + spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); + if (!drm_WARN_ON(&ptdev->base, + ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { + ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED; + gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other)); + } + spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); + + if (!wait_event_timeout(ptdev->gpu->reqs_acked, + !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED), + msecs_to_jiffies(100))) { + spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); + if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 && + !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED)) + timedout = true; + else + ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED; + spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); + } + + if (timedout) { + drm_err(&ptdev->base, "Flush caches timeout"); + return -ETIMEDOUT; + } + + return 0; +} + +/** + * panthor_gpu_soft_reset() - Issue a soft-reset + * @ptdev: Device. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_gpu_soft_reset(struct panthor_device *ptdev) +{ + bool timedout = false; + unsigned long flags; + + spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); + if (!drm_WARN_ON(&ptdev->base, + ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) { + ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED; + gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); + gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET); + } + spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); + + if (!wait_event_timeout(ptdev->gpu->reqs_acked, + !(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED), + msecs_to_jiffies(100))) { + spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); + if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 && + !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED)) + timedout = true; + else + ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED; + spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); + } + + if (timedout) { + drm_err(&ptdev->base, "Soft reset timeout"); + return -ETIMEDOUT; + } + + return 0; +} + +/** + * panthor_gpu_suspend() - Suspend the GPU block. + * @ptdev: Device. + * + * Suspend the GPU irq. This should be called last in the suspend procedure, + * after all other blocks have been suspented. + */ +void panthor_gpu_suspend(struct panthor_device *ptdev) +{ + /* + * It may be preferable to simply power down the L2, but for now just + * soft-reset which will leave the L2 powered down. + */ + panthor_gpu_soft_reset(ptdev); + panthor_gpu_irq_suspend(&ptdev->gpu->irq); +} + +/** + * panthor_gpu_resume() - Resume the GPU block. + * @ptdev: Device. + * + * Resume the IRQ handler and power-on the L2-cache. + * The FW takes care of powering the other blocks. + */ +void panthor_gpu_resume(struct panthor_device *ptdev) +{ + panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); + panthor_gpu_l2_power_on(ptdev); +} diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h new file mode 100644 index 000000000000..bba7555dd3c6 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_gpu.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Collabora ltd. 
*/ + +#ifndef __PANTHOR_GPU_H__ +#define __PANTHOR_GPU_H__ + +struct panthor_device; + +int panthor_gpu_init(struct panthor_device *ptdev); +void panthor_gpu_unplug(struct panthor_device *ptdev); +void panthor_gpu_suspend(struct panthor_device *ptdev); +void panthor_gpu_resume(struct panthor_device *ptdev); + +int panthor_gpu_block_power_on(struct panthor_device *ptdev, + const char *blk_name, + u32 pwron_reg, u32 pwrtrans_reg, + u32 rdy_reg, u64 mask, u32 timeout_us); +int panthor_gpu_block_power_off(struct panthor_device *ptdev, + const char *blk_name, + u32 pwroff_reg, u32 pwrtrans_reg, + u64 mask, u32 timeout_us); + +/** + * panthor_gpu_power_on() - Power on the GPU block. + * + * Return: 0 on success, a negative error code otherwise. + */ +#define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \ + panthor_gpu_block_power_on(ptdev, #type, \ + type ## _PWRON_LO, \ + type ## _PWRTRANS_LO, \ + type ## _READY_LO, \ + mask, timeout_us) + +/** + * panthor_gpu_power_off() - Power off the GPU block. + * + * Return: 0 on success, a negative error code otherwise. + */ +#define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \ + panthor_gpu_block_power_off(ptdev, #type, \ + type ## _PWROFF_LO, \ + type ## _PWRTRANS_LO, \ + mask, timeout_us) + +int panthor_gpu_l2_power_on(struct panthor_device *ptdev); +int panthor_gpu_flush_caches(struct panthor_device *ptdev, + u32 l2, u32 lsc, u32 other); +int panthor_gpu_soft_reset(struct panthor_device *ptdev); + +#endif diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c new file mode 100644 index 000000000000..143fa35f2e74 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -0,0 +1,597 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2023 Collabora ltd. */ + +#include <linux/iosys-map.h> +#include <linux/rwsem.h> + +#include <drm/panthor_drm.h> + +#include "panthor_device.h" +#include "panthor_gem.h" +#include "panthor_heap.h" +#include "panthor_mmu.h" +#include "panthor_regs.h" + +/* + * The GPU heap context is an opaque structure used by the GPU to track the + * heap allocations. The driver should only touch it to initialize it (zero all + * fields). Because the CPU and GPU can both access this structure it is + * required to be GPU cache line aligned. + */ +#define HEAP_CONTEXT_SIZE 32 + +/** + * struct panthor_heap_chunk_header - Heap chunk header + */ +struct panthor_heap_chunk_header { + /** + * @next: Next heap chunk in the list. + * + * This is a GPU VA. + */ + u64 next; + + /** @unknown: MBZ. */ + u32 unknown[14]; +}; + +/** + * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. + */ +struct panthor_heap_chunk { + /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ + struct list_head node; + + /** @bo: Buffer object backing the heap chunk. */ + struct panthor_kernel_bo *bo; +}; + +/** + * struct panthor_heap - Structure used to manage tiler heap contexts. + */ +struct panthor_heap { + /** @chunks: List containing all heap chunks allocated so far. */ + struct list_head chunks; + + /** @lock: Lock protecting insertion in the chunks list. */ + struct mutex lock; + + /** @chunk_size: Size of each chunk. */ + u32 chunk_size; + + /** @max_chunks: Maximum number of chunks. */ + u32 max_chunks; + + /** + * @target_in_flight: Number of in-flight render passes after which + * we'd let the FW wait for fragment job to finish instead of allocating new chunks. 
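+ *
+ * panthor_heap_grow() refuses to allocate a new chunk and returns -EBUSY
+ * when the caller reports more in-flight render passes than this value,
+ * which lets the FW wait for fragment jobs to complete instead.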
+ */ + u32 target_in_flight; + + /** @chunk_count: Number of heap chunks currently allocated. */ + u32 chunk_count; +}; + +#define MAX_HEAPS_PER_POOL 128 + +/** + * struct panthor_heap_pool - Pool of heap contexts + * + * The pool is attached to a panthor_file and can't be shared across processes. + */ +struct panthor_heap_pool { + /** @refcount: Reference count. */ + struct kref refcount; + + /** @ptdev: Device. */ + struct panthor_device *ptdev; + + /** @vm: VM this pool is bound to. */ + struct panthor_vm *vm; + + /** @lock: Lock protecting access to @xa. */ + struct rw_semaphore lock; + + /** @xa: Array storing panthor_heap objects. */ + struct xarray xa; + + /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ + struct panthor_kernel_bo *gpu_contexts; +}; + +static int panthor_heap_ctx_stride(struct panthor_device *ptdev) +{ + u32 l2_features = ptdev->gpu_info.l2_features; + u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); + + return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); +} + +static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) +{ + return panthor_heap_ctx_stride(pool->ptdev) * id; +} + +static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) +{ + return pool->gpu_contexts->kmap + + panthor_get_heap_ctx_offset(pool, id); +} + +static void panthor_free_heap_chunk(struct panthor_vm *vm, + struct panthor_heap *heap, + struct panthor_heap_chunk *chunk) +{ + mutex_lock(&heap->lock); + list_del(&chunk->node); + heap->chunk_count--; + mutex_unlock(&heap->lock); + + panthor_kernel_bo_destroy(vm, chunk->bo); + kfree(chunk); +} + +static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, + struct panthor_vm *vm, + struct panthor_heap *heap, + bool initial_chunk) +{ + struct panthor_heap_chunk *chunk; + struct panthor_heap_chunk_header *hdr; + int ret; + + chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + return -ENOMEM; + + chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, + DRM_PANTHOR_BO_NO_MMAP, + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, + PANTHOR_VM_KERNEL_AUTO_VA); + if (IS_ERR(chunk->bo)) { + ret = PTR_ERR(chunk->bo); + goto err_free_chunk; + } + + ret = panthor_kernel_bo_vmap(chunk->bo); + if (ret) + goto err_destroy_bo; + + hdr = chunk->bo->kmap; + memset(hdr, 0, sizeof(*hdr)); + + if (initial_chunk && !list_empty(&heap->chunks)) { + struct panthor_heap_chunk *prev_chunk; + u64 prev_gpuva; + + prev_chunk = list_first_entry(&heap->chunks, + struct panthor_heap_chunk, + node); + + prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); + hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | + (heap->chunk_size >> 12); + } + + panthor_kernel_bo_vunmap(chunk->bo); + + mutex_lock(&heap->lock); + list_add(&chunk->node, &heap->chunks); + heap->chunk_count++; + mutex_unlock(&heap->lock); + + return 0; + +err_destroy_bo: + panthor_kernel_bo_destroy(vm, chunk->bo); + +err_free_chunk: + kfree(chunk); + + return ret; +} + +static void panthor_free_heap_chunks(struct panthor_vm *vm, + struct panthor_heap *heap) +{ + struct panthor_heap_chunk *chunk, *tmp; + + list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) + panthor_free_heap_chunk(vm, heap, chunk); +} + +static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, + struct panthor_vm *vm, + struct panthor_heap *heap, + u32 chunk_count) +{ + int ret; + u32 i; + + for (i = 0; i < chunk_count; i++) { + ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); + if (ret) + return ret; + } + + return 0; +} + +static int 
+panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) +{ + struct panthor_heap *heap; + + heap = xa_erase(&pool->xa, handle); + if (!heap) + return -EINVAL; + + panthor_free_heap_chunks(pool->vm, heap); + mutex_destroy(&heap->lock); + kfree(heap); + return 0; +} + +/** + * panthor_heap_destroy() - Destroy a heap context + * @pool: Pool this context belongs to. + * @handle: Handle returned by panthor_heap_create(). + */ +int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) +{ + int ret; + + down_write(&pool->lock); + ret = panthor_heap_destroy_locked(pool, handle); + up_write(&pool->lock); + + return ret; +} + +/** + * panthor_heap_create() - Create a heap context + * @pool: Pool to instantiate the heap context from. + * @initial_chunk_count: Number of chunk allocated at initialization time. + * Must be at least 1. + * @chunk_size: The size of each chunk. Must be a power of two between 256k + * and 2M. + * @max_chunks: Maximum number of chunks that can be allocated. + * @target_in_flight: Maximum number of in-flight render passes. + * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap + * context. + * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk + * assigned to the heap context. + * + * Return: a positive handle on success, a negative error otherwise. + */ +int panthor_heap_create(struct panthor_heap_pool *pool, + u32 initial_chunk_count, + u32 chunk_size, + u32 max_chunks, + u32 target_in_flight, + u64 *heap_ctx_gpu_va, + u64 *first_chunk_gpu_va) +{ + struct panthor_heap *heap; + struct panthor_heap_chunk *first_chunk; + struct panthor_vm *vm; + int ret = 0; + u32 id; + + if (initial_chunk_count == 0) + return -EINVAL; + + if (hweight32(chunk_size) != 1 || + chunk_size < SZ_256K || chunk_size > SZ_2M) + return -EINVAL; + + down_read(&pool->lock); + vm = panthor_vm_get(pool->vm); + up_read(&pool->lock); + + /* The pool has been destroyed, we can't create a new heap. */ + if (!vm) + return -EINVAL; + + heap = kzalloc(sizeof(*heap), GFP_KERNEL); + if (!heap) { + ret = -ENOMEM; + goto err_put_vm; + } + + mutex_init(&heap->lock); + INIT_LIST_HEAD(&heap->chunks); + heap->chunk_size = chunk_size; + heap->max_chunks = max_chunks; + heap->target_in_flight = target_in_flight; + + ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, + initial_chunk_count); + if (ret) + goto err_free_heap; + + first_chunk = list_first_entry(&heap->chunks, + struct panthor_heap_chunk, + node); + *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); + + down_write(&pool->lock); + /* The pool has been destroyed, we can't create a new heap. */ + if (!pool->vm) { + ret = -EINVAL; + } else { + ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL); + if (!ret) { + void *gpu_ctx = panthor_get_heap_ctx(pool, id); + + memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); + *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + + panthor_get_heap_ctx_offset(pool, id); + } + } + up_write(&pool->lock); + + if (ret) + goto err_free_heap; + + panthor_vm_put(vm); + return id; + +err_free_heap: + panthor_free_heap_chunks(pool->vm, heap); + mutex_destroy(&heap->lock); + kfree(heap); + +err_put_vm: + panthor_vm_put(vm); + return ret; +} + +/** + * panthor_heap_return_chunk() - Return an unused heap chunk + * @pool: The pool this heap belongs to. + * @heap_gpu_va: The GPU address of the heap context. + * @chunk_gpu_va: The chunk VA to return. 
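+ * The low 12 bits, which encode the chunk size (in 4k units) in the value
+ * handed to the FW, are ignored here: the address is masked with
+ * GENMASK_ULL(63, 12) before the lookup.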
+ * + * This function is used when a chunk allocated with panthor_heap_grow() + * couldn't be linked to the heap context through the FW interface because + * the group requesting the allocation was scheduled out in the meantime. + */ +int panthor_heap_return_chunk(struct panthor_heap_pool *pool, + u64 heap_gpu_va, + u64 chunk_gpu_va) +{ + u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); + u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); + struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; + struct panthor_heap *heap; + int ret; + + if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) + return -EINVAL; + + down_read(&pool->lock); + heap = xa_load(&pool->xa, heap_id); + if (!heap) { + ret = -EINVAL; + goto out_unlock; + } + + chunk_gpu_va &= GENMASK_ULL(63, 12); + + mutex_lock(&heap->lock); + list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { + if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { + removed = chunk; + list_del(&chunk->node); + heap->chunk_count--; + break; + } + } + mutex_unlock(&heap->lock); + + if (removed) { + panthor_kernel_bo_destroy(pool->vm, chunk->bo); + kfree(chunk); + ret = 0; + } else { + ret = -EINVAL; + } + +out_unlock: + up_read(&pool->lock); + return ret; +} + +/** + * panthor_heap_grow() - Make a heap context grow. + * @pool: The pool this heap belongs to. + * @heap_gpu_va: The GPU address of the heap context. + * @renderpasses_in_flight: Number of render passes currently in-flight. + * @pending_frag_count: Number of fragment jobs waiting for execution/completion. + * @new_chunk_gpu_va: Pointer used to return the chunk VA. + */ +int panthor_heap_grow(struct panthor_heap_pool *pool, + u64 heap_gpu_va, + u32 renderpasses_in_flight, + u32 pending_frag_count, + u64 *new_chunk_gpu_va) +{ + u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); + u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); + struct panthor_heap_chunk *chunk; + struct panthor_heap *heap; + int ret; + + if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) + return -EINVAL; + + down_read(&pool->lock); + heap = xa_load(&pool->xa, heap_id); + if (!heap) { + ret = -EINVAL; + goto out_unlock; + } + + /* If we reached the target in-flight render passes, or if we + * reached the maximum number of chunks, let the FW figure another way to + * find some memory (wait for render passes to finish, or call the exception + * handler provided by the userspace driver, if any). + */ + if (renderpasses_in_flight > heap->target_in_flight || + (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) { + ret = -EBUSY; + goto out_unlock; + } else if (heap->chunk_count >= heap->max_chunks) { + ret = -ENOMEM; + goto out_unlock; + } + + /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, + * which goes through the blocking allocation path. Ultimately, we + * want a non-blocking allocation, so we can immediately report to the + * FW when the system is running out of memory. In that case, the FW + * can call a user-provided exception handler, which might try to free + * some tiler memory by issuing an intermediate fragment job. If the + * exception handler can't do anything, it will flag the queue as + * faulty so the job that triggered this tiler chunk allocation and all + * further jobs in this queue fail immediately instead of having to + * wait for the job timeout. 
+ */ + ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); + if (ret) + goto out_unlock; + + chunk = list_first_entry(&heap->chunks, + struct panthor_heap_chunk, + node); + *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | + (heap->chunk_size >> 12); + ret = 0; + +out_unlock: + up_read(&pool->lock); + return ret; +} + +static void panthor_heap_pool_release(struct kref *refcount) +{ + struct panthor_heap_pool *pool = + container_of(refcount, struct panthor_heap_pool, refcount); + + xa_destroy(&pool->xa); + kfree(pool); +} + +/** + * panthor_heap_pool_put() - Release a heap pool reference + * @pool: Pool to release the reference on. Can be NULL. + */ +void panthor_heap_pool_put(struct panthor_heap_pool *pool) +{ + if (pool) + kref_put(&pool->refcount, panthor_heap_pool_release); +} + +/** + * panthor_heap_pool_get() - Get a heap pool reference + * @pool: Pool to get the reference on. Can be NULL. + * + * Return: @pool. + */ +struct panthor_heap_pool * +panthor_heap_pool_get(struct panthor_heap_pool *pool) +{ + if (pool) + kref_get(&pool->refcount); + + return pool; +} + +/** + * panthor_heap_pool_create() - Create a heap pool + * @ptdev: Device. + * @vm: The VM this heap pool will be attached to. + * + * Heap pools might contain up to 128 heap contexts, and are per-VM. + * + * Return: A valid pointer on success, a negative error code otherwise. + */ +struct panthor_heap_pool * +panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) +{ + size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * + panthor_heap_ctx_stride(ptdev), + 4096); + struct panthor_heap_pool *pool; + int ret = 0; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + /* We want a weak ref here: the heap pool belongs to the VM, so we're + * sure that, as long as the heap pool exists, the VM exists too. + */ + pool->vm = vm; + pool->ptdev = ptdev; + init_rwsem(&pool->lock); + xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1); + kref_init(&pool->refcount); + + pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, + DRM_PANTHOR_BO_NO_MMAP, + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, + PANTHOR_VM_KERNEL_AUTO_VA); + if (IS_ERR(pool->gpu_contexts)) { + ret = PTR_ERR(pool->gpu_contexts); + goto err_destroy_pool; + } + + ret = panthor_kernel_bo_vmap(pool->gpu_contexts); + if (ret) + goto err_destroy_pool; + + return pool; + +err_destroy_pool: + panthor_heap_pool_destroy(pool); + return ERR_PTR(ret); +} + +/** + * panthor_heap_pool_destroy() - Destroy a heap pool. + * @pool: Pool to destroy. + * + * This function destroys all heap contexts and their resources. Thus + * preventing any use of the heap context or the chunk attached to them + * after that point. + * + * If the GPU still has access to some heap contexts, a fault should be + * triggered, which should flag the command stream groups using these + * context as faulty. + * + * The heap pool object is only released when all references to this pool + * are released. + */ +void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) +{ + struct panthor_heap *heap; + unsigned long i; + + if (!pool) + return; + + down_write(&pool->lock); + xa_for_each(&pool->xa, i, heap) + drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); + + if (!IS_ERR_OR_NULL(pool->gpu_contexts)) + panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts); + + /* Reflects the fact the pool has been destroyed. 
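+	 * panthor_heap_create() checks this pointer under the same lock and
+	 * bails out with -EINVAL once it is NULL, so no new heap context can
+	 * be attached to a destroyed pool.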
*/ + pool->vm = NULL; + up_write(&pool->lock); + + panthor_heap_pool_put(pool); +} diff --git a/drivers/gpu/drm/panthor/panthor_heap.h b/drivers/gpu/drm/panthor/panthor_heap.h new file mode 100644 index 000000000000..25a5f2bba445 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_heap.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2023 Collabora ltd. */ + +#ifndef __PANTHOR_HEAP_H__ +#define __PANTHOR_HEAP_H__ + +#include <linux/types.h> + +struct panthor_device; +struct panthor_heap_pool; +struct panthor_vm; + +int panthor_heap_create(struct panthor_heap_pool *pool, + u32 initial_chunk_count, + u32 chunk_size, + u32 max_chunks, + u32 target_in_flight, + u64 *heap_ctx_gpu_va, + u64 *first_chunk_gpu_va); +int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle); + +struct panthor_heap_pool * +panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm); +void panthor_heap_pool_destroy(struct panthor_heap_pool *pool); + +struct panthor_heap_pool * +panthor_heap_pool_get(struct panthor_heap_pool *pool); +void panthor_heap_pool_put(struct panthor_heap_pool *pool); + +int panthor_heap_grow(struct panthor_heap_pool *pool, + u64 heap_gpu_va, + u32 renderpasses_in_flight, + u32 pending_frag_count, + u64 *new_chunk_gpu_va); +int panthor_heap_return_chunk(struct panthor_heap_pool *pool, + u64 heap_gpu_va, + u64 chunk_gpu_va); + +#endif diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c new file mode 100644 index 000000000000..fdd35249169f --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -0,0 +1,2768 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2023 Collabora ltd. */ + +#include <drm/drm_debugfs.h> +#include <drm/drm_drv.h> +#include <drm/drm_exec.h> +#include <drm/drm_gpuvm.h> +#include <drm/drm_managed.h> +#include <drm/gpu_scheduler.h> +#include <drm/panthor_drm.h> + +#include <linux/atomic.h> +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/io-pgtable.h> +#include <linux/iommu.h> +#include <linux/kmemleak.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/rwsem.h> +#include <linux/sched.h> +#include <linux/shmem_fs.h> +#include <linux/sizes.h> + +#include "panthor_device.h" +#include "panthor_gem.h" +#include "panthor_heap.h" +#include "panthor_mmu.h" +#include "panthor_regs.h" +#include "panthor_sched.h" + +#define MAX_AS_SLOTS 32 + +struct panthor_vm; + +/** + * struct panthor_as_slot - Address space slot + */ +struct panthor_as_slot { + /** @vm: VM bound to this slot. NULL is no VM is bound. */ + struct panthor_vm *vm; +}; + +/** + * struct panthor_mmu - MMU related data + */ +struct panthor_mmu { + /** @irq: The MMU irq. */ + struct panthor_irq irq; + + /** @as: Address space related fields. + * + * The GPU has a limited number of address spaces (AS) slots, forcing + * us to re-assign them to re-assign slots on-demand. + */ + struct { + /** @slots_lock: Lock protecting access to all other AS fields. */ + struct mutex slots_lock; + + /** @alloc_mask: Bitmask encoding the allocated slots. */ + unsigned long alloc_mask; + + /** @faulty_mask: Bitmask encoding the faulty slots. */ + unsigned long faulty_mask; + + /** @slots: VMs currently bound to the AS slots. 
*/ + struct panthor_as_slot slots[MAX_AS_SLOTS]; + + /** + * @lru_list: List of least recently used VMs. + * + * We use this list to pick a VM to evict when all slots are + * used. + * + * There should be no more active VMs than there are AS slots, + * so this LRU is just here to keep VMs bound until there's + * a need to release a slot, thus avoid unnecessary TLB/cache + * flushes. + */ + struct list_head lru_list; + } as; + + /** @vm: VMs management fields */ + struct { + /** @lock: Lock protecting access to list. */ + struct mutex lock; + + /** @list: List containing all VMs. */ + struct list_head list; + + /** @reset_in_progress: True if a reset is in progress. */ + bool reset_in_progress; + + /** @wq: Workqueue used for the VM_BIND queues. */ + struct workqueue_struct *wq; + } vm; +}; + +/** + * struct panthor_vm_pool - VM pool object + */ +struct panthor_vm_pool { + /** @xa: Array used for VM handle tracking. */ + struct xarray xa; +}; + +/** + * struct panthor_vma - GPU mapping object + * + * This is used to track GEM mappings in GPU space. + */ +struct panthor_vma { + /** @base: Inherits from drm_gpuva. */ + struct drm_gpuva base; + + /** @node: Used to implement deferred release of VMAs. */ + struct list_head node; + + /** + * @flags: Combination of drm_panthor_vm_bind_op_flags. + * + * Only map related flags are accepted. + */ + u32 flags; +}; + +/** + * struct panthor_vm_op_ctx - VM operation context + * + * With VM operations potentially taking place in a dma-signaling path, we + * need to make sure everything that might require resource allocation is + * pre-allocated upfront. This is what this operation context is far. + * + * We also collect resources that have been freed, so we can release them + * asynchronously, and let the VM_BIND scheduler process the next VM_BIND + * request. + */ +struct panthor_vm_op_ctx { + /** @rsvd_page_tables: Pages reserved for the MMU page table update. */ + struct { + /** @count: Number of pages reserved. */ + u32 count; + + /** @ptr: Point to the first unused page in the @pages table. */ + u32 ptr; + + /** + * @page: Array of pages that can be used for an MMU page table update. + * + * After an VM operation, there might be free pages left in this array. + * They should be returned to the pt_cache as part of the op_ctx cleanup. + */ + void **pages; + } rsvd_page_tables; + + /** + * @preallocated_vmas: Pre-allocated VMAs to handle the remap case. + * + * Partial unmap requests or map requests overlapping existing mappings will + * trigger a remap call, which need to register up to three panthor_vma objects + * (one for the new mapping, and two for the previous and next mappings). + */ + struct panthor_vma *preallocated_vmas[3]; + + /** @flags: Combination of drm_panthor_vm_bind_op_flags. */ + u32 flags; + + /** @va: Virtual range targeted by the VM operation. */ + struct { + /** @addr: Start address. */ + u64 addr; + + /** @range: Range size. */ + u64 range; + } va; + + /** + * @returned_vmas: List of panthor_vma objects returned after a VM operation. + * + * For unmap operations, this will contain all VMAs that were covered by the + * specified VA range. + * + * For map operations, this will contain all VMAs that previously mapped to + * the specified VA range. + * + * Those VMAs, and the resources they point to will be released as part of + * the op_ctx cleanup operation. + */ + struct list_head returned_vmas; + + /** @map: Fields specific to a map operation. */ + struct { + /** @vm_bo: Buffer object to map. 
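+ *
+ * Like @sgt below, this is expected to be obtained when the operation
+ * context is built, so that nothing needs to be allocated or looked up in
+ * the dma-signaling path.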
*/ + struct drm_gpuvm_bo *vm_bo; + + /** @bo_offset: Offset in the buffer object. */ + u64 bo_offset; + + /** + * @sgt: sg-table pointing to pages backing the GEM object. + * + * This is gathered at job creation time, such that we don't have + * to allocate in ::run_job(). + */ + struct sg_table *sgt; + + /** + * @new_vma: The new VMA object that will be inserted into the VA tree. + */ + struct panthor_vma *new_vma; + } map; +}; + +/** + * struct panthor_vm - VM object + * + * A VM is an object representing a GPU (or MCU) virtual address space. + * It embeds the MMU page table for this address space, a tree containing + * all the virtual mappings of GEM objects, and other things needed to manage + * the VM. + * + * Except for the MCU VM, which is managed by the kernel, all other VMs are + * created by userspace and mostly managed by userspace, using the + * %DRM_IOCTL_PANTHOR_VM_BIND ioctl. + * + * A portion of the virtual address space is reserved for kernel objects, + * like heap chunks, and userspace gets to decide how much of the virtual + * address space is left to the kernel (half of the virtual address space + * by default). + */ +struct panthor_vm { + /** + * @base: Inherits from drm_gpuvm. + * + * We delegate all the VA management to the common drm_gpuvm framework + * and only implement hooks to update the MMU page table. + */ + struct drm_gpuvm base; + + /** + * @sched: Scheduler used for asynchronous VM_BIND requests. + * + * We use a 1:1 scheduler here. + */ + struct drm_gpu_scheduler sched; + + /** + * @entity: Scheduling entity representing the VM_BIND queue. + * + * There's currently one bind queue per VM. It doesn't make sense to + * allow more given the VM operations are serialized anyway. + */ + struct drm_sched_entity entity; + + /** @ptdev: Device. */ + struct panthor_device *ptdev; + + /** @memattr: Value to program to the AS_MEMATTR register. */ + u64 memattr; + + /** @pgtbl_ops: Page table operations. */ + struct io_pgtable_ops *pgtbl_ops; + + /** @root_page_table: Stores the root page table pointer. */ + void *root_page_table; + + /** + * @op_lock: Lock used to serialize operations on a VM. + * + * The serialization of jobs queued to the VM_BIND queue is already + * taken care of by drm_sched, but we need to serialize synchronous + * and asynchronous VM_BIND requests. This is what this lock is for. + */ + struct mutex op_lock; + + /** + * @op_ctx: The context attached to the currently executing VM operation. + * + * NULL when no operation is in progress. + */ + struct panthor_vm_op_ctx *op_ctx; + + /** + * @mm: Memory management object representing the auto-VA/kernel-VA. + * + * Used to auto-allocate VA space for kernel-managed objects (tiler + * heaps, ...). + * + * For the MCU VM, this is managing the VA range that's used to map + * all shared interfaces. + * + * For user VMs, the range is specified by userspace, and must not + * exceed half of the addressable VA space. + */ + struct drm_mm mm; + + /** @mm_lock: Lock protecting the @mm field. */ + struct mutex mm_lock; + + /** @kernel_auto_va: Automatic VA-range for kernel BOs. */ + struct { + /** @start: Start of the automatic VA-range for kernel BOs. */ + u64 start; + + /** @end: End of the automatic VA-range for kernel BOs. */ + u64 end; + } kernel_auto_va; + + /** @as: Address space related fields. */ + struct { + /** + * @id: ID of the address space this VM is bound to. + * + * A value of -1 means the VM is inactive/not bound. + */ + int id; + + /** @active_cnt: Number of active users of this VM. 
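+ *
+ * Incremented by panthor_vm_active() and decremented by panthor_vm_idle().
+ * Transitions from/to zero happen with panthor_mmu::as::slots_lock held, so
+ * the AS slot assignment and the LRU list stay consistent with this count.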
*/ + refcount_t active_cnt; + + /** + * @lru_node: Used to insert the VM in the panthor_mmu::as::lru_list. + * + * Active VMs should not be inserted in the LRU list. + */ + struct list_head lru_node; + } as; + + /** + * @heaps: Tiler heap related fields. + */ + struct { + /** + * @pool: The heap pool attached to this VM. + * + * Will stay NULL until someone creates a heap context on this VM. + */ + struct panthor_heap_pool *pool; + + /** @lock: Lock used to protect access to @pool. */ + struct mutex lock; + } heaps; + + /** @node: Used to insert the VM in the panthor_mmu::vm::list. */ + struct list_head node; + + /** @for_mcu: True if this is the MCU VM. */ + bool for_mcu; + + /** + * @destroyed: True if the VM was destroyed. + * + * No further bind requests should be queued to a destroyed VM. + */ + bool destroyed; + + /** + * @unusable: True if the VM has turned unusable because something + * bad happened during an asynchronous request. + * + * We don't try to recover from such failures, because this implies + * informing userspace about the specific operation that failed, and + * hoping the userspace driver can replay things from there. This all + * sounds very complicated for little gain. + * + * Instead, we should just flag the VM as unusable, and fail any + * further requests targeting this VM. + * + * We also provide a way to query the VM state, so userspace can destroy + * it and create a new one. + * + * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST + * situation, where the logical device needs to be re-created. + */ + bool unusable; + + /** + * @unhandled_fault: Unhandled fault happened. + * + * This should be reported to the scheduler, and the queue/group be + * flagged as faulty as a result. + */ + bool unhandled_fault; +}; + +/** + * struct panthor_vm_bind_job - VM bind job + */ +struct panthor_vm_bind_job { + /** @base: Inherits from drm_sched_job. */ + struct drm_sched_job base; + + /** @refcount: Reference count. */ + struct kref refcount; + + /** @cleanup_op_ctx_work: Work used to clean up the VM operation context. */ + struct work_struct cleanup_op_ctx_work; + + /** @vm: VM targeted by the VM operation. */ + struct panthor_vm *vm; + + /** @ctx: Operation context. */ + struct panthor_vm_op_ctx ctx; +}; + +/** + * @pt_cache: Cache used to allocate MMU page tables. + * + * The pre-allocation pattern forces us to over-allocate to plan for + * the worst case scenario, and return the pages we didn't use. + * + * Having a kmem_cache allows us to speed up allocations. + */ +static struct kmem_cache *pt_cache; + +/** + * alloc_pt() - Custom page table allocator + * @cookie: Cookie passed at page table allocation time. + * @size: Size of the page table. This size should be fixed, + * and determined at creation time based on the granule size. + * @gfp: GFP flags. + * + * We want a custom allocator so we can use a cache for page table + * allocations and amortize the cost of the over-reservation that's + * done to allow asynchronous VM operations. + * + * Return: non-NULL on success, NULL if the allocation failed for any + * reason. + */ +static void *alloc_pt(void *cookie, size_t size, gfp_t gfp) +{ + struct panthor_vm *vm = cookie; + void *page; + + /* Allocation of the root page table happens during init. */ + if (unlikely(!vm->root_page_table)) { + struct page *p; + + drm_WARN_ON(&vm->ptdev->base, vm->op_ctx); + p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev), + gfp | __GFP_ZERO, get_order(size)); + page = p ? 
page_address(p) : NULL; + vm->root_page_table = page; + return page; + } + + /* We're not supposed to have anything bigger than 4k here, because we picked a + * 4k granule size at init time. + */ + if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) + return NULL; + + /* We must have some op_ctx attached to the VM and it must have at least one + * free page. + */ + if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) || + drm_WARN_ON(&vm->ptdev->base, + vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count)) + return NULL; + + page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++]; + memset(page, 0, SZ_4K); + + /* Page table entries don't use virtual addresses, which trips out + * kmemleak. kmemleak_alloc_phys() might work, but physical addresses + * are mixed with other fields, and I fear kmemleak won't detect that + * either. + * + * Let's just ignore memory passed to the page-table driver for now. + */ + kmemleak_ignore(page); + return page; +} + +/** + * @free_pt() - Custom page table free function + * @cookie: Cookie passed at page table allocation time. + * @data: Page table to free. + * @size: Size of the page table. This size should be fixed, + * and determined at creation time based on the granule size. + */ +static void free_pt(void *cookie, void *data, size_t size) +{ + struct panthor_vm *vm = cookie; + + if (unlikely(vm->root_page_table == data)) { + free_pages((unsigned long)data, get_order(size)); + vm->root_page_table = NULL; + return; + } + + if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) + return; + + /* Return the page to the pt_cache. */ + kmem_cache_free(pt_cache, data); +} + +static int wait_ready(struct panthor_device *ptdev, u32 as_nr) +{ + int ret; + u32 val; + + /* Wait for the MMU status to indicate there is no active command, in + * case one is pending. + */ + ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr), + val, !(val & AS_STATUS_AS_ACTIVE), + 10, 100000); + + if (ret) { + panthor_device_schedule_reset(ptdev); + drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n"); + } + + return ret; +} + +static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(ptdev, as_nr); + if (!status) + gpu_write(ptdev, AS_COMMAND(as_nr), cmd); + + return status; +} + +static void lock_region(struct panthor_device *ptdev, u32 as_nr, + u64 region_start, u64 size) +{ + u8 region_width; + u64 region; + u64 region_end = region_start + size; + + if (!size) + return; + + /* + * The locked region is a naturally aligned power of 2 block encoded as + * log2 minus(1). + * Calculate the desired start/end and look for the highest bit which + * differs. The smallest naturally aligned block must include this bit + * change, the desired region starts with this bit (and subsequent bits) + * zeroed and ends with the bit (and subsequent bits) set to one. 
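+ *
+ * Illustrative example (added note, not part of the original comment, and
+ * assuming AS_LOCK_REGION_MIN_SIZE is not larger than 64k): locking 64k at
+ * 0x210000 gives region_start ^ (region_end - 1) = 0x210000 ^ 0x21ffff =
+ * 0xffff, so fls64() returns 16 and region_width is encoded as 15, which
+ * makes the hardware lock the naturally aligned 64k block at 0x210000. A
+ * range crossing a 64k boundary would instead be rounded up to the smallest
+ * naturally aligned power-of-two block covering both ends.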
+ */ + region_width = max(fls64(region_start ^ (region_end - 1)), + const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1; + + /* + * Mask off the low bits of region_start (which would be ignored by + * the hardware anyway) + */ + region_start &= GENMASK_ULL(63, region_width); + + region = region_width | region_start; + + /* Lock the region that needs to be updated */ + gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region)); + gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region)); + write_cmd(ptdev, as_nr, AS_COMMAND_LOCK); +} + +static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, + u64 iova, u64 size, u32 op) +{ + lockdep_assert_held(&ptdev->mmu->as.slots_lock); + + if (as_nr < 0) + return 0; + + if (op != AS_COMMAND_UNLOCK) + lock_region(ptdev, as_nr, iova, size); + + /* Run the MMU operation */ + write_cmd(ptdev, as_nr, op); + + /* Wait for the flush to complete */ + return wait_ready(ptdev, as_nr); +} + +static int mmu_hw_do_operation(struct panthor_vm *vm, + u64 iova, u64 size, u32 op) +{ + struct panthor_device *ptdev = vm->ptdev; + int ret; + + mutex_lock(&ptdev->mmu->as.slots_lock); + ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op); + mutex_unlock(&ptdev->mmu->as.slots_lock); + + return ret; +} + +static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, + u64 transtab, u64 transcfg, u64 memattr) +{ + int ret; + + ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); + if (ret) + return ret; + + gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab)); + gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab)); + + gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); + gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); + + gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg)); + gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg)); + + return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); +} + +static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) +{ + int ret; + + ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); + if (ret) + return ret; + + gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0); + gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0); + + gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0); + gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0); + + gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); + gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0); + + return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); +} + +static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value) +{ + /* Bits 16 to 31 mean REQ_COMPLETE. */ + return value & GENMASK(15, 0); +} + +static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as) +{ + return BIT(as); +} + +/** + * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults + * @vm: VM to check. + * + * Return: true if the VM has unhandled faults, false otherwise. + */ +bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm) +{ + return vm->unhandled_fault; +} + +/** + * panthor_vm_is_unusable() - Check if the VM is still usable + * @vm: VM to check. + * + * Return: true if the VM is unusable, false otherwise. 
+ */ +bool panthor_vm_is_unusable(struct panthor_vm *vm) +{ + return vm->unusable; +} + +static void panthor_vm_release_as_locked(struct panthor_vm *vm) +{ + struct panthor_device *ptdev = vm->ptdev; + + lockdep_assert_held(&ptdev->mmu->as.slots_lock); + + if (drm_WARN_ON(&ptdev->base, vm->as.id < 0)) + return; + + ptdev->mmu->as.slots[vm->as.id].vm = NULL; + clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); + refcount_set(&vm->as.active_cnt, 0); + list_del_init(&vm->as.lru_node); + vm->as.id = -1; +} + +/** + * panthor_vm_active() - Flag a VM as active + * @VM: VM to flag as active. + * + * Assigns an address space to a VM so it can be used by the GPU/MCU. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_vm_active(struct panthor_vm *vm) +{ + struct panthor_device *ptdev = vm->ptdev; + u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); + struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg; + int ret = 0, as, cookie; + u64 transtab, transcfg; + + if (!drm_dev_enter(&ptdev->base, &cookie)) + return -ENODEV; + + if (refcount_inc_not_zero(&vm->as.active_cnt)) + goto out_dev_exit; + + mutex_lock(&ptdev->mmu->as.slots_lock); + + if (refcount_inc_not_zero(&vm->as.active_cnt)) + goto out_unlock; + + as = vm->as.id; + if (as >= 0) { + /* Unhandled pagefault on this AS, the MMU was disabled. We need to + * re-enable the MMU after clearing+unmasking the AS interrupts. + */ + if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) + goto out_enable_as; + + goto out_make_active; + } + + /* Check for a free AS */ + if (vm->for_mcu) { + drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0)); + as = 0; + } else { + as = ffz(ptdev->mmu->as.alloc_mask | BIT(0)); + } + + if (!(BIT(as) & ptdev->gpu_info.as_present)) { + struct panthor_vm *lru_vm; + + lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list, + struct panthor_vm, + as.lru_node); + if (drm_WARN_ON(&ptdev->base, !lru_vm)) { + ret = -EBUSY; + goto out_unlock; + } + + drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt)); + as = lru_vm->as.id; + panthor_vm_release_as_locked(lru_vm); + } + + /* Assign the free or reclaimed AS to the FD */ + vm->as.id = as; + set_bit(as, &ptdev->mmu->as.alloc_mask); + ptdev->mmu->as.slots[as].vm = vm; + +out_enable_as: + transtab = cfg->arm_lpae_s1_cfg.ttbr; + transcfg = AS_TRANSCFG_PTW_MEMATTR_WB | + AS_TRANSCFG_PTW_RA | + AS_TRANSCFG_ADRMODE_AARCH64_4K | + AS_TRANSCFG_INA_BITS(55 - va_bits); + if (ptdev->coherent) + transcfg |= AS_TRANSCFG_PTW_SH_OS; + + /* If the VM is re-activated, we clear the fault. */ + vm->unhandled_fault = false; + + /* Unhandled pagefault on this AS, clear the fault and re-enable interrupts + * before enabling the AS. + */ + if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { + gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); + ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); + gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); + } + + ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr); + +out_make_active: + if (!ret) { + refcount_set(&vm->as.active_cnt, 1); + list_del_init(&vm->as.lru_node); + } + +out_unlock: + mutex_unlock(&ptdev->mmu->as.slots_lock); + +out_dev_exit: + drm_dev_exit(cookie); + return ret; +} + +/** + * panthor_vm_idle() - Flag a VM idle + * @VM: VM to flag as idle. 
+ * + * When we know the GPU is done with the VM (no more jobs to process), + * we can relinquish the AS slot attached to this VM, if any. + * + * We don't release the slot immediately, but instead place the VM in + * the LRU list, so it can be evicted if another VM needs an AS slot. + * This way, VMs keep attached to the AS they were given until we run + * out of free slot, limiting the number of MMU operations (TLB flush + * and other AS updates). + */ +void panthor_vm_idle(struct panthor_vm *vm) +{ + struct panthor_device *ptdev = vm->ptdev; + + if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock)) + return; + + if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node))) + list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list); + + refcount_set(&vm->as.active_cnt, 0); + mutex_unlock(&ptdev->mmu->as.slots_lock); +} + +static void panthor_vm_stop(struct panthor_vm *vm) +{ + drm_sched_stop(&vm->sched, NULL); +} + +static void panthor_vm_start(struct panthor_vm *vm) +{ + drm_sched_start(&vm->sched, true); +} + +/** + * panthor_vm_as() - Get the AS slot attached to a VM + * @vm: VM to get the AS slot of. + * + * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise. + */ +int panthor_vm_as(struct panthor_vm *vm) +{ + return vm->as.id; +} + +static size_t get_pgsize(u64 addr, size_t size, size_t *count) +{ + /* + * io-pgtable only operates on multiple pages within a single table + * entry, so we need to split at boundaries of the table size, i.e. + * the next block size up. The distance from address A to the next + * boundary of block size B is logically B - A % B, but in unsigned + * two's complement where B is a power of two we get the equivalence + * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :) + */ + size_t blk_offset = -addr % SZ_2M; + + if (blk_offset || size < SZ_2M) { + *count = min_not_zero(blk_offset, size) / SZ_4K; + return SZ_4K; + } + blk_offset = -addr % SZ_1G ?: SZ_1G; + *count = min(blk_offset, size) / SZ_2M; + return SZ_2M; +} + +static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) +{ + struct panthor_device *ptdev = vm->ptdev; + int ret = 0, cookie; + + if (vm->as.id < 0) + return 0; + + /* If the device is unplugged, we just silently skip the flush. 
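+ * Nothing can reach the GPU through this VM once the device is gone, so
+ * there is nothing left to maintain here.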
*/ + if (!drm_dev_enter(&ptdev->base, &cookie)) + return 0; + + /* Flush the PTs only if we're already awake */ + if (pm_runtime_active(ptdev->base.dev)) + ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); + + drm_dev_exit(cookie); + return ret; +} + +static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) +{ + struct panthor_device *ptdev = vm->ptdev; + struct io_pgtable_ops *ops = vm->pgtbl_ops; + u64 offset = 0; + + drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size); + + while (offset < size) { + size_t unmapped_sz = 0, pgcount; + size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); + + unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL); + + if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) { + drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n", + iova + offset + unmapped_sz, + iova + offset + pgsize * pgcount, + iova, iova + size); + panthor_vm_flush_range(vm, iova, offset + unmapped_sz); + return -EINVAL; + } + offset += unmapped_sz; + } + + return panthor_vm_flush_range(vm, iova, size); +} + +static int +panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, + struct sg_table *sgt, u64 offset, u64 size) +{ + struct panthor_device *ptdev = vm->ptdev; + unsigned int count; + struct scatterlist *sgl; + struct io_pgtable_ops *ops = vm->pgtbl_ops; + u64 start_iova = iova; + int ret; + + if (!size) + return 0; + + for_each_sgtable_dma_sg(sgt, sgl, count) { + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + + drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx", + vm->as.id, iova, &paddr, len); + + while (len) { + size_t pgcount, mapped = 0; + size_t pgsize = get_pgsize(iova | paddr, len, &pgcount); + + ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, + GFP_KERNEL, &mapped); + iova += mapped; + paddr += mapped; + len -= mapped; + + if (drm_WARN_ON(&ptdev->base, !ret && !mapped)) + ret = -ENOMEM; + + if (ret) { + /* If something failed, unmap what we've already mapped before + * returning. The unmap call is not supposed to fail. + */ + drm_WARN_ON(&ptdev->base, + panthor_vm_unmap_pages(vm, start_iova, + iova - start_iova)); + return ret; + } + } + + if (!size) + break; + } + + return panthor_vm_flush_range(vm, start_iova, iova - start_iova); +} + +static int flags_to_prot(u32 flags) +{ + int prot = 0; + + if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC) + prot |= IOMMU_NOEXEC; + + if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)) + prot |= IOMMU_CACHE; + + if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY) + prot |= IOMMU_READ; + else + prot |= IOMMU_READ | IOMMU_WRITE; + + return prot; +} + +/** + * panthor_vm_alloc_va() - Allocate a region in the auto-va space + * @VM: VM to allocate a region on. + * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user + * wants the VA to be automatically allocated from the auto-VA range. + * @size: size of the VA range. + * @va_node: drm_mm_node to initialize. Must be zero-initialized. + * + * Some GPU objects, like heap chunks, are fully managed by the kernel and + * need to be mapped to the userspace VM, in the region reserved for kernel + * objects. + * + * This function takes care of allocating a region in the kernel auto-VA space. 
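+ *
+ * Hypothetical usage sketch (illustrative only, not taken from this file):
+ *
+ *	struct drm_mm_node va_node = {};
+ *	int err;
+ *
+ *	err = panthor_vm_alloc_va(vm, PANTHOR_VM_KERNEL_AUTO_VA,
+ *				  SZ_2M, &va_node);
+ *	if (!err) {
+ *		... map a kernel BO at va_node.start, then later ...
+ *		panthor_vm_free_va(vm, &va_node);
+ *	}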
+ * + * Return: 0 on success, an error code otherwise. + */ +int +panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, + struct drm_mm_node *va_node) +{ + int ret; + + if (!size || (size & ~PAGE_MASK)) + return -EINVAL; + + if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK)) + return -EINVAL; + + mutex_lock(&vm->mm_lock); + if (va != PANTHOR_VM_KERNEL_AUTO_VA) { + va_node->start = va; + va_node->size = size; + ret = drm_mm_reserve_node(&vm->mm, va_node); + } else { + ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size, + size >= SZ_2M ? SZ_2M : SZ_4K, + 0, vm->kernel_auto_va.start, + vm->kernel_auto_va.end, + DRM_MM_INSERT_BEST); + } + mutex_unlock(&vm->mm_lock); + + return ret; +} + +/** + * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va() + * @vm: VM to free the region on. + * @va_node: Memory node representing the region to free. + */ +void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) +{ + mutex_lock(&vm->mm_lock); + drm_mm_remove_node(va_node); + mutex_unlock(&vm->mm_lock); +} + +static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) +{ + struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj); + struct drm_gpuvm *vm = vm_bo->vm; + bool unpin; + + /* We must retain the GEM before calling drm_gpuvm_bo_put(), + * otherwise the mutex might be destroyed while we hold it. + * Same goes for the VM, since we take the VM resv lock. + */ + drm_gem_object_get(&bo->base.base); + drm_gpuvm_get(vm); + + /* We take the resv lock to protect against concurrent accesses to the + * gpuvm evicted/extobj lists that are modified in + * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put() + * releases the last vm_bo reference. + * We take the BO GPUVA list lock to protect the vm_bo removal from the + * GEM vm_bo list. + */ + dma_resv_lock(drm_gpuvm_resv(vm), NULL); + mutex_lock(&bo->gpuva_list_lock); + unpin = drm_gpuvm_bo_put(vm_bo); + mutex_unlock(&bo->gpuva_list_lock); + dma_resv_unlock(drm_gpuvm_resv(vm)); + + /* If the vm_bo object was destroyed, release the pin reference that + * was held by this object. 
+ */ + if (unpin && !bo->base.base.import_attach) + drm_gem_shmem_unpin(&bo->base); + + drm_gpuvm_put(vm); + drm_gem_object_put(&bo->base.base); +} + +static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, + struct panthor_vm *vm) +{ + struct panthor_vma *vma, *tmp_vma; + + u32 remaining_pt_count = op_ctx->rsvd_page_tables.count - + op_ctx->rsvd_page_tables.ptr; + + if (remaining_pt_count) { + kmem_cache_free_bulk(pt_cache, remaining_pt_count, + op_ctx->rsvd_page_tables.pages + + op_ctx->rsvd_page_tables.ptr); + } + + kfree(op_ctx->rsvd_page_tables.pages); + + if (op_ctx->map.vm_bo) + panthor_vm_bo_put(op_ctx->map.vm_bo); + + for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) + kfree(op_ctx->preallocated_vmas[i]); + + list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) { + list_del(&vma->node); + panthor_vm_bo_put(vma->base.vm_bo); + kfree(vma); + } +} + +static struct panthor_vma * +panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx) +{ + for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { + struct panthor_vma *vma = op_ctx->preallocated_vmas[i]; + + if (vma) { + op_ctx->preallocated_vmas[i] = NULL; + return vma; + } + } + + return NULL; +} + +static int +panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) +{ + u32 vma_count; + + switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { + case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: + /* One VMA for the new mapping, and two more VMAs for the remap case + * which might contain both a prev and next VA. + */ + vma_count = 3; + break; + + case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: + /* Partial unmaps might trigger a remap with either a prev or a next VA, + * but not both. + */ + vma_count = 1; + break; + + default: + return 0; + } + + for (u32 i = 0; i < vma_count; i++) { + struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); + + if (!vma) + return -ENOMEM; + + op_ctx->preallocated_vmas[i] = vma; + } + + return 0; +} + +#define PANTHOR_VM_BIND_OP_MAP_FLAGS \ + (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \ + DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) + +static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, + struct panthor_vm *vm, + struct panthor_gem_object *bo, + u64 offset, + u64 size, u64 va, + u32 flags) +{ + struct drm_gpuvm_bo *preallocated_vm_bo; + struct sg_table *sgt = NULL; + u64 pt_count; + int ret; + + if (!bo) + return -EINVAL; + + if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) || + (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP) + return -EINVAL; + + /* Make sure the VA and size are aligned and in-bounds. */ + if (size > bo->base.base.size || offset > bo->base.base.size - size) + return -EINVAL; + + /* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */ + if (bo->exclusive_vm_root_gem && + bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm)) + return -EINVAL; + + memset(op_ctx, 0, sizeof(*op_ctx)); + INIT_LIST_HEAD(&op_ctx->returned_vmas); + op_ctx->flags = flags; + op_ctx->va.range = size; + op_ctx->va.addr = va; + + ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); + if (ret) + goto err_cleanup; + + if (!bo->base.base.import_attach) { + /* Pre-reserve the BO pages, so the map operation doesn't have to + * allocate. 
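+ * Allocating at map time would not be allowed anyway, since the actual map
+ * may run in the dma-fence signalling path of the VM_BIND scheduler (see
+ * the struct panthor_vm_op_ctx documentation above).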
+ */ + ret = drm_gem_shmem_pin(&bo->base); + if (ret) + goto err_cleanup; + } + + sgt = drm_gem_shmem_get_pages_sgt(&bo->base); + if (IS_ERR(sgt)) { + if (!bo->base.base.import_attach) + drm_gem_shmem_unpin(&bo->base); + + ret = PTR_ERR(sgt); + goto err_cleanup; + } + + op_ctx->map.sgt = sgt; + + preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base); + if (!preallocated_vm_bo) { + if (!bo->base.base.import_attach) + drm_gem_shmem_unpin(&bo->base); + + ret = -ENOMEM; + goto err_cleanup; + } + + mutex_lock(&bo->gpuva_list_lock); + op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); + mutex_unlock(&bo->gpuva_list_lock); + + /* If the a vm_bo for this <VM,BO> combination exists, it already + * retains a pin ref, and we can release the one we took earlier. + * + * If our pre-allocated vm_bo is picked, it now retains the pin ref, + * which will be released in panthor_vm_bo_put(). + */ + if (preallocated_vm_bo != op_ctx->map.vm_bo && + !bo->base.base.import_attach) + drm_gem_shmem_unpin(&bo->base); + + op_ctx->map.bo_offset = offset; + + /* L1, L2 and L3 page tables. + * We could optimize L3 allocation by iterating over the sgt and merging + * 2M contiguous blocks, but it's simpler to over-provision and return + * the pages if they're not used. + */ + pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) + + ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) + + ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21); + + op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, + sizeof(*op_ctx->rsvd_page_tables.pages), + GFP_KERNEL); + if (!op_ctx->rsvd_page_tables.pages) + goto err_cleanup; + + ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages); + op_ctx->rsvd_page_tables.count = ret; + if (ret != pt_count) { + ret = -ENOMEM; + goto err_cleanup; + } + + /* Insert BO into the extobj list last, when we know nothing can fail. */ + dma_resv_lock(panthor_vm_resv(vm), NULL); + drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo); + dma_resv_unlock(panthor_vm_resv(vm)); + + return 0; + +err_cleanup: + panthor_vm_cleanup_op_ctx(op_ctx, vm); + return ret; +} + +static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, + struct panthor_vm *vm, + u64 va, u64 size) +{ + u32 pt_count = 0; + int ret; + + memset(op_ctx, 0, sizeof(*op_ctx)); + INIT_LIST_HEAD(&op_ctx->returned_vmas); + op_ctx->va.range = size; + op_ctx->va.addr = va; + op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP; + + /* Pre-allocate L3 page tables to account for the split-2M-block + * situation on unmap. 
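+ *
+ * Illustrative example (added note, not part of the original comment):
+ * unmapping [2M + 4k, 6M + 8k) splits the 2M blocks at both ends and needs
+ * two L3 tables, unmapping [2M + 4k, 6M) only splits the first block and
+ * needs one, and a range fully contained in a single 2M block needs at
+ * most one.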
+ */ + if (va != ALIGN(va, SZ_2M)) + pt_count++; + + if (va + size != ALIGN(va + size, SZ_2M) && + ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M)) + pt_count++; + + ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); + if (ret) + goto err_cleanup; + + if (pt_count) { + op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, + sizeof(*op_ctx->rsvd_page_tables.pages), + GFP_KERNEL); + if (!op_ctx->rsvd_page_tables.pages) + goto err_cleanup; + + ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages); + if (ret != pt_count) { + ret = -ENOMEM; + goto err_cleanup; + } + op_ctx->rsvd_page_tables.count = pt_count; + } + + return 0; + +err_cleanup: + panthor_vm_cleanup_op_ctx(op_ctx, vm); + return ret; +} + +static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx, + struct panthor_vm *vm) +{ + memset(op_ctx, 0, sizeof(*op_ctx)); + INIT_LIST_HEAD(&op_ctx->returned_vmas); + op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY; +} + +/** + * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address + * @vm: VM to look into. + * @va: Virtual address to search for. + * @bo_offset: Offset of the GEM object mapped at this virtual address. + * Only valid on success. + * + * The object returned by this function might no longer be mapped when the + * function returns. It's the caller responsibility to ensure there's no + * concurrent map/unmap operations making the returned value invalid, or + * make sure it doesn't matter if the object is no longer mapped. + * + * Return: A valid pointer on success, an ERR_PTR() otherwise. + */ +struct panthor_gem_object * +panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset) +{ + struct panthor_gem_object *bo = ERR_PTR(-ENOENT); + struct drm_gpuva *gpuva; + struct panthor_vma *vma; + + /* Take the VM lock to prevent concurrent map/unmap operations. */ + mutex_lock(&vm->op_lock); + gpuva = drm_gpuva_find_first(&vm->base, va, 1); + vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL; + if (vma && vma->base.gem.obj) { + drm_gem_object_get(vma->base.gem.obj); + bo = to_panthor_bo(vma->base.gem.obj); + *bo_offset = vma->base.gem.offset + (va - vma->base.va.addr); + } + mutex_unlock(&vm->op_lock); + + return bo; +} + +#define PANTHOR_VM_MIN_KERNEL_VA_SIZE SZ_256M + +static u64 +panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args, + u64 full_va_range) +{ + u64 user_va_range; + + /* Make sure we have a minimum amount of VA space for kernel objects. */ + if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE) + return 0; + + if (args->user_va_range) { + /* Use the user provided value if != 0. */ + user_va_range = args->user_va_range; + } else if (TASK_SIZE_OF(current) < full_va_range) { + /* If the task VM size is smaller than the GPU VA range, pick this + * as our default user VA range, so userspace can CPU/GPU map buffers + * at the same address. + */ + user_va_range = TASK_SIZE_OF(current); + } else { + /* If the GPU VA range is smaller than the task VM size, we + * just have to live with the fact we won't be able to map + * all buffers at the same GPU/CPU address. + * + * If the GPU VA range is bigger than 4G (more than 32-bit of + * VA), we split the range in two, and assign half of it to + * the user and the other half to the kernel, if it's not, we + * keep the kernel VA space as small as possible. + */ + user_va_range = full_va_range > SZ_4G ? 
+ full_va_range / 2 : + full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; + } + + if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range) + user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; + + return user_va_range; +} + +#define PANTHOR_VM_CREATE_FLAGS 0 + +static int +panthor_vm_create_check_args(const struct panthor_device *ptdev, + const struct drm_panthor_vm_create *args, + u64 *kernel_va_start, u64 *kernel_va_range) +{ + u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); + u64 full_va_range = 1ull << va_bits; + u64 user_va_range; + + if (args->flags & ~PANTHOR_VM_CREATE_FLAGS) + return -EINVAL; + + user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range); + if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range)) + return -EINVAL; + + /* Pick a kernel VA range that's a power of two, to have a clear split. */ + *kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range); + *kernel_va_start = full_va_range - *kernel_va_range; + return 0; +} + +/* + * Only 32 VMs per open file. If that becomes a limiting factor, we can + * increase this number. + */ +#define PANTHOR_MAX_VMS_PER_FILE 32 + +/** + * panthor_vm_pool_create_vm() - Create a VM + * @ptdev: Device. + * @pool: The VM pool to create this VM on. + * @args: VM creation arguments. The user VA range field is updated to + * reflect the range actually reserved for userspace. + * + * Return: a positive VM ID on success, a negative error code otherwise. + */ +int panthor_vm_pool_create_vm(struct panthor_device *ptdev, + struct panthor_vm_pool *pool, + struct drm_panthor_vm_create *args) +{ + u64 kernel_va_start, kernel_va_range; + struct panthor_vm *vm; + int ret; + u32 id; + + ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range); + if (ret) + return ret; + + vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range, + kernel_va_start, kernel_va_range); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + ret = xa_alloc(&pool->xa, &id, vm, + XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL); + + if (ret) { + panthor_vm_put(vm); + return ret; + } + + args->user_va_range = kernel_va_start; + return id; +} + +static void panthor_vm_destroy(struct panthor_vm *vm) +{ + if (!vm) + return; + + vm->destroyed = true; + + mutex_lock(&vm->heaps.lock); + panthor_heap_pool_destroy(vm->heaps.pool); + vm->heaps.pool = NULL; + mutex_unlock(&vm->heaps.lock); + + drm_WARN_ON(&vm->ptdev->base, + panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range)); + panthor_vm_put(vm); +} + +/** + * panthor_vm_pool_destroy_vm() - Destroy a VM. + * @pool: VM pool. + * @handle: VM handle. + * + * This function doesn't free the VM object or its resources, it just kills + * all mappings, and makes sure nothing can be mapped after that point. + * + * If there were any active jobs at the time this function is called, these + * jobs should experience page faults and be killed as a result. + * + * The VM resources are freed when the last reference on the VM object is + * dropped. + */ +int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle) +{ + struct panthor_vm *vm; + + vm = xa_erase(&pool->xa, handle); + + panthor_vm_destroy(vm); + + return vm ? 0 : -EINVAL; +} + +/** + * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle + * @pool: VM pool to check. + * @handle: Handle of the VM to retrieve. + * + * Return: A valid pointer if the VM exists, NULL otherwise. 
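+ *
+ * The reference taken here (through panthor_vm_get()) should be released
+ * with panthor_vm_put() when the caller is done with the VM.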
+ */ +struct panthor_vm * +panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) +{ + struct panthor_vm *vm; + + vm = panthor_vm_get(xa_load(&pool->xa, handle)); + + return vm; +} + +/** + * panthor_vm_pool_destroy() - Destroy a VM pool. + * @pfile: File. + * + * Destroy all VMs in the pool, and release the pool resources. + * + * Note that VMs can outlive the pool they were created from if other + * objects hold a reference to there VMs. + */ +void panthor_vm_pool_destroy(struct panthor_file *pfile) +{ + struct panthor_vm *vm; + unsigned long i; + + if (!pfile->vms) + return; + + xa_for_each(&pfile->vms->xa, i, vm) + panthor_vm_destroy(vm); + + xa_destroy(&pfile->vms->xa); + kfree(pfile->vms); +} + +/** + * panthor_vm_pool_create() - Create a VM pool + * @pfile: File. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_vm_pool_create(struct panthor_file *pfile) +{ + pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL); + if (!pfile->vms) + return -ENOMEM; + + xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1); + return 0; +} + +/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ +static void mmu_tlb_flush_all(void *cookie) +{ +} + +static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) +{ +} + +static const struct iommu_flush_ops mmu_tlb_ops = { + .tlb_flush_all = mmu_tlb_flush_all, + .tlb_flush_walk = mmu_tlb_flush_walk, +}; + +static const char *access_type_name(struct panthor_device *ptdev, + u32 fault_status) +{ + switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: + return "ATOMIC"; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + return "READ"; + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + return "WRITE"; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + return "EXECUTE"; + default: + drm_WARN_ON(&ptdev->base, 1); + return NULL; + } +} + +static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) +{ + bool has_unhandled_faults = false; + + status = panthor_mmu_fault_mask(ptdev, status); + while (status) { + u32 as = ffs(status | (status >> 16)) - 1; + u32 mask = panthor_mmu_as_fault_mask(ptdev, as); + u32 new_int_mask; + u64 addr; + u32 fault_status; + u32 exception_type; + u32 access_type; + u32 source_id; + + fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as)); + addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as)); + addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32; + + /* decode the fault status */ + exception_type = fault_status & 0xFF; + access_type = (fault_status >> 8) & 0x3; + source_id = (fault_status >> 16); + + mutex_lock(&ptdev->mmu->as.slots_lock); + + ptdev->mmu->as.faulty_mask |= mask; + new_int_mask = + panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask); + + /* terminal fault, print info about the fault */ + drm_err(&ptdev->base, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "raw fault status: 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n", + as, addr, + fault_status, + (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), + exception_type, panthor_exception_name(ptdev, exception_type), + access_type, access_type_name(ptdev, fault_status), + source_id); + + /* Ignore MMU interrupts on this AS until it's been + * re-enabled. 
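+ * The fault is cleared and the AS interrupt unmasked again from
+ * panthor_vm_active(), right before the address space is re-enabled.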
+ */ + ptdev->mmu->irq.mask = new_int_mask; + gpu_write(ptdev, MMU_INT_MASK, new_int_mask); + + if (ptdev->mmu->as.slots[as].vm) + ptdev->mmu->as.slots[as].vm->unhandled_fault = true; + + /* Disable the MMU to kill jobs on this AS. */ + panthor_mmu_as_disable(ptdev, as); + mutex_unlock(&ptdev->mmu->as.slots_lock); + + status &= ~mask; + has_unhandled_faults = true; + } + + if (has_unhandled_faults) + panthor_sched_report_mmu_fault(ptdev); +} +PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler); + +/** + * panthor_mmu_suspend() - Suspend the MMU logic + * @ptdev: Device. + * + * All we do here is de-assign the AS slots on all active VMs, so things + * get flushed to the main memory, and no further access to these VMs are + * possible. + * + * We also suspend the MMU IRQ. + */ +void panthor_mmu_suspend(struct panthor_device *ptdev) +{ + mutex_lock(&ptdev->mmu->as.slots_lock); + for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { + struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; + + if (vm) { + drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); + panthor_vm_release_as_locked(vm); + } + } + mutex_unlock(&ptdev->mmu->as.slots_lock); + + panthor_mmu_irq_suspend(&ptdev->mmu->irq); +} + +/** + * panthor_mmu_resume() - Resume the MMU logic + * @ptdev: Device. + * + * Resume the IRQ. + * + * We don't re-enable previously active VMs. We assume other parts of the + * driver will call panthor_vm_active() on the VMs they intend to use. + */ +void panthor_mmu_resume(struct panthor_device *ptdev) +{ + mutex_lock(&ptdev->mmu->as.slots_lock); + ptdev->mmu->as.alloc_mask = 0; + ptdev->mmu->as.faulty_mask = 0; + mutex_unlock(&ptdev->mmu->as.slots_lock); + + panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); +} + +/** + * panthor_mmu_pre_reset() - Prepare for a reset + * @ptdev: Device. + * + * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we + * don't get asked to do a VM operation while the GPU is down. + * + * We don't cleanly shutdown the AS slots here, because the reset might + * come from an AS_ACTIVE_BIT stuck situation. + */ +void panthor_mmu_pre_reset(struct panthor_device *ptdev) +{ + struct panthor_vm *vm; + + panthor_mmu_irq_suspend(&ptdev->mmu->irq); + + mutex_lock(&ptdev->mmu->vm.lock); + ptdev->mmu->vm.reset_in_progress = true; + list_for_each_entry(vm, &ptdev->mmu->vm.list, node) + panthor_vm_stop(vm); + mutex_unlock(&ptdev->mmu->vm.lock); +} + +/** + * panthor_mmu_post_reset() - Restore things after a reset + * @ptdev: Device. + * + * Put the MMU logic back in action after a reset. That implies resuming the + * IRQ and re-enabling the VM_BIND queues. + */ +void panthor_mmu_post_reset(struct panthor_device *ptdev) +{ + struct panthor_vm *vm; + + mutex_lock(&ptdev->mmu->as.slots_lock); + + /* Now that the reset is effective, we can assume that none of the + * AS slots are setup, and clear the faulty flags too. + */ + ptdev->mmu->as.alloc_mask = 0; + ptdev->mmu->as.faulty_mask = 0; + + for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { + struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; + + if (vm) + panthor_vm_release_as_locked(vm); + } + + mutex_unlock(&ptdev->mmu->as.slots_lock); + + panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); + + /* Restart the VM_BIND queues. 
*/ + mutex_lock(&ptdev->mmu->vm.lock); + list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { + panthor_vm_start(vm); + } + ptdev->mmu->vm.reset_in_progress = false; + mutex_unlock(&ptdev->mmu->vm.lock); +} + +static void panthor_vm_free(struct drm_gpuvm *gpuvm) +{ + struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base); + struct panthor_device *ptdev = vm->ptdev; + + mutex_lock(&vm->heaps.lock); + if (drm_WARN_ON(&ptdev->base, vm->heaps.pool)) + panthor_heap_pool_destroy(vm->heaps.pool); + mutex_unlock(&vm->heaps.lock); + mutex_destroy(&vm->heaps.lock); + + mutex_lock(&ptdev->mmu->vm.lock); + list_del(&vm->node); + /* Restore the scheduler state so we can call drm_sched_entity_destroy() + * and drm_sched_fini(). If get there, that means we have no job left + * and no new jobs can be queued, so we can start the scheduler without + * risking interfering with the reset. + */ + if (ptdev->mmu->vm.reset_in_progress) + panthor_vm_start(vm); + mutex_unlock(&ptdev->mmu->vm.lock); + + drm_sched_entity_destroy(&vm->entity); + drm_sched_fini(&vm->sched); + + mutex_lock(&ptdev->mmu->as.slots_lock); + if (vm->as.id >= 0) { + int cookie; + + if (drm_dev_enter(&ptdev->base, &cookie)) { + panthor_mmu_as_disable(ptdev, vm->as.id); + drm_dev_exit(cookie); + } + + ptdev->mmu->as.slots[vm->as.id].vm = NULL; + clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); + list_del(&vm->as.lru_node); + } + mutex_unlock(&ptdev->mmu->as.slots_lock); + + free_io_pgtable_ops(vm->pgtbl_ops); + + drm_mm_takedown(&vm->mm); + kfree(vm); +} + +/** + * panthor_vm_put() - Release a reference on a VM + * @vm: VM to release the reference on. Can be NULL. + */ +void panthor_vm_put(struct panthor_vm *vm) +{ + drm_gpuvm_put(vm ? &vm->base : NULL); +} + +/** + * panthor_vm_get() - Get a VM reference + * @vm: VM to get the reference on. Can be NULL. + * + * Return: @vm value. + */ +struct panthor_vm *panthor_vm_get(struct panthor_vm *vm) +{ + if (vm) + drm_gpuvm_get(&vm->base); + + return vm; +} + +/** + * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM + * @vm: VM to query the heap pool on. + * @create: True if the heap pool should be created when it doesn't exist. + * + * Heap pools are per-VM. This function allows one to retrieve the heap pool + * attached to a VM. + * + * If no heap pool exists yet, and @create is true, we create one. + * + * The returned panthor_heap_pool should be released with panthor_heap_pool_put(). + * + * Return: A valid pointer on success, an ERR_PTR() otherwise. + */ +struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create) +{ + struct panthor_heap_pool *pool; + + mutex_lock(&vm->heaps.lock); + if (!vm->heaps.pool && create) { + if (vm->destroyed) + pool = ERR_PTR(-EINVAL); + else + pool = panthor_heap_pool_create(vm->ptdev, vm); + + if (!IS_ERR(pool)) + vm->heaps.pool = panthor_heap_pool_get(pool); + } else { + pool = panthor_heap_pool_get(vm->heaps.pool); + } + mutex_unlock(&vm->heaps.lock); + + return pool; +} + +static u64 mair_to_memattr(u64 mair) +{ + u64 memattr = 0; + u32 i; + + for (i = 0; i < 8; i++) { + u8 in_attr = mair >> (8 * i), out_attr; + u8 outer = in_attr >> 4, inner = in_attr & 0xf; + + /* For caching to be enabled, inner and outer caching policy + * have to be both write-back, if one of them is write-through + * or non-cacheable, we just choose non-cacheable. Device + * memory is also translated to non-cacheable. 
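+ *
+ * Illustrative example (added note, not part of the original comment): a
+ * MAIR attribute of 0xff (inner/outer write-back, read/write-allocate)
+ * keeps caching enabled, while 0x44 (inner/outer non-cacheable) or 0x04
+ * (device memory) falls back to the non-cacheable encoding below.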
+ */ + if (!(outer & 3) || !(outer & 4) || !(inner & 4)) { + out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC | + AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | + AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); + } else { + /* Use SH_CPU_INNER mode so SH_IS, which is used when + * IOMMU_CACHE is set, actually maps to the standard + * definition of inner-shareable and not Mali's + * internal-shareable mode. + */ + out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | + AS_MEMATTR_AARCH64_SH_CPU_INNER | + AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); + } + + memattr |= (u64)out_attr << (8 * i); + } + + return memattr; +} + +static void panthor_vma_link(struct panthor_vm *vm, + struct panthor_vma *vma, + struct drm_gpuvm_bo *vm_bo) +{ + struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); + + mutex_lock(&bo->gpuva_list_lock); + drm_gpuva_link(&vma->base, vm_bo); + drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); + mutex_unlock(&bo->gpuva_list_lock); +} + +static void panthor_vma_unlink(struct panthor_vm *vm, + struct panthor_vma *vma) +{ + struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); + struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); + + mutex_lock(&bo->gpuva_list_lock); + drm_gpuva_unlink(&vma->base); + mutex_unlock(&bo->gpuva_list_lock); + + /* drm_gpuva_unlink() release the vm_bo, but we manually retained it + * when entering this function, so we can implement deferred VMA + * destruction. Re-assign it here. + */ + vma->base.vm_bo = vm_bo; + list_add_tail(&vma->node, &vm->op_ctx->returned_vmas); +} + +static void panthor_vma_init(struct panthor_vma *vma, u32 flags) +{ + INIT_LIST_HEAD(&vma->node); + vma->flags = flags; +} + +#define PANTHOR_VM_MAP_FLAGS \ + (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED) + +static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) +{ + struct panthor_vm *vm = priv; + struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; + struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx); + int ret; + + if (!vma) + return -EINVAL; + + panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS); + + ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags), + op_ctx->map.sgt, op->map.gem.offset, + op->map.va.range); + if (ret) + return ret; + + /* Ref owned by the mapping now, clear the obj field so we don't release the + * pinning/obj ref behind GPUVA's back. + */ + drm_gpuva_map(&vm->base, &vma->base, &op->map); + panthor_vma_link(vm, vma, op_ctx->map.vm_bo); + op_ctx->map.vm_bo = NULL; + return 0; +} + +static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, + void *priv) +{ + struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base); + struct panthor_vm *vm = priv; + struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; + struct panthor_vma *prev_vma = NULL, *next_vma = NULL; + u64 unmap_start, unmap_range; + int ret; + + drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range); + ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range); + if (ret) + return ret; + + if (op->remap.prev) { + prev_vma = panthor_vm_op_ctx_get_vma(op_ctx); + panthor_vma_init(prev_vma, unmap_vma->flags); + } + + if (op->remap.next) { + next_vma = panthor_vm_op_ctx_get_vma(op_ctx); + panthor_vma_init(next_vma, unmap_vma->flags); + } + + drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL, + next_vma ? 
&next_vma->base : NULL, + &op->remap); + + if (prev_vma) { + /* panthor_vma_link() transfers the vm_bo ownership to + * the VMA object. Since the vm_bo we're passing is still + * owned by the old mapping which will be released when this + * mapping is destroyed, we need to grab a ref here. + */ + panthor_vma_link(vm, prev_vma, + drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); + } + + if (next_vma) { + panthor_vma_link(vm, next_vma, + drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); + } + + panthor_vma_unlink(vm, unmap_vma); + return 0; +} + +static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, + void *priv) +{ + struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base); + struct panthor_vm *vm = priv; + int ret; + + ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr, + unmap_vma->base.va.range); + if (drm_WARN_ON(&vm->ptdev->base, ret)) + return ret; + + drm_gpuva_unmap(&op->unmap); + panthor_vma_unlink(vm, unmap_vma); + return 0; +} + +static const struct drm_gpuvm_ops panthor_gpuvm_ops = { + .vm_free = panthor_vm_free, + .sm_step_map = panthor_gpuva_sm_step_map, + .sm_step_remap = panthor_gpuva_sm_step_remap, + .sm_step_unmap = panthor_gpuva_sm_step_unmap, +}; + +/** + * panthor_vm_resv() - Get the dma_resv object attached to a VM. + * @vm: VM to get the dma_resv of. + * + * Return: A dma_resv object. + */ +struct dma_resv *panthor_vm_resv(struct panthor_vm *vm) +{ + return drm_gpuvm_resv(&vm->base); +} + +struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm) +{ + if (!vm) + return NULL; + + return vm->base.r_obj; +} + +static int +panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, + bool flag_vm_unusable_on_failure) +{ + u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK; + int ret; + + if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY) + return 0; + + mutex_lock(&vm->op_lock); + vm->op_ctx = op; + switch (op_type) { + case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: + if (vm->unusable) { + ret = -EINVAL; + break; + } + + ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range, + op->map.vm_bo->obj, op->map.bo_offset); + break; + + case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: + ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range); + break; + + default: + ret = -EINVAL; + break; + } + + if (ret && flag_vm_unusable_on_failure) + vm->unusable = true; + + vm->op_ctx = NULL; + mutex_unlock(&vm->op_lock); + + return ret; +} + +static struct dma_fence * +panthor_vm_bind_run_job(struct drm_sched_job *sched_job) +{ + struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); + bool cookie; + int ret; + + /* Not only we report an error whose result is propagated to the + * drm_sched finished fence, but we also flag the VM as unusable, because + * a failure in the async VM_BIND results in an inconsistent state. VM needs + * to be destroyed and recreated. + */ + cookie = dma_fence_begin_signalling(); + ret = panthor_vm_exec_op(job->vm, &job->ctx, true); + dma_fence_end_signalling(cookie); + + return ret ? ERR_PTR(ret) : NULL; +} + +static void panthor_vm_bind_job_release(struct kref *kref) +{ + struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount); + + if (job->base.s_fence) + drm_sched_job_cleanup(&job->base); + + panthor_vm_cleanup_op_ctx(&job->ctx, job->vm); + panthor_vm_put(job->vm); + kfree(job); +} + +/** + * panthor_vm_bind_job_put() - Release a VM_BIND job reference + * @sched_job: Job to release the reference on. 
+ */ +void panthor_vm_bind_job_put(struct drm_sched_job *sched_job) +{ + struct panthor_vm_bind_job *job = + container_of(sched_job, struct panthor_vm_bind_job, base); + + if (sched_job) + kref_put(&job->refcount, panthor_vm_bind_job_release); +} + +static void +panthor_vm_bind_free_job(struct drm_sched_job *sched_job) +{ + struct panthor_vm_bind_job *job = + container_of(sched_job, struct panthor_vm_bind_job, base); + + drm_sched_job_cleanup(sched_job); + + /* Do the heavy cleanups asynchronously, so we're out of the + * dma-signaling path and can acquire dma-resv locks safely. + */ + queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work); +} + +static enum drm_gpu_sched_stat +panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) +{ + WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!"); + return DRM_GPU_SCHED_STAT_NOMINAL; +} + +static const struct drm_sched_backend_ops panthor_vm_bind_ops = { + .run_job = panthor_vm_bind_run_job, + .free_job = panthor_vm_bind_free_job, + .timedout_job = panthor_vm_bind_timedout_job, +}; + +/** + * panthor_vm_create() - Create a VM + * @ptdev: Device. + * @for_mcu: True if this is the FW MCU VM. + * @kernel_va_start: Start of the range reserved for kernel BO mapping. + * @kernel_va_size: Size of the range reserved for kernel BO mapping. + * @auto_kernel_va_start: Start of the auto-VA kernel range. + * @auto_kernel_va_size: Size of the auto-VA kernel range. + * + * Return: A valid pointer on success, an ERR_PTR() otherwise. + */ +struct panthor_vm * +panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, + u64 kernel_va_start, u64 kernel_va_size, + u64 auto_kernel_va_start, u64 auto_kernel_va_size) +{ + u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); + u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); + u64 full_va_range = 1ull << va_bits; + struct drm_gem_object *dummy_gem; + struct drm_gpu_scheduler *sched; + struct io_pgtable_cfg pgtbl_cfg; + u64 mair, min_va, va_range; + struct panthor_vm *vm; + int ret; + + vm = kzalloc(sizeof(*vm), GFP_KERNEL); + if (!vm) + return ERR_PTR(-ENOMEM); + + /* We allocate a dummy GEM for the VM. */ + dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base); + if (!dummy_gem) { + ret = -ENOMEM; + goto err_free_vm; + } + + mutex_init(&vm->heaps.lock); + vm->for_mcu = for_mcu; + vm->ptdev = ptdev; + mutex_init(&vm->op_lock); + + if (for_mcu) { + /* CSF MCU is a cortex M7, and can only address 4G */ + min_va = 0; + va_range = SZ_4G; + } else { + min_va = 0; + va_range = full_va_range; + } + + mutex_init(&vm->mm_lock); + drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size); + vm->kernel_auto_va.start = auto_kernel_va_start; + vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1; + + INIT_LIST_HEAD(&vm->node); + INIT_LIST_HEAD(&vm->as.lru_node); + vm->as.id = -1; + refcount_set(&vm->as.active_cnt, 0); + + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = SZ_4K | SZ_2M, + .ias = va_bits, + .oas = pa_bits, + .coherent_walk = ptdev->coherent, + .tlb = &mmu_tlb_ops, + .iommu_dev = ptdev->base.dev, + .alloc = alloc_pt, + .free = free_pt, + }; + + vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm); + if (!vm->pgtbl_ops) { + ret = -EINVAL; + goto err_mm_takedown; + } + + /* Bind operations are synchronous for now, no timeout needed. 
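+ * Hence the MAX_SCHEDULE_TIMEOUT passed to drm_sched_init() below; an
+ * asynchronous implementation would presumably want a finite timeout so
+ * panthor_vm_bind_timedout_job() can actually trigger.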
*/ + ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq, + 1, 1, 0, + MAX_SCHEDULE_TIMEOUT, NULL, NULL, + "panthor-vm-bind", ptdev->base.dev); + if (ret) + goto err_free_io_pgtable; + + sched = &vm->sched; + ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL); + if (ret) + goto err_sched_fini; + + mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; + vm->memattr = mair_to_memattr(mair); + + mutex_lock(&ptdev->mmu->vm.lock); + list_add_tail(&vm->node, &ptdev->mmu->vm.list); + + /* If a reset is in progress, stop the scheduler. */ + if (ptdev->mmu->vm.reset_in_progress) + panthor_vm_stop(vm); + mutex_unlock(&ptdev->mmu->vm.lock); + + /* We intentionally leave the reserved range to zero, because we want kernel VMAs + * to be handled the same way user VMAs are. + */ + drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM", + DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem, + min_va, va_range, 0, 0, &panthor_gpuvm_ops); + drm_gem_object_put(dummy_gem); + return vm; + +err_sched_fini: + drm_sched_fini(&vm->sched); + +err_free_io_pgtable: + free_io_pgtable_ops(vm->pgtbl_ops); + +err_mm_takedown: + drm_mm_takedown(&vm->mm); + drm_gem_object_put(dummy_gem); + +err_free_vm: + kfree(vm); + return ERR_PTR(ret); +} + +static int +panthor_vm_bind_prepare_op_ctx(struct drm_file *file, + struct panthor_vm *vm, + const struct drm_panthor_vm_bind_op *op, + struct panthor_vm_op_ctx *op_ctx) +{ + struct drm_gem_object *gem; + int ret; + + /* Aligned on page size. */ + if ((op->va | op->size) & ~PAGE_MASK) + return -EINVAL; + + switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { + case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: + gem = drm_gem_object_lookup(file, op->bo_handle); + ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm, + gem ? to_panthor_bo(gem) : NULL, + op->bo_offset, + op->size, + op->va, + op->flags); + drm_gem_object_put(gem); + return ret; + + case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: + if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) + return -EINVAL; + + if (op->bo_handle || op->bo_offset) + return -EINVAL; + + return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size); + + case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: + if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) + return -EINVAL; + + if (op->bo_handle || op->bo_offset) + return -EINVAL; + + if (op->va || op->size) + return -EINVAL; + + if (!op->syncs.count) + return -EINVAL; + + panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm); + return 0; + + default: + return -EINVAL; + } +} + +static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work) +{ + struct panthor_vm_bind_job *job = + container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work); + + panthor_vm_bind_job_put(&job->base); +} + +/** + * panthor_vm_bind_job_create() - Create a VM_BIND job + * @file: File. + * @vm: VM targeted by the VM_BIND job. + * @op: VM operation data. + * + * Return: A valid pointer on success, an ERR_PTR() otherwise. 
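+ * + * The returned job holds a reference on @vm, which is dropped when the last reference to the job is released with panthor_vm_bind_job_put().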
+ */ +struct drm_sched_job * +panthor_vm_bind_job_create(struct drm_file *file, + struct panthor_vm *vm, + const struct drm_panthor_vm_bind_op *op) +{ + struct panthor_vm_bind_job *job; + int ret; + + if (!vm) + return ERR_PTR(-EINVAL); + + if (vm->destroyed || vm->unusable) + return ERR_PTR(-EINVAL); + + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return ERR_PTR(-ENOMEM); + + ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx); + if (ret) { + kfree(job); + return ERR_PTR(ret); + } + + INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work); + kref_init(&job->refcount); + job->vm = panthor_vm_get(vm); + + ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm); + if (ret) + goto err_put_job; + + return &job->base; + +err_put_job: + panthor_vm_bind_job_put(&job->base); + return ERR_PTR(ret); +} + +/** + * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs + * @exec: The locking/preparation context. + * @sched_job: The job to prepare resvs on. + * + * Locks and prepares the VM resv. + * + * If this is a map operation, locks and prepares the GEM resv. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, + struct drm_sched_job *sched_job) +{ + struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); + int ret; + + /* Acquire the VM lock and reserve a slot for this VM bind job. */ + ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1); + if (ret) + return ret; + + if (job->ctx.map.vm_bo) { + /* Lock/prepare the GEM being mapped. */ + ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1); + if (ret) + return ret; + } + + return 0; +} + +/** + * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job + * @exec: drm_exec context. + * @sched_job: Job to update the resvs on. + */ +void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, + struct drm_sched_job *sched_job) +{ + struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); + + /* Explicit sync => we just register our job finished fence as bookkeep. */ + drm_gpuvm_resv_add_fence(&job->vm->base, exec, + &sched_job->s_fence->finished, + DMA_RESV_USAGE_BOOKKEEP, + DMA_RESV_USAGE_BOOKKEEP); +} + +void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, + struct dma_fence *fence, + enum dma_resv_usage private_usage, + enum dma_resv_usage extobj_usage) +{ + drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage); +} + +/** + * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously. + * @file: File. + * @vm: VM targeted by the VM operation. + * @op: Data describing the VM operation. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_vm_bind_exec_sync_op(struct drm_file *file, + struct panthor_vm *vm, + struct drm_panthor_vm_bind_op *op) +{ + struct panthor_vm_op_ctx op_ctx; + int ret; + + /* No sync objects allowed on synchronous operations. */ + if (op->syncs.count) + return -EINVAL; + + if (!op->size) + return 0; + + ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx); + if (ret) + return ret; + + ret = panthor_vm_exec_op(vm, &op_ctx, false); + panthor_vm_cleanup_op_ctx(&op_ctx, vm); + + return ret; +} + +/** + * panthor_vm_map_bo_range() - Map a GEM object range to a VM + * @vm: VM to map the GEM to. + * @bo: GEM object to map. + * @offset: Offset in the GEM object. + * @size: Size to map.
+ * @va: Virtual address to map the object to. + * @flags: Combination of drm_panthor_vm_bind_op_flags flags. + * Only map-related flags are valid. + * + * Internal use only. For userspace requests, use + * panthor_vm_bind_exec_sync_op() instead. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, + u64 offset, u64 size, u64 va, u32 flags) +{ + struct panthor_vm_op_ctx op_ctx; + int ret; + + ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags); + if (ret) + return ret; + + ret = panthor_vm_exec_op(vm, &op_ctx, false); + panthor_vm_cleanup_op_ctx(&op_ctx, vm); + + return ret; +} + +/** + * panthor_vm_unmap_range() - Unmap a portion of the VA space + * @vm: VM to unmap the region from. + * @va: Virtual address to unmap. Must be 4k aligned. + * @size: Size of the region to unmap. Must be 4k aligned. + * + * Internal use only. For userspace requests, use + * panthor_vm_bind_exec_sync_op() instead. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size) +{ + struct panthor_vm_op_ctx op_ctx; + int ret; + + ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size); + if (ret) + return ret; + + ret = panthor_vm_exec_op(vm, &op_ctx, false); + panthor_vm_cleanup_op_ctx(&op_ctx, vm); + + return ret; +} + +/** + * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs. + * @exec: Locking/preparation context. + * @vm: VM targeted by the GPU job. + * @slot_count: Number of slots to reserve. + * + * GPU jobs assume all BOs bound to the VM at the time the job is submitted + * are available when the job is executed. In order to guarantee that, we + * need to reserve a slot on all BOs mapped to a VM and update this slot with + * the job fence after its submission. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm, + u32 slot_count) +{ + int ret; + + /* Acquire the VM lock and reserve a slot for this GPU job. */ + ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count); + if (ret) + return ret; + + return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count); +} + +/** + * panthor_mmu_unplug() - Unplug the MMU logic + * @ptdev: Device. + * + * No access to the MMU regs should be done after this function is called. + * We suspend the IRQ and disable all VMs to guarantee that. + */ +void panthor_mmu_unplug(struct panthor_device *ptdev) +{ + panthor_mmu_irq_suspend(&ptdev->mmu->irq); + + mutex_lock(&ptdev->mmu->as.slots_lock); + for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { + struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; + + if (vm) { + drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); + panthor_vm_release_as_locked(vm); + } + } + mutex_unlock(&ptdev->mmu->as.slots_lock); +} + +static void panthor_mmu_release_wq(struct drm_device *ddev, void *res) +{ + destroy_workqueue(res); +} + +/** + * panthor_mmu_init() - Initialize the MMU logic. + * @ptdev: Device. + * + * Return: 0 on success, a negative error code otherwise. 
+ */ +int panthor_mmu_init(struct panthor_device *ptdev) +{ + u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); + struct panthor_mmu *mmu; + int ret, irq; + + mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL); + if (!mmu) + return -ENOMEM; + + INIT_LIST_HEAD(&mmu->as.lru_list); + + ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock); + if (ret) + return ret; + + INIT_LIST_HEAD(&mmu->vm.list); + ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock); + if (ret) + return ret; + + ptdev->mmu = mmu; + + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu"); + if (irq <= 0) + return -ENODEV; + + ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq, + panthor_mmu_fault_mask(ptdev, ~0)); + if (ret) + return ret; + + mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0); + if (!mmu->vm.wq) + return -ENOMEM; + + /* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction, + * which passes iova as an unsigned long. Patch the mmu_features to reflect this + * limitation. + */ + if (sizeof(unsigned long) * 8 < va_bits) { + ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0); + ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8; + } + + return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq); +} + +#ifdef CONFIG_DEBUG_FS +static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m) +{ + int ret; + + mutex_lock(&vm->op_lock); + ret = drm_debugfs_gpuva_info(m, &vm->base); + mutex_unlock(&vm->op_lock); + + return ret; +} + +static int show_each_vm(struct seq_file *m, void *arg) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *ddev = node->minor->dev; + struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); + int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data; + struct panthor_vm *vm; + int ret = 0; + + mutex_lock(&ptdev->mmu->vm.lock); + list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { + ret = show(vm, m); + if (ret < 0) + break; + + seq_puts(m, "\n"); + } + mutex_unlock(&ptdev->mmu->vm.lock); + + return ret; +} + +static struct drm_info_list panthor_mmu_debugfs_list[] = { + DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas), +}; + +/** + * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries + * @minor: Minor. + */ +void panthor_mmu_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(panthor_mmu_debugfs_list, + ARRAY_SIZE(panthor_mmu_debugfs_list), + minor->debugfs_root, minor); +} +#endif /* CONFIG_DEBUG_FS */ + +/** + * panthor_mmu_pt_cache_init() - Initialize the page table cache. + * + * Return: 0 on success, a negative error code otherwise. + */ +int panthor_mmu_pt_cache_init(void) +{ + pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL); + if (!pt_cache) + return -ENOMEM; + + return 0; +} + +/** + * panthor_mmu_pt_cache_fini() - Destroy the page table cache. + */ +void panthor_mmu_pt_cache_fini(void) +{ + kmem_cache_destroy(pt_cache); +} diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h new file mode 100644 index 000000000000..f3c1ed19f973 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_mmu.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2023 Collabora ltd. 
*/ + +#ifndef __PANTHOR_MMU_H__ +#define __PANTHOR_MMU_H__ + +#include <linux/dma-resv.h> + +struct drm_exec; +struct drm_sched_job; +struct panthor_gem_object; +struct panthor_heap_pool; +struct panthor_vm; +struct panthor_vma; +struct panthor_mmu; + +int panthor_mmu_init(struct panthor_device *ptdev); +void panthor_mmu_unplug(struct panthor_device *ptdev); +void panthor_mmu_pre_reset(struct panthor_device *ptdev); +void panthor_mmu_post_reset(struct panthor_device *ptdev); +void panthor_mmu_suspend(struct panthor_device *ptdev); +void panthor_mmu_resume(struct panthor_device *ptdev); + +int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, + u64 offset, u64 size, u64 va, u32 flags); +int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size); +struct panthor_gem_object * +panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset); + +int panthor_vm_active(struct panthor_vm *vm); +void panthor_vm_idle(struct panthor_vm *vm); +int panthor_vm_as(struct panthor_vm *vm); + +struct panthor_heap_pool * +panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); + +struct panthor_vm *panthor_vm_get(struct panthor_vm *vm); +void panthor_vm_put(struct panthor_vm *vm); +struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, + u64 kernel_va_start, u64 kernel_va_size, + u64 kernel_auto_va_start, + u64 kernel_auto_va_size); + +int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, + struct panthor_vm *vm, + u32 slot_count); +int panthor_vm_add_bos_resvs_deps_to_job(struct panthor_vm *vm, + struct drm_sched_job *job); +void panthor_vm_add_job_fence_to_bos_resvs(struct panthor_vm *vm, + struct drm_sched_job *job); + +struct dma_resv *panthor_vm_resv(struct panthor_vm *vm); +struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm); + +void panthor_vm_pool_destroy(struct panthor_file *pfile); +int panthor_vm_pool_create(struct panthor_file *pfile); +int panthor_vm_pool_create_vm(struct panthor_device *ptdev, + struct panthor_vm_pool *pool, + struct drm_panthor_vm_create *args); +int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle); +struct panthor_vm *panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle); + +bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm); +bool panthor_vm_is_unusable(struct panthor_vm *vm); + +/* + * PANTHOR_VM_KERNEL_AUTO_VA: Use this magic address when you want the GEM + * logic to auto-allocate the virtual address in the reserved kernel VA range. 
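+ * + * Callers pass this value as the VA when they do not care about the exact placement; the address is then allocated from the [auto_kernel_va_start, auto_kernel_va_start + auto_kernel_va_size) window passed to panthor_vm_create().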
+ */ +#define PANTHOR_VM_KERNEL_AUTO_VA ~0ull + +int panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, + struct drm_mm_node *va_node); +void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node); + +int panthor_vm_bind_exec_sync_op(struct drm_file *file, + struct panthor_vm *vm, + struct drm_panthor_vm_bind_op *op); + +struct drm_sched_job * +panthor_vm_bind_job_create(struct drm_file *file, + struct panthor_vm *vm, + const struct drm_panthor_vm_bind_op *op); +void panthor_vm_bind_job_put(struct drm_sched_job *job); +int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, + struct drm_sched_job *job); +void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); + +void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, + struct dma_fence *fence, + enum dma_resv_usage private_usage, + enum dma_resv_usage extobj_usage); + +int panthor_mmu_pt_cache_init(void); +void panthor_mmu_pt_cache_fini(void); + +#ifdef CONFIG_DEBUG_FS +void panthor_mmu_debugfs_init(struct drm_minor *minor); +#endif + +#endif diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h new file mode 100644 index 000000000000..b7b3b3add166 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2023 Collabora ltd. */ +/* + * Register definitions based on mali_kbase_gpu_regmap.h and + * mali_kbase_gpu_regmap_csf.h + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + */ +#ifndef __PANTHOR_REGS_H__ +#define __PANTHOR_REGS_H__ + +#define GPU_ID 0x0 +#define GPU_ARCH_MAJOR(x) ((x) >> 28) +#define GPU_ARCH_MINOR(x) (((x) & GENMASK(27, 24)) >> 24) +#define GPU_ARCH_REV(x) (((x) & GENMASK(23, 20)) >> 20) +#define GPU_PROD_MAJOR(x) (((x) & GENMASK(19, 16)) >> 16) +#define GPU_VER_MAJOR(x) (((x) & GENMASK(15, 12)) >> 12) +#define GPU_VER_MINOR(x) (((x) & GENMASK(11, 4)) >> 4) +#define GPU_VER_STATUS(x) ((x) & GENMASK(3, 0)) + +#define GPU_L2_FEATURES 0x4 +#define GPU_L2_FEATURES_LINE_SIZE(x) (1 << ((x) & GENMASK(7, 0))) + +#define GPU_CORE_FEATURES 0x8 + +#define GPU_TILER_FEATURES 0xC +#define GPU_MEM_FEATURES 0x10 +#define GROUPS_L2_COHERENT BIT(0) + +#define GPU_MMU_FEATURES 0x14 +#define GPU_MMU_FEATURES_VA_BITS(x) ((x) & GENMASK(7, 0)) +#define GPU_MMU_FEATURES_PA_BITS(x) (((x) >> 8) & GENMASK(7, 0)) +#define GPU_AS_PRESENT 0x18 +#define GPU_CSF_ID 0x1C + +#define GPU_INT_RAWSTAT 0x20 +#define GPU_INT_CLEAR 0x24 +#define GPU_INT_MASK 0x28 +#define GPU_INT_STAT 0x2c +#define GPU_IRQ_FAULT BIT(0) +#define GPU_IRQ_PROTM_FAULT BIT(1) +#define GPU_IRQ_RESET_COMPLETED BIT(8) +#define GPU_IRQ_POWER_CHANGED BIT(9) +#define GPU_IRQ_POWER_CHANGED_ALL BIT(10) +#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17) +#define GPU_IRQ_DOORBELL_MIRROR BIT(18) +#define GPU_IRQ_MCU_STATUS_CHANGED BIT(19) +#define GPU_CMD 0x30 +#define GPU_CMD_DEF(type, payload) ((type) | ((payload) << 8)) +#define GPU_SOFT_RESET GPU_CMD_DEF(1, 1) +#define GPU_HARD_RESET GPU_CMD_DEF(1, 2) +#define CACHE_CLEAN BIT(0) +#define CACHE_INV BIT(1) +#define GPU_FLUSH_CACHES(l2, lsc, oth) \ + GPU_CMD_DEF(4, ((l2) << 0) | ((lsc) << 4) | ((oth) << 8)) + +#define GPU_STATUS 0x34 +#define GPU_STATUS_ACTIVE BIT(0) +#define GPU_STATUS_PWR_ACTIVE BIT(1) +#define GPU_STATUS_PAGE_FAULT BIT(4) +#define GPU_STATUS_PROTM_ACTIVE BIT(7) +#define 
GPU_STATUS_DBG_ENABLED BIT(8) + +#define GPU_FAULT_STATUS 0x3C +#define GPU_FAULT_ADDR_LO 0x40 +#define GPU_FAULT_ADDR_HI 0x44 + +#define GPU_PWR_KEY 0x50 +#define GPU_PWR_KEY_UNLOCK 0x2968A819 +#define GPU_PWR_OVERRIDE0 0x54 +#define GPU_PWR_OVERRIDE1 0x58 + +#define GPU_TIMESTAMP_OFFSET_LO 0x88 +#define GPU_TIMESTAMP_OFFSET_HI 0x8C +#define GPU_CYCLE_COUNT_LO 0x90 +#define GPU_CYCLE_COUNT_HI 0x94 +#define GPU_TIMESTAMP_LO 0x98 +#define GPU_TIMESTAMP_HI 0x9C + +#define GPU_THREAD_MAX_THREADS 0xA0 +#define GPU_THREAD_MAX_WORKGROUP_SIZE 0xA4 +#define GPU_THREAD_MAX_BARRIER_SIZE 0xA8 +#define GPU_THREAD_FEATURES 0xAC + +#define GPU_TEXTURE_FEATURES(n) (0xB0 + ((n) * 4)) + +#define GPU_SHADER_PRESENT_LO 0x100 +#define GPU_SHADER_PRESENT_HI 0x104 +#define GPU_TILER_PRESENT_LO 0x110 +#define GPU_TILER_PRESENT_HI 0x114 +#define GPU_L2_PRESENT_LO 0x120 +#define GPU_L2_PRESENT_HI 0x124 + +#define SHADER_READY_LO 0x140 +#define SHADER_READY_HI 0x144 +#define TILER_READY_LO 0x150 +#define TILER_READY_HI 0x154 +#define L2_READY_LO 0x160 +#define L2_READY_HI 0x164 + +#define SHADER_PWRON_LO 0x180 +#define SHADER_PWRON_HI 0x184 +#define TILER_PWRON_LO 0x190 +#define TILER_PWRON_HI 0x194 +#define L2_PWRON_LO 0x1A0 +#define L2_PWRON_HI 0x1A4 + +#define SHADER_PWROFF_LO 0x1C0 +#define SHADER_PWROFF_HI 0x1C4 +#define TILER_PWROFF_LO 0x1D0 +#define TILER_PWROFF_HI 0x1D4 +#define L2_PWROFF_LO 0x1E0 +#define L2_PWROFF_HI 0x1E4 + +#define SHADER_PWRTRANS_LO 0x200 +#define SHADER_PWRTRANS_HI 0x204 +#define TILER_PWRTRANS_LO 0x210 +#define TILER_PWRTRANS_HI 0x214 +#define L2_PWRTRANS_LO 0x220 +#define L2_PWRTRANS_HI 0x224 + +#define SHADER_PWRACTIVE_LO 0x240 +#define SHADER_PWRACTIVE_HI 0x244 +#define TILER_PWRACTIVE_LO 0x250 +#define TILER_PWRACTIVE_HI 0x254 +#define L2_PWRACTIVE_LO 0x260 +#define L2_PWRACTIVE_HI 0x264 + +#define GPU_REVID 0x280 + +#define GPU_COHERENCY_FEATURES 0x300 +#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) + +#define GPU_COHERENCY_PROTOCOL 0x304 +#define GPU_COHERENCY_ACE 0 +#define GPU_COHERENCY_ACE_LITE 1 +#define GPU_COHERENCY_NONE 31 + +#define MCU_CONTROL 0x700 +#define MCU_CONTROL_ENABLE 1 +#define MCU_CONTROL_AUTO 2 +#define MCU_CONTROL_DISABLE 0 + +#define MCU_STATUS 0x704 +#define MCU_STATUS_DISABLED 0 +#define MCU_STATUS_ENABLED 1 +#define MCU_STATUS_HALT 2 +#define MCU_STATUS_FATAL 3 + +/* Job Control regs */ +#define JOB_INT_RAWSTAT 0x1000 +#define JOB_INT_CLEAR 0x1004 +#define JOB_INT_MASK 0x1008 +#define JOB_INT_STAT 0x100c +#define JOB_INT_GLOBAL_IF BIT(31) +#define JOB_INT_CSG_IF(x) BIT(x) + +/* MMU regs */ +#define MMU_INT_RAWSTAT 0x2000 +#define MMU_INT_CLEAR 0x2004 +#define MMU_INT_MASK 0x2008 +#define MMU_INT_STAT 0x200c + +/* AS_COMMAND register commands */ + +#define MMU_BASE 0x2400 +#define MMU_AS_SHIFT 6 +#define MMU_AS(as) (MMU_BASE + ((as) << MMU_AS_SHIFT)) + +#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x0) +#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x4) +#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x8) +#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0xC) +#define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2) +#define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \ + ((w) ? BIT(0) : 0) | \ + ((r) ? 
BIT(1) : 0)) +#define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4) +#define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4) +#define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4) +#define AS_MEMATTR_AARCH64_SHARED (0 << 6) +#define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6) +#define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6) +#define AS_MEMATTR_AARCH64_FAULT (3 << 6) +#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) +#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) +#define AS_COMMAND(as) (MMU_AS(as) + 0x18) +#define AS_COMMAND_NOP 0 +#define AS_COMMAND_UPDATE 1 +#define AS_COMMAND_LOCK 2 +#define AS_COMMAND_UNLOCK 3 +#define AS_COMMAND_FLUSH_PT 4 +#define AS_COMMAND_FLUSH_MEM 5 +#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) +#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C) +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) +#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) +#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) +#define AS_STATUS(as) (MMU_AS(as) + 0x28) +#define AS_STATUS_AS_ACTIVE BIT(0) +#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) +#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) +#define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0) +#define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0) +#define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0) +#define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0) +#define AS_TRANSCFG_INA_BITS(x) ((x) << 6) +#define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14) +#define AS_TRANSCFG_SL_CONCAT BIT(22) +#define AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24) +#define AS_TRANSCFG_PTW_SH_NS (0 << 28) +#define AS_TRANSCFG_PTW_SH_OS (2 << 28) +#define AS_TRANSCFG_PTW_SH_IS (3 << 28) +#define AS_TRANSCFG_PTW_RA BIT(30) +#define AS_TRANSCFG_DISABLE_HIER_AP BIT(33) +#define AS_TRANSCFG_DISABLE_AF_FAULT BIT(34) +#define AS_TRANSCFG_WXN BIT(35) +#define AS_TRANSCFG_XREADABLE BIT(36) +#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) +#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) + +#define CSF_GPU_LATEST_FLUSH_ID 0x10000 + +#define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000)) +#define CSF_GLB_DOORBELL_ID 0 + +#define gpu_write(dev, reg, data) \ + writel(data, (dev)->iomem + (reg)) + +#define gpu_read(dev, reg) \ + readl((dev)->iomem + (reg)) + +#endif diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c new file mode 100644 index 000000000000..5f7803b6fc48 --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -0,0 +1,3502 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2023 Collabora ltd. 
*/ + +#include <drm/drm_drv.h> +#include <drm/drm_exec.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_managed.h> +#include <drm/gpu_scheduler.h> +#include <drm/panthor_drm.h> + +#include <linux/build_bug.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dma-resv.h> +#include <linux/firmware.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/iosys-map.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include "panthor_devfreq.h" +#include "panthor_device.h" +#include "panthor_fw.h" +#include "panthor_gem.h" +#include "panthor_gpu.h" +#include "panthor_heap.h" +#include "panthor_mmu.h" +#include "panthor_regs.h" +#include "panthor_sched.h" + +/** + * DOC: Scheduler + * + * Mali CSF hardware adopts a firmware-assisted scheduling model, where + * the firmware takes care of scheduling aspects, to some extent. + * + * The scheduling happens at the scheduling group level; each group + * contains 1 to N queues (N is FW/hardware dependent, and exposed + * through the firmware interface). Each queue is assigned a command + * stream ring buffer, which serves as a way to get jobs submitted to + * the GPU, among other things. + * + * The firmware can schedule a maximum of M groups (M is FW/hardware + * dependent, and exposed through the firmware interface). Past + * this maximum number of groups, the kernel must take care of + * rotating the groups passed to the firmware so every group gets + * a chance to have its queues scheduled for execution. + * + * The current implementation only supports kernel-mode queues. + * In other words, userspace doesn't have access to the ring-buffer. + * Instead, userspace passes indirect command stream buffers that are + * called from the queue ring-buffer by the kernel using a pre-defined + * sequence of command stream instructions to ensure the userspace driver + * always gets consistent results (cache maintenance, + * synchronization, ...). + * + * We rely on the drm_gpu_scheduler framework to deal with job + * dependencies and submission. Like any other driver dealing with a + * FW scheduler, we use the 1:1 entity:scheduler mode, such that each + * entity has its own job scheduler. When a job is ready to be executed + * (all its dependencies are met), it is pushed to the appropriate + * queue ring-buffer, and the group is scheduled for execution if it + * wasn't already active. + * + * Kernel-side group scheduling is timeslice-based. When we have fewer + * groups than there are slots, the periodic tick is disabled and we + * just let the FW schedule the active groups. When there are more + * groups than slots, we give each group a chance to execute work for + * a given amount of time, and then re-evaluate and pick new groups + * to schedule. The group selection algorithm is based on + * priority+round-robin. + * + * Even though user-mode queues are out of scope right now, the + * current design takes them into account by avoiding any guess on the + * group/queue state that would be based on information we wouldn't have + * if userspace was in charge of the ring-buffer. That's also one of the + * reasons we don't do 'cooperative' scheduling (encoding FW group slot + * reservation as a dma_fence that would be returned from the + * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as + * a queue of waiters, ordered by job submission order).
This approach + * would work for kernel-mode queues, but would make user-mode queues a + * lot more complicated to retrofit. + */ + +#define JOB_TIMEOUT_MS 5000 + +#define MIN_CS_PER_CSG 8 + +#define MIN_CSGS 3 +#define MAX_CSG_PRIO 0xf + +struct panthor_group; + +/** + * struct panthor_csg_slot - Command stream group slot + * + * This represents a FW slot for a scheduling group. + */ +struct panthor_csg_slot { + /** @group: Scheduling group bound to this slot. */ + struct panthor_group *group; + + /** @priority: Group priority. */ + u8 priority; + + /** + * @idle: True if the group bound to this slot is idle. + * + * A group is idle when it has nothing waiting for execution on + * all its queues, or when queues are blocked waiting for something + * to happen (synchronization object). + */ + bool idle; +}; + +/** + * enum panthor_csg_priority - Group priority + */ +enum panthor_csg_priority { + /** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */ + PANTHOR_CSG_PRIORITY_LOW = 0, + + /** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */ + PANTHOR_CSG_PRIORITY_MEDIUM, + + /** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */ + PANTHOR_CSG_PRIORITY_HIGH, + + /** + * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group. + * + * Real-time priority allows one to preempt scheduling of other + * non-real-time groups. When such a group becomes executable, + * it will evict the group with the lowest non-rt priority if + * there's no free group slot available. + * + * Currently not exposed to userspace. + */ + PANTHOR_CSG_PRIORITY_RT, + + /** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */ + PANTHOR_CSG_PRIORITY_COUNT, +}; + +/** + * struct panthor_scheduler - Object used to manage the scheduler + */ +struct panthor_scheduler { + /** @ptdev: Device. */ + struct panthor_device *ptdev; + + /** + * @wq: Workqueue used by our internal scheduler logic and + * drm_gpu_scheduler. + * + * Used for the scheduler tick, group update or other kind of FW + * event processing that can't be handled in the threaded interrupt + * path. Also passed to the drm_gpu_scheduler instances embedded + * in panthor_queue. + */ + struct workqueue_struct *wq; + + /** + * @heap_alloc_wq: Workqueue used to schedule tiler_oom works. + * + * We have a queue dedicated to heap chunk allocation works to avoid + * blocking the rest of the scheduler if the allocation tries to + * reclaim memory. + */ + struct workqueue_struct *heap_alloc_wq; + + /** @tick_work: Work executed on a scheduling tick. */ + struct delayed_work tick_work; + + /** + * @sync_upd_work: Work used to process synchronization object updates. + * + * We use this work to unblock queues/groups that were waiting on a + * synchronization object. + */ + struct work_struct sync_upd_work; + + /** + * @fw_events_work: Work used to process FW events outside the interrupt path. + * + * Even if the interrupt is threaded, we need any event processing + * that require taking the panthor_scheduler::lock to be processed + * outside the interrupt path so we don't block the tick logic when + * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the + * event processing requires taking this lock, we just delegate all + * FW event processing to the scheduler workqueue. + */ + struct work_struct fw_events_work; + + /** + * @fw_events: Bitmask encoding pending FW events. + */ + atomic_t fw_events; + + /** + * @resched_target: When the next tick should occur. + * + * Expressed in jiffies. + */ + u64 resched_target; + + /** + * @last_tick: When the last tick occurred. 
+ * + * Expressed in jiffies. + */ + u64 last_tick; + + /** @tick_period: Tick period in jiffies. */ + u64 tick_period; + + /** + * @lock: Lock protecting access to all the scheduler fields. + * + * Should be taken in the tick work, the irq handler, and anywhere the @groups + * fields are touched. + */ + struct mutex lock; + + /** @groups: Various lists used to classify groups. */ + struct { + /** + * @runnable: Runnable group lists. + * + * When a group has queues that want to execute something, + * its panthor_group::run_node should be inserted here. + * + * One list per-priority. + */ + struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT]; + + /** + * @idle: Idle group lists. + * + * When all queues of a group are idle (either because they + * have nothing to execute, or because they are blocked), the + * panthor_group::run_node field should be inserted here. + * + * One list per-priority. + */ + struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT]; + + /** + * @waiting: List of groups whose queues are blocked on a + * synchronization object. + * + * Insert panthor_group::wait_node here when a group is waiting + * for synchronization objects to be signaled. + * + * This list is evaluated in the @sync_upd_work work. + */ + struct list_head waiting; + } groups; + + /** + * @csg_slots: FW command stream group slots. + */ + struct panthor_csg_slot csg_slots[MAX_CSGS]; + + /** @csg_slot_count: Number of command stream group slots exposed by the FW. */ + u32 csg_slot_count; + + /** @cs_slot_count: Number of command stream slot per group slot exposed by the FW. */ + u32 cs_slot_count; + + /** @as_slot_count: Number of address space slots supported by the MMU. */ + u32 as_slot_count; + + /** @used_csg_slot_count: Number of command stream group slot currently used. */ + u32 used_csg_slot_count; + + /** @sb_slot_count: Number of scoreboard slots. */ + u32 sb_slot_count; + + /** + * @might_have_idle_groups: True if an active group might have become idle. + * + * This will force a tick, so other runnable groups can be scheduled if one + * or more active groups became idle. + */ + bool might_have_idle_groups; + + /** @pm: Power management related fields. */ + struct { + /** @has_ref: True if the scheduler owns a runtime PM reference. */ + bool has_ref; + } pm; + + /** @reset: Reset related fields. */ + struct { + /** @lock: Lock protecting the other reset fields. */ + struct mutex lock; + + /** + * @in_progress: True if a reset is in progress. + * + * Set to true in panthor_sched_pre_reset() and back to false in + * panthor_sched_post_reset(). + */ + atomic_t in_progress; + + /** + * @stopped_groups: List containing all groups that were stopped + * before a reset. + * + * Insert panthor_group::run_node in the pre_reset path. + */ + struct list_head stopped_groups; + } reset; +}; + +/** + * struct panthor_syncobj_32b - 32-bit FW synchronization object + */ +struct panthor_syncobj_32b { + /** @seqno: Sequence number. */ + u32 seqno; + + /** + * @status: Status. + * + * Not zero on failure. + */ + u32 status; +}; + +/** + * struct panthor_syncobj_64b - 64-bit FW synchronization object + */ +struct panthor_syncobj_64b { + /** @seqno: Sequence number. */ + u64 seqno; + + /** + * @status: Status. + * + * Not zero on failure. + */ + u32 status; + + /** @pad: MBZ. */ + u32 pad; +}; + +/** + * struct panthor_queue - Execution queue + */ +struct panthor_queue { + /** @scheduler: DRM scheduler used for this queue. 
*/ + struct drm_gpu_scheduler scheduler; + + /** @entity: DRM scheduling entity used for this queue. */ + struct drm_sched_entity entity; + + /** + * @remaining_time: Time remaining before the job timeout expires. + * + * The job timeout is suspended when the queue is not scheduled by the + * FW. Every time we suspend the timer, we need to save the remaining + * time so we can restore it later on. + */ + unsigned long remaining_time; + + /** @timeout_suspended: True if the job timeout was suspended. */ + bool timeout_suspended; + + /** + * @doorbell_id: Doorbell assigned to this queue. + * + * Right now, all groups share the same doorbell, and the doorbell ID + * is assigned to group_slot + 1 when the group is assigned a slot. But + * we might decide to provide fine grained doorbell assignment at some + * point, so don't have to wake up all queues in a group every time one + * of them is updated. + */ + u8 doorbell_id; + + /** + * @priority: Priority of the queue inside the group. + * + * Must be less than 16 (Only 4 bits available). + */ + u8 priority; +#define CSF_MAX_QUEUE_PRIO GENMASK(3, 0) + + /** @ringbuf: Command stream ring-buffer. */ + struct panthor_kernel_bo *ringbuf; + + /** @iface: Firmware interface. */ + struct { + /** @mem: FW memory allocated for this interface. */ + struct panthor_kernel_bo *mem; + + /** @input: Input interface. */ + struct panthor_fw_ringbuf_input_iface *input; + + /** @output: Output interface. */ + const struct panthor_fw_ringbuf_output_iface *output; + + /** @input_fw_va: FW virtual address of the input interface buffer. */ + u32 input_fw_va; + + /** @output_fw_va: FW virtual address of the output interface buffer. */ + u32 output_fw_va; + } iface; + + /** + * @syncwait: Stores information about the synchronization object this + * queue is waiting on. + */ + struct { + /** @gpu_va: GPU address of the synchronization object. */ + u64 gpu_va; + + /** @ref: Reference value to compare against. */ + u64 ref; + + /** @gt: True if this is a greater-than test. */ + bool gt; + + /** @sync64: True if this is a 64-bit sync object. */ + bool sync64; + + /** @bo: Buffer object holding the synchronization object. */ + struct drm_gem_object *obj; + + /** @offset: Offset of the synchronization object inside @bo. */ + u64 offset; + + /** + * @kmap: Kernel mapping of the buffer object holding the + * synchronization object. + */ + void *kmap; + } syncwait; + + /** @fence_ctx: Fence context fields. */ + struct { + /** @lock: Used to protect access to all fences allocated by this context. */ + spinlock_t lock; + + /** + * @id: Fence context ID. + * + * Allocated with dma_fence_context_alloc(). + */ + u64 id; + + /** @seqno: Sequence number of the last initialized fence. */ + atomic64_t seqno; + + /** + * @in_flight_jobs: List containing all in-flight jobs. + * + * Used to keep track and signal panthor_job::done_fence when the + * synchronization object attached to the queue is signaled. + */ + struct list_head in_flight_jobs; + } fence_ctx; +}; + +/** + * enum panthor_group_state - Scheduling group state. + */ +enum panthor_group_state { + /** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */ + PANTHOR_CS_GROUP_CREATED, + + /** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */ + PANTHOR_CS_GROUP_ACTIVE, + + /** + * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is + * inactive/suspended right now. + */ + PANTHOR_CS_GROUP_SUSPENDED, + + /** + * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated. 
+ * + * Can no longer be scheduled. The only allowed action is destruction. + */ + PANTHOR_CS_GROUP_TERMINATED, +}; + +/** + * struct panthor_group - Scheduling group object + */ +struct panthor_group { + /** @refcount: Reference count */ + struct kref refcount; + + /** @ptdev: Device. */ + struct panthor_device *ptdev; + + /** @vm: VM bound to the group. */ + struct panthor_vm *vm; + + /** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */ + u64 compute_core_mask; + + /** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */ + u64 fragment_core_mask; + + /** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */ + u64 tiler_core_mask; + + /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */ + u8 max_compute_cores; + + /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */ + u8 max_fragment_cores; + + /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */ + u8 max_tiler_cores; + + /** @priority: Group priority (check panthor_csg_priority). */ + u8 priority; + + /** @blocked_queues: Bitmask reflecting the blocked queues. */ + u32 blocked_queues; + + /** @idle_queues: Bitmask reflecting the idle queues. */ + u32 idle_queues; + + /** @fatal_lock: Lock used to protect access to fatal fields. */ + spinlock_t fatal_lock; + + /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */ + u32 fatal_queues; + + /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */ + atomic_t tiler_oom; + + /** @queue_count: Number of queues in this group. */ + u32 queue_count; + + /** @queues: Queues owned by this group. */ + struct panthor_queue *queues[MAX_CS_PER_CSG]; + + /** + * @csg_id: ID of the FW group slot. + * + * -1 when the group is not scheduled/active. + */ + int csg_id; + + /** + * @destroyed: True when the group has been destroyed. + * + * If a group is destroyed it becomes useless: no further jobs can be submitted + * to its queues. We simply wait for all references to be dropped so we can + * release the group object. + */ + bool destroyed; + + /** + * @timedout: True when a timeout occurred on any of the queues owned by + * this group. + * + * Timeouts can be reported by drm_sched or by the FW. In any case, any + * timeout situation is unrecoverable, and the group becomes useless. + * We simply wait for all references to be dropped so we can release the + * group object. + */ + bool timedout; + + /** + * @syncobjs: Pool of per-queue synchronization objects. + * + * One sync object per queue. The position of the sync object is + * determined by the queue index. + */ + struct panthor_kernel_bo *syncobjs; + + /** @state: Group state. */ + enum panthor_group_state state; + + /** + * @suspend_buf: Suspend buffer. + * + * Stores the state of the group and its queues when a group is suspended. + * Used at resume time to restore the group in its previous state. + * + * The size of the suspend buffer is exposed through the FW interface. + */ + struct panthor_kernel_bo *suspend_buf; + + /** + * @protm_suspend_buf: Protection mode suspend buffer. + * + * Stores the state of the group and its queues when a group that's in + * protection mode is suspended. + * + * Used at resume time to restore the group in its previous state. + * + * The size of the protection mode suspend buffer is exposed through the + * FW interface.
+ */ + struct panthor_kernel_bo *protm_suspend_buf; + + /** @sync_upd_work: Work used to check/signal job fences. */ + struct work_struct sync_upd_work; + + /** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */ + struct work_struct tiler_oom_work; + + /** @term_work: Work used to finish the group termination procedure. */ + struct work_struct term_work; + + /** + * @release_work: Work used to release group resources. + * + * We need to postpone the group release to avoid a deadlock when + * the last ref is released in the tick work. + */ + struct work_struct release_work; + + /** + * @run_node: Node used to insert the group in the + * panthor_group::groups::{runnable,idle} and + * panthor_group::reset.stopped_groups lists. + */ + struct list_head run_node; + + /** + * @wait_node: Node used to insert the group in the + * panthor_group::groups::waiting list. + */ + struct list_head wait_node; +}; + +/** + * group_queue_work() - Queue a group work + * @group: Group to queue the work for. + * @wname: Work name. + * + * Grabs a ref and queue a work item to the scheduler workqueue. If + * the work was already queued, we release the reference we grabbed. + * + * Work callbacks must release the reference we grabbed here. + */ +#define group_queue_work(group, wname) \ + do { \ + group_get(group); \ + if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \ + group_put(group); \ + } while (0) + +/** + * sched_queue_work() - Queue a scheduler work. + * @sched: Scheduler object. + * @wname: Work name. + * + * Conditionally queues a scheduler work if no reset is pending/in-progress. + */ +#define sched_queue_work(sched, wname) \ + do { \ + if (!atomic_read(&(sched)->reset.in_progress) && \ + !panthor_device_reset_is_pending((sched)->ptdev)) \ + queue_work((sched)->wq, &(sched)->wname ## _work); \ + } while (0) + +/** + * sched_queue_delayed_work() - Queue a scheduler delayed work. + * @sched: Scheduler object. + * @wname: Work name. + * @delay: Work delay in jiffies. + * + * Conditionally queues a scheduler delayed work if no reset is + * pending/in-progress. + */ +#define sched_queue_delayed_work(sched, wname, delay) \ + do { \ + if (!atomic_read(&sched->reset.in_progress) && \ + !panthor_device_reset_is_pending((sched)->ptdev)) \ + mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \ + } while (0) + +/* + * We currently set the maximum of groups per file to an arbitrary low value. + * But this can be updated if we need more. + */ +#define MAX_GROUPS_PER_POOL 128 + +/** + * struct panthor_group_pool - Group pool + * + * Each file get assigned a group pool. + */ +struct panthor_group_pool { + /** @xa: Xarray used to manage group handles. */ + struct xarray xa; +}; + +/** + * struct panthor_job - Used to manage GPU job + */ +struct panthor_job { + /** @base: Inherit from drm_sched_job. */ + struct drm_sched_job base; + + /** @refcount: Reference count. */ + struct kref refcount; + + /** @group: Group of the queue this job will be pushed to. */ + struct panthor_group *group; + + /** @queue_idx: Index of the queue inside @group. */ + u32 queue_idx; + + /** @call_info: Information about the userspace command stream call. */ + struct { + /** @start: GPU address of the userspace command stream. */ + u64 start; + + /** @size: Size of the userspace command stream. */ + u32 size; + + /** + * @latest_flush: Flush ID at the time the userspace command + * stream was built. + * + * Needed for the flush reduction mechanism. 
+ */ + u32 latest_flush; + } call_info; + + /** @ringbuf: Position of this job is in the ring buffer. */ + struct { + /** @start: Start offset. */ + u64 start; + + /** @end: End offset. */ + u64 end; + } ringbuf; + + /** + * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs + * list. + */ + struct list_head node; + + /** @done_fence: Fence signaled when the job is finished or cancelled. */ + struct dma_fence *done_fence; +}; + +static void +panthor_queue_put_syncwait_obj(struct panthor_queue *queue) +{ + if (queue->syncwait.kmap) { + struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap); + + drm_gem_vunmap_unlocked(queue->syncwait.obj, &map); + queue->syncwait.kmap = NULL; + } + + drm_gem_object_put(queue->syncwait.obj); + queue->syncwait.obj = NULL; +} + +static void * +panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue) +{ + struct panthor_device *ptdev = group->ptdev; + struct panthor_gem_object *bo; + struct iosys_map map; + int ret; + + if (queue->syncwait.kmap) + return queue->syncwait.kmap + queue->syncwait.offset; + + bo = panthor_vm_get_bo_for_va(group->vm, + queue->syncwait.gpu_va, + &queue->syncwait.offset); + if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo))) + goto err_put_syncwait_obj; + + queue->syncwait.obj = &bo->base.base; + ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map); + if (drm_WARN_ON(&ptdev->base, ret)) + goto err_put_syncwait_obj; + + queue->syncwait.kmap = map.vaddr; + if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) + goto err_put_syncwait_obj; + + return queue->syncwait.kmap + queue->syncwait.offset; + +err_put_syncwait_obj: + panthor_queue_put_syncwait_obj(queue); + return NULL; +} + +static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue) +{ + if (IS_ERR_OR_NULL(queue)) + return; + + if (queue->entity.fence_context) + drm_sched_entity_destroy(&queue->entity); + + if (queue->scheduler.ops) + drm_sched_fini(&queue->scheduler); + + panthor_queue_put_syncwait_obj(queue); + + panthor_kernel_bo_destroy(group->vm, queue->ringbuf); + panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem); + + kfree(queue); +} + +static void group_release_work(struct work_struct *work) +{ + struct panthor_group *group = container_of(work, + struct panthor_group, + release_work); + struct panthor_device *ptdev = group->ptdev; + u32 i; + + for (i = 0; i < group->queue_count; i++) + group_free_queue(group, group->queues[i]); + + panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf); + panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf); + panthor_kernel_bo_destroy(group->vm, group->syncobjs); + + panthor_vm_put(group->vm); + kfree(group); +} + +static void group_release(struct kref *kref) +{ + struct panthor_group *group = container_of(kref, + struct panthor_group, + refcount); + struct panthor_device *ptdev = group->ptdev; + + drm_WARN_ON(&ptdev->base, group->csg_id >= 0); + drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node)); + drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node)); + + queue_work(panthor_cleanup_wq, &group->release_work); +} + +static void group_put(struct panthor_group *group) +{ + if (group) + kref_put(&group->refcount, group_release); +} + +static struct panthor_group * +group_get(struct panthor_group *group) +{ + if (group) + kref_get(&group->refcount); + + return group; +} + +/** + * group_bind_locked() - Bind a group to a group slot + * @group: Group. + * @csg_id: Slot. 
+ * + * Return: 0 on success, a negative error code otherwise. + */ +static int +group_bind_locked(struct panthor_group *group, u32 csg_id) +{ + struct panthor_device *ptdev = group->ptdev; + struct panthor_csg_slot *csg_slot; + int ret; + + lockdep_assert_held(&ptdev->scheduler->lock); + + if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS || + ptdev->scheduler->csg_slots[csg_id].group)) + return -EINVAL; + + ret = panthor_vm_active(group->vm); + if (ret) + return ret; + + csg_slot = &ptdev->scheduler->csg_slots[csg_id]; + group_get(group); + group->csg_id = csg_id; + + /* Dummy doorbell allocation: doorbell is assigned to the group and + * all queues use the same doorbell. + * + * TODO: Implement LRU-based doorbell assignment, so the most often + * updated queues get their own doorbell, thus avoiding useless checks + * on queues belonging to the same group that are rarely updated. + */ + for (u32 i = 0; i < group->queue_count; i++) + group->queues[i]->doorbell_id = csg_id + 1; + + csg_slot->group = group; + + return 0; +} + +/** + * group_unbind_locked() - Unbind a group from a slot. + * @group: Group to unbind. + * + * Return: 0 on success, a negative error code otherwise. + */ +static int +group_unbind_locked(struct panthor_group *group) +{ + struct panthor_device *ptdev = group->ptdev; + struct panthor_csg_slot *slot; + + lockdep_assert_held(&ptdev->scheduler->lock); + + if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS)) + return -EINVAL; + + if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE)) + return -EINVAL; + + slot = &ptdev->scheduler->csg_slots[group->csg_id]; + panthor_vm_idle(group->vm); + group->csg_id = -1; + + /* Tiler OOM events will be re-issued next time the group is scheduled. */ + atomic_set(&group->tiler_oom, 0); + cancel_work(&group->tiler_oom_work); + + for (u32 i = 0; i < group->queue_count; i++) + group->queues[i]->doorbell_id = -1; + + slot->group = NULL; + + group_put(group); + return 0; +} + +/** + * cs_slot_prog_locked() - Program a queue slot + * @ptdev: Device. + * @csg_id: Group slot ID. + * @cs_id: Queue slot ID. + * + * Program a queue slot with the queue information so things can start being + * executed on this queue. + * + * The group slot must have a group bound to it already (group_bind_locked()). 
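+ * + * Also resumes the queue's drm_sched timeout if there is work pending in the ring buffer (insert != extract) and the timeout was previously suspended.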
+ */ +static void +cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) +{ + struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id]; + struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); + + lockdep_assert_held(&ptdev->scheduler->lock); + + queue->iface.input->extract = queue->iface.output->extract; + drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract); + + cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf); + cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); + cs_iface->input->ringbuf_input = queue->iface.input_fw_va; + cs_iface->input->ringbuf_output = queue->iface.output_fw_va; + cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) | + CS_CONFIG_DOORBELL(queue->doorbell_id); + cs_iface->input->ack_irq_mask = ~0; + panthor_fw_update_reqs(cs_iface, req, + CS_IDLE_SYNC_WAIT | + CS_IDLE_EMPTY | + CS_STATE_START | + CS_EXTRACT_EVENT, + CS_IDLE_SYNC_WAIT | + CS_IDLE_EMPTY | + CS_STATE_MASK | + CS_EXTRACT_EVENT); + if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) { + drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time); + queue->timeout_suspended = false; + } +} + +/** + * cs_slot_reset_locked() - Reset a queue slot + * @ptdev: Device. + * @csg_id: Group slot. + * @cs_id: Queue slot. + * + * Change the queue slot state to STOP and suspend the queue timeout if + * the queue is not blocked. + * + * The group slot must have a group bound to it (group_bind_locked()). + */ +static int +cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) +{ + struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); + struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; + struct panthor_queue *queue = group->queues[cs_id]; + + lockdep_assert_held(&ptdev->scheduler->lock); + + panthor_fw_update_reqs(cs_iface, req, + CS_STATE_STOP, + CS_STATE_MASK); + + /* If the queue is blocked, we want to keep the timeout running, so + * we can detect unbounded waits and kill the group when that happens. + */ + if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) { + queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); + queue->timeout_suspended = true; + WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS)); + } + + return 0; +} + +/** + * csg_slot_sync_priority_locked() - Synchronize the group slot priority + * @ptdev: Device. + * @csg_id: Group slot ID. + * + * Group slot priority update happens asynchronously. When we receive a + * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can + * reflect it to our panthor_csg_slot object. + */ +static void +csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id) +{ + struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; + struct panthor_fw_csg_iface *csg_iface; + + lockdep_assert_held(&ptdev->scheduler->lock); + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28; +} + +/** + * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state + * @ptdev: Device. + * @csg_id: Group slot. + * @cs_id: Queue slot. + * + * Queue state is updated on group suspend or STATUS_UPDATE event.
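+ * + * When a queue is blocked on a synchronization object, the wait condition (address, reference value, comparison, 32/64-bit width) is captured in panthor_queue::syncwait so it can be re-evaluated when synchronization objects are updated.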
+ */ +static void +cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) +{ + struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; + struct panthor_queue *queue = group->queues[cs_id]; + struct panthor_fw_cs_iface *cs_iface = + panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id); + + u32 status_wait_cond; + + switch (cs_iface->output->status_blocked_reason) { + case CS_STATUS_BLOCKED_REASON_UNBLOCKED: + if (queue->iface.input->insert == queue->iface.output->extract && + cs_iface->output->status_scoreboards == 0) + group->idle_queues |= BIT(cs_id); + break; + + case CS_STATUS_BLOCKED_REASON_SYNC_WAIT: + if (list_empty(&group->wait_node)) { + list_move_tail(&group->wait_node, + &group->ptdev->scheduler->groups.waiting); + } + group->blocked_queues |= BIT(cs_id); + queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; + queue->syncwait.ref = cs_iface->output->status_wait_sync_value; + status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; + queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT; + if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) { + u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi; + + queue->syncwait.sync64 = true; + queue->syncwait.ref |= sync_val_hi << 32; + } else { + queue->syncwait.sync64 = false; + } + break; + + default: + /* Other reasons are not blocking. Consider the queue as runnable + * in those cases. + */ + break; + } +} + +static void +csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id) +{ + struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; + u32 i; + + lockdep_assert_held(&ptdev->scheduler->lock); + + group->idle_queues = 0; + group->blocked_queues = 0; + + for (i = 0; i < group->queue_count; i++) { + if (group->queues[i]) + cs_slot_sync_queue_state_locked(ptdev, csg_id, i); + } +} + +static void +csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id) +{ + struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; + struct panthor_fw_csg_iface *csg_iface; + struct panthor_group *group; + enum panthor_group_state new_state, old_state; + + lockdep_assert_held(&ptdev->scheduler->lock); + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + group = csg_slot->group; + + if (!group) + return; + + old_state = group->state; + switch (csg_iface->output->ack & CSG_STATE_MASK) { + case CSG_STATE_START: + case CSG_STATE_RESUME: + new_state = PANTHOR_CS_GROUP_ACTIVE; + break; + case CSG_STATE_TERMINATE: + new_state = PANTHOR_CS_GROUP_TERMINATED; + break; + case CSG_STATE_SUSPEND: + new_state = PANTHOR_CS_GROUP_SUSPENDED; + break; + } + + if (old_state == new_state) + return; + + if (new_state == PANTHOR_CS_GROUP_SUSPENDED) + csg_slot_sync_queues_state_locked(ptdev, csg_id); + + if (old_state == PANTHOR_CS_GROUP_ACTIVE) { + u32 i; + + /* Reset the queue slots so we start from a clean + * state when starting/resuming a new group on this + * CSG slot. No wait needed here, and no ringbell + * either, since the CS slot will only be re-used + * on the next CSG start operation. 
+ */ + for (i = 0; i < group->queue_count; i++) { + if (group->queues[i]) + cs_slot_reset_locked(ptdev, csg_id, i); + } + } + + group->state = new_state; +} + +static int +csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) +{ + struct panthor_fw_csg_iface *csg_iface; + struct panthor_csg_slot *csg_slot; + struct panthor_group *group; + u32 queue_mask = 0, i; + + lockdep_assert_held(&ptdev->scheduler->lock); + + if (priority > MAX_CSG_PRIO) + return -EINVAL; + + if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS)) + return -EINVAL; + + csg_slot = &ptdev->scheduler->csg_slots[csg_id]; + group = csg_slot->group; + if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE) + return 0; + + csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id); + + for (i = 0; i < group->queue_count; i++) { + if (group->queues[i]) { + cs_slot_prog_locked(ptdev, csg_id, i); + queue_mask |= BIT(i); + } + } + + csg_iface->input->allow_compute = group->compute_core_mask; + csg_iface->input->allow_fragment = group->fragment_core_mask; + csg_iface->input->allow_other = group->tiler_core_mask; + csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | + CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | + CSG_EP_REQ_TILER(group->max_tiler_cores) | + CSG_EP_REQ_PRIORITY(priority); + csg_iface->input->config = panthor_vm_as(group->vm); + + if (group->suspend_buf) + csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf); + else + csg_iface->input->suspend_buf = 0; + + if (group->protm_suspend_buf) { + csg_iface->input->protm_suspend_buf = + panthor_kernel_bo_gpuva(group->protm_suspend_buf); + } else { + csg_iface->input->protm_suspend_buf = 0; + } + + csg_iface->input->ack_irq_mask = ~0; + panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask); + return 0; +} + +static void +cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, + u32 csg_id, u32 cs_id) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; + struct panthor_fw_cs_iface *cs_iface; + u32 fatal; + u64 info; + + lockdep_assert_held(&sched->lock); + + cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); + fatal = cs_iface->output->fatal; + info = cs_iface->output->fatal_info; + + if (group) + group->fatal_queues |= BIT(cs_id); + + sched_queue_delayed_work(sched, tick, 0); + drm_warn(&ptdev->base, + "CSG slot %d CS slot: %d\n" + "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" + "CS_FATAL.EXCEPTION_DATA: 0x%x\n" + "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", + csg_id, cs_id, + (unsigned int)CS_EXCEPTION_TYPE(fatal), + panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)), + (unsigned int)CS_EXCEPTION_DATA(fatal), + info); +} + +static void +cs_slot_process_fault_event_locked(struct panthor_device *ptdev, + u32 csg_id, u32 cs_id) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; + struct panthor_queue *queue = group && cs_id < group->queue_count ? 
+ group->queues[cs_id] : NULL; + struct panthor_fw_cs_iface *cs_iface; + u32 fault; + u64 info; + + lockdep_assert_held(&sched->lock); + + cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); + fault = cs_iface->output->fault; + info = cs_iface->output->fault_info; + + if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) { + u64 cs_extract = queue->iface.output->extract; + struct panthor_job *job; + + spin_lock(&queue->fence_ctx.lock); + list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) { + if (cs_extract >= job->ringbuf.end) + continue; + + if (cs_extract < job->ringbuf.start) + break; + + dma_fence_set_error(job->done_fence, -EINVAL); + } + spin_unlock(&queue->fence_ctx.lock); + } + + drm_warn(&ptdev->base, + "CSG slot %d CS slot: %d\n" + "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" + "CS_FAULT.EXCEPTION_DATA: 0x%x\n" + "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", + csg_id, cs_id, + (unsigned int)CS_EXCEPTION_TYPE(fault), + panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)), + (unsigned int)CS_EXCEPTION_DATA(fault), + info); +} + +static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id) +{ + struct panthor_device *ptdev = group->ptdev; + struct panthor_scheduler *sched = ptdev->scheduler; + u32 renderpasses_in_flight, pending_frag_count; + struct panthor_heap_pool *heaps = NULL; + u64 heap_address, new_chunk_va = 0; + u32 vt_start, vt_end, frag_end; + int ret, csg_id; + + mutex_lock(&sched->lock); + csg_id = group->csg_id; + if (csg_id >= 0) { + struct panthor_fw_cs_iface *cs_iface; + + cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); + heaps = panthor_vm_get_heap_pool(group->vm, false); + heap_address = cs_iface->output->heap_address; + vt_start = cs_iface->output->heap_vt_start; + vt_end = cs_iface->output->heap_vt_end; + frag_end = cs_iface->output->heap_frag_end; + renderpasses_in_flight = vt_start - frag_end; + pending_frag_count = vt_end - frag_end; + } + mutex_unlock(&sched->lock); + + /* The group got scheduled out, we stop here. We will get a new tiler OOM event + * when it's scheduled again. + */ + if (unlikely(csg_id < 0)) + return 0; + + if (!heaps || frag_end > vt_end || vt_end >= vt_start) { + ret = -EINVAL; + } else { + /* We do the allocation without holding the scheduler lock to avoid + * blocking the scheduling. + */ + ret = panthor_heap_grow(heaps, heap_address, + renderpasses_in_flight, + pending_frag_count, &new_chunk_va); + } + + if (ret && ret != -EBUSY) { + drm_warn(&ptdev->base, "Failed to extend the tiler heap\n"); + group->fatal_queues |= BIT(cs_id); + sched_queue_delayed_work(sched, tick, 0); + goto out_put_heap_pool; + } + + mutex_lock(&sched->lock); + csg_id = group->csg_id; + if (csg_id >= 0) { + struct panthor_fw_csg_iface *csg_iface; + struct panthor_fw_cs_iface *cs_iface; + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); + + cs_iface->input->heap_start = new_chunk_va; + cs_iface->input->heap_end = new_chunk_va; + panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM); + panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id)); + panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); + } + mutex_unlock(&sched->lock); + + /* We allocated a chunck, but couldn't link it to the heap + * context because the group was scheduled out while we were + * allocating memory. We need to return this chunk to the heap. 
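+ *
+ * This is the price of doing the allocation with sched->lock dropped:
+ * csg_id has to be re-sampled under the lock after panthor_heap_grow(),
+ * and if the group lost its slot in the meantime the chunk can't be
+ * handed to the FW, so it is given back to the heap pool instead.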
+ */ + if (unlikely(csg_id < 0 && new_chunk_va)) + panthor_heap_return_chunk(heaps, heap_address, new_chunk_va); + + ret = 0; + +out_put_heap_pool: + panthor_heap_pool_put(heaps); + return ret; +} + +static void group_tiler_oom_work(struct work_struct *work) +{ + struct panthor_group *group = + container_of(work, struct panthor_group, tiler_oom_work); + u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0); + + while (tiler_oom) { + u32 cs_id = ffs(tiler_oom) - 1; + + group_process_tiler_oom(group, cs_id); + tiler_oom &= ~BIT(cs_id); + } + + group_put(group); +} + +static void +cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev, + u32 csg_id, u32 cs_id) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; + + lockdep_assert_held(&sched->lock); + + if (drm_WARN_ON(&ptdev->base, !group)) + return; + + atomic_or(BIT(cs_id), &group->tiler_oom); + + /* We don't use group_queue_work() here because we want to queue the + * work item to the heap_alloc_wq. + */ + group_get(group); + if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work)) + group_put(group); +} + +static bool cs_slot_process_irq_locked(struct panthor_device *ptdev, + u32 csg_id, u32 cs_id) +{ + struct panthor_fw_cs_iface *cs_iface; + u32 req, ack, events; + + lockdep_assert_held(&ptdev->scheduler->lock); + + cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); + req = cs_iface->input->req; + ack = cs_iface->output->ack; + events = (req ^ ack) & CS_EVT_MASK; + + if (events & CS_FATAL) + cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id); + + if (events & CS_FAULT) + cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id); + + if (events & CS_TILER_OOM) + cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id); + + /* We don't acknowledge the TILER_OOM event since its handling is + * deferred to a separate work. + */ + panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT); + + return (events & (CS_FAULT | CS_TILER_OOM)) != 0; +} + +static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id) +{ + struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; + struct panthor_fw_csg_iface *csg_iface; + + lockdep_assert_held(&ptdev->scheduler->lock); + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE; +} + +static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + + lockdep_assert_held(&sched->lock); + + sched->might_have_idle_groups = true; + + /* Schedule a tick so we can evict idle groups and schedule non-idle + * ones. This will also update runtime PM and devfreq busy/idle states, + * so the device can lower its frequency or get suspended. 
+ */ + sched_queue_delayed_work(sched, tick, 0); +} + +static void csg_slot_sync_update_locked(struct panthor_device *ptdev, + u32 csg_id) +{ + struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; + + lockdep_assert_held(&ptdev->scheduler->lock); + + if (group) + group_queue_work(group, sync_upd); + + sched_queue_work(ptdev->scheduler, sync_upd); +} + +static void +csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; + + lockdep_assert_held(&sched->lock); + + drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id); + + group = csg_slot->group; + if (!drm_WARN_ON(&ptdev->base, !group)) + group->timedout = true; + + sched_queue_delayed_work(sched, tick, 0); +} + +static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id) +{ + u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events; + struct panthor_fw_csg_iface *csg_iface; + u32 ring_cs_db_mask = 0; + + lockdep_assert_held(&ptdev->scheduler->lock); + + if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) + return; + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + req = READ_ONCE(csg_iface->input->req); + ack = READ_ONCE(csg_iface->output->ack); + cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req); + cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack); + csg_events = (req ^ ack) & CSG_EVT_MASK; + + /* There may not be any pending CSG/CS interrupts to process */ + if (req == ack && cs_irq_req == cs_irq_ack) + return; + + /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before + * examining the CS_ACK & CS_REQ bits. This would ensure that Host + * doesn't miss an interrupt for the CS in the race scenario where + * whilst Host is servicing an interrupt for the CS, firmware sends + * another interrupt for that CS. + */ + csg_iface->input->cs_irq_ack = cs_irq_req; + + panthor_fw_update_reqs(csg_iface, req, ack, + CSG_SYNC_UPDATE | + CSG_IDLE | + CSG_PROGRESS_TIMER_EVENT); + + if (csg_events & CSG_IDLE) + csg_slot_process_idle_event_locked(ptdev, csg_id); + + if (csg_events & CSG_PROGRESS_TIMER_EVENT) + csg_slot_process_progress_timer_event_locked(ptdev, csg_id); + + cs_irqs = cs_irq_req ^ cs_irq_ack; + while (cs_irqs) { + u32 cs_id = ffs(cs_irqs) - 1; + + if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id)) + ring_cs_db_mask |= BIT(cs_id); + + cs_irqs &= ~BIT(cs_id); + } + + if (csg_events & CSG_SYNC_UPDATE) + csg_slot_sync_update_locked(ptdev, csg_id); + + if (ring_cs_db_mask) + panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask); + + panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); +} + +static void sched_process_idle_event_locked(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + lockdep_assert_held(&ptdev->scheduler->lock); + + /* Acknowledge the idle event and schedule a tick. */ + panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE); + sched_queue_delayed_work(ptdev->scheduler, tick, 0); +} + +/** + * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ + * @ptdev: Device. 
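+ *
+ * Only the %GLB_IDLE event is of interest to the scheduler: it is
+ * acknowledged and turned into an immediate tick so idle groups can be
+ * evicted (see sched_process_idle_event_locked()).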
+ */ +static void sched_process_global_irq_locked(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + u32 req, ack, evts; + + lockdep_assert_held(&ptdev->scheduler->lock); + + req = READ_ONCE(glb_iface->input->req); + ack = READ_ONCE(glb_iface->output->ack); + evts = (req ^ ack) & GLB_EVT_MASK; + + if (evts & GLB_IDLE) + sched_process_idle_event_locked(ptdev); +} + +static void process_fw_events_work(struct work_struct *work) +{ + struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, + fw_events_work); + u32 events = atomic_xchg(&sched->fw_events, 0); + struct panthor_device *ptdev = sched->ptdev; + + mutex_lock(&sched->lock); + + if (events & JOB_INT_GLOBAL_IF) { + sched_process_global_irq_locked(ptdev); + events &= ~JOB_INT_GLOBAL_IF; + } + + while (events) { + u32 csg_id = ffs(events) - 1; + + sched_process_csg_irq_locked(ptdev, csg_id); + events &= ~BIT(csg_id); + } + + mutex_unlock(&sched->lock); +} + +/** + * panthor_sched_report_fw_events() - Report FW events to the scheduler. + */ +void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events) +{ + if (!ptdev->scheduler) + return; + + atomic_or(events, &ptdev->scheduler->fw_events); + sched_queue_work(ptdev->scheduler, fw_events); +} + +static const char *fence_get_driver_name(struct dma_fence *fence) +{ + return "panthor"; +} + +static const char *queue_fence_get_timeline_name(struct dma_fence *fence) +{ + return "queue-fence"; +} + +static const struct dma_fence_ops panthor_queue_fence_ops = { + .get_driver_name = fence_get_driver_name, + .get_timeline_name = queue_fence_get_timeline_name, +}; + +/** + */ +struct panthor_csg_slots_upd_ctx { + u32 update_mask; + u32 timedout_mask; + struct { + u32 value; + u32 mask; + } requests[MAX_CSGS]; +}; + +static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx) +{ + memset(ctx, 0, sizeof(*ctx)); +} + +static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev, + struct panthor_csg_slots_upd_ctx *ctx, + u32 csg_id, u32 value, u32 mask) +{ + if (drm_WARN_ON(&ptdev->base, !mask) || + drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) + return; + + ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask); + ctx->requests[csg_id].mask |= mask; + ctx->update_mask |= BIT(csg_id); +} + +static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev, + struct panthor_csg_slots_upd_ctx *ctx) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + u32 update_slots = ctx->update_mask; + + lockdep_assert_held(&sched->lock); + + if (!ctx->update_mask) + return 0; + + while (update_slots) { + struct panthor_fw_csg_iface *csg_iface; + u32 csg_id = ffs(update_slots) - 1; + + update_slots &= ~BIT(csg_id); + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + panthor_fw_update_reqs(csg_iface, req, + ctx->requests[csg_id].value, + ctx->requests[csg_id].mask); + } + + panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask); + + update_slots = ctx->update_mask; + while (update_slots) { + struct panthor_fw_csg_iface *csg_iface; + u32 csg_id = ffs(update_slots) - 1; + u32 req_mask = ctx->requests[csg_id].mask, acked; + int ret; + + update_slots &= ~BIT(csg_id); + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + + ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100); + + if (acked & CSG_ENDPOINT_CONFIG) + csg_slot_sync_priority_locked(ptdev, csg_id); + + if (acked & CSG_STATE_MASK) + csg_slot_sync_state_locked(ptdev, 
csg_id); + + if (acked & CSG_STATUS_UPDATE) { + csg_slot_sync_queues_state_locked(ptdev, csg_id); + csg_slot_sync_idle_state_locked(ptdev, csg_id); + } + + if (ret && acked != req_mask && + ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) { + drm_err(&ptdev->base, "CSG %d update request timedout", csg_id); + ctx->timedout_mask |= BIT(csg_id); + } + } + + if (ctx->timedout_mask) + return -ETIMEDOUT; + + return 0; +} + +struct panthor_sched_tick_ctx { + struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT]; + struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT]; + u32 idle_group_count; + u32 group_count; + enum panthor_csg_priority min_priority; + struct panthor_vm *vms[MAX_CS_PER_CSG]; + u32 as_count; + bool immediate_tick; + u32 csg_upd_failed_mask; +}; + +static bool +tick_ctx_is_full(const struct panthor_scheduler *sched, + const struct panthor_sched_tick_ctx *ctx) +{ + return ctx->group_count == sched->csg_slot_count; +} + +static bool +group_is_idle(struct panthor_group *group) +{ + struct panthor_device *ptdev = group->ptdev; + u32 inactive_queues; + + if (group->csg_id >= 0) + return ptdev->scheduler->csg_slots[group->csg_id].idle; + + inactive_queues = group->idle_queues | group->blocked_queues; + return hweight32(inactive_queues) == group->queue_count; +} + +static bool +group_can_run(struct panthor_group *group) +{ + return group->state != PANTHOR_CS_GROUP_TERMINATED && + !group->destroyed && group->fatal_queues == 0 && + !group->timedout; +} + +static void +tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, + struct panthor_sched_tick_ctx *ctx, + struct list_head *queue, + bool skip_idle_groups, + bool owned_by_tick_ctx) +{ + struct panthor_group *group, *tmp; + + if (tick_ctx_is_full(sched, ctx)) + return; + + list_for_each_entry_safe(group, tmp, queue, run_node) { + u32 i; + + if (!group_can_run(group)) + continue; + + if (skip_idle_groups && group_is_idle(group)) + continue; + + for (i = 0; i < ctx->as_count; i++) { + if (ctx->vms[i] == group->vm) + break; + } + + if (i == ctx->as_count && ctx->as_count == sched->as_slot_count) + continue; + + if (!owned_by_tick_ctx) + group_get(group); + + list_move_tail(&group->run_node, &ctx->groups[group->priority]); + ctx->group_count++; + if (group_is_idle(group)) + ctx->idle_group_count++; + + if (i == ctx->as_count) + ctx->vms[ctx->as_count++] = group->vm; + + if (ctx->min_priority > group->priority) + ctx->min_priority = group->priority; + + if (tick_ctx_is_full(sched, ctx)) + return; + } +} + +static void +tick_ctx_insert_old_group(struct panthor_scheduler *sched, + struct panthor_sched_tick_ctx *ctx, + struct panthor_group *group, + bool full_tick) +{ + struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id]; + struct panthor_group *other_group; + + if (!full_tick) { + list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); + return; + } + + /* Rotate to make sure groups with lower CSG slot + * priorities have a chance to get a higher CSG slot + * priority next time they get picked. This priority + * has an impact on resource request ordering, so it's + * important to make sure we don't let one group starve + * all other groups with the same group priority. 
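+ *
+ * Concretely, old groups of a given priority are kept sorted by
+ * ascending CSG slot priority, so the group that just ran with the
+ * lowest slot priority is re-picked first and gets the highest slot
+ * priority assigned by tick_ctx_apply() on the next tick.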
+ */ + list_for_each_entry(other_group, + &ctx->old_groups[csg_slot->group->priority], + run_node) { + struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id]; + + if (other_csg_slot->priority > csg_slot->priority) { + list_add_tail(&csg_slot->group->run_node, &other_group->run_node); + return; + } + } + + list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); +} + +static void +tick_ctx_init(struct panthor_scheduler *sched, + struct panthor_sched_tick_ctx *ctx, + bool full_tick) +{ + struct panthor_device *ptdev = sched->ptdev; + struct panthor_csg_slots_upd_ctx upd_ctx; + int ret; + u32 i; + + memset(ctx, 0, sizeof(*ctx)); + csgs_upd_ctx_init(&upd_ctx); + + ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; + for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { + INIT_LIST_HEAD(&ctx->groups[i]); + INIT_LIST_HEAD(&ctx->old_groups[i]); + } + + for (i = 0; i < sched->csg_slot_count; i++) { + struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; + struct panthor_group *group = csg_slot->group; + struct panthor_fw_csg_iface *csg_iface; + + if (!group) + continue; + + csg_iface = panthor_fw_get_csg_iface(ptdev, i); + group_get(group); + + /* If there was unhandled faults on the VM, force processing of + * CSG IRQs, so we can flag the faulty queue. + */ + if (panthor_vm_has_unhandled_faults(group->vm)) { + sched_process_csg_irq_locked(ptdev, i); + + /* No fatal fault reported, flag all queues as faulty. */ + if (!group->fatal_queues) + group->fatal_queues |= GENMASK(group->queue_count - 1, 0); + } + + tick_ctx_insert_old_group(sched, ctx, group, full_tick); + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, + csg_iface->output->ack ^ CSG_STATUS_UPDATE, + CSG_STATUS_UPDATE); + } + + ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); + if (ret) { + panthor_device_schedule_reset(ptdev); + ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; + } +} + +#define NUM_INSTRS_PER_SLOT 16 + +static void +group_term_post_processing(struct panthor_group *group) +{ + struct panthor_job *job, *tmp; + LIST_HEAD(faulty_jobs); + bool cookie; + u32 i = 0; + + if (drm_WARN_ON(&group->ptdev->base, group_can_run(group))) + return; + + cookie = dma_fence_begin_signalling(); + for (i = 0; i < group->queue_count; i++) { + struct panthor_queue *queue = group->queues[i]; + struct panthor_syncobj_64b *syncobj; + int err; + + if (group->fatal_queues & BIT(i)) + err = -EINVAL; + else if (group->timedout) + err = -ETIMEDOUT; + else + err = -ECANCELED; + + if (!queue) + continue; + + spin_lock(&queue->fence_ctx.lock); + list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) { + list_move_tail(&job->node, &faulty_jobs); + dma_fence_set_error(job->done_fence, err); + dma_fence_signal_locked(job->done_fence); + } + spin_unlock(&queue->fence_ctx.lock); + + /* Manually update the syncobj seqno to unblock waiters. 
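+ * The seqno is forced to the last fence seqno allocated on this queue so
+ * user-space waiters are unblocked, and the status field is set to a
+ * non-zero value to make the abnormal termination visible.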
 */
+ syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
+ syncobj->status = ~0;
+ syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+ sched_queue_work(group->ptdev->scheduler, sync_upd);
+ }
+ dma_fence_end_signalling(cookie);
+
+ list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
+ list_del_init(&job->node);
+ panthor_job_put(&job->base);
+ }
+}
+
+static void group_term_work(struct work_struct *work)
+{
+ struct panthor_group *group =
+ container_of(work, struct panthor_group, term_work);
+
+ group_term_post_processing(group);
+ group_put(group);
+}
+
+static void
+tick_ctx_cleanup(struct panthor_scheduler *sched,
+ struct panthor_sched_tick_ctx *ctx)
+{
+ struct panthor_group *group, *tmp;
+ u32 i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) {
+ list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) {
+ /* If everything went fine, we should only have groups
+ * to be terminated in the old_groups lists.
+ */
+ drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask &&
+ group_can_run(group));
+
+ if (!group_can_run(group)) {
+ list_del_init(&group->run_node);
+ list_del_init(&group->wait_node);
+ group_queue_work(group, term);
+ } else if (group->csg_id >= 0) {
+ list_del_init(&group->run_node);
+ } else {
+ list_move(&group->run_node,
+ group_is_idle(group) ?
+ &sched->groups.idle[group->priority] :
+ &sched->groups.runnable[group->priority]);
+ }
+ group_put(group);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+ /* If everything went fine, the groups to schedule lists should
+ * be empty.
+ */
+ drm_WARN_ON(&sched->ptdev->base,
+ !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
+
+ list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) {
+ if (group->csg_id >= 0) {
+ list_del_init(&group->run_node);
+ } else {
+ list_move(&group->run_node,
+ group_is_idle(group) ?
+ &sched->groups.idle[group->priority] :
+ &sched->groups.runnable[group->priority]);
+ }
+ group_put(group);
+ }
+ }
+}
+
+static void
+tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
+{
+ struct panthor_group *group, *tmp;
+ struct panthor_device *ptdev = sched->ptdev;
+ struct panthor_csg_slot *csg_slot;
+ int prio, new_csg_prio = MAX_CSG_PRIO, i;
+ u32 csg_mod_mask = 0, free_csg_slots = 0;
+ struct panthor_csg_slots_upd_ctx upd_ctx;
+ int ret;
+
+ csgs_upd_ctx_init(&upd_ctx);
+
+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+ /* Suspend or terminate evicted groups. */
+ list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
+ bool term = !group_can_run(group);
+ int csg_id = group->csg_id;
+
+ if (drm_WARN_ON(&ptdev->base, csg_id < 0))
+ continue;
+
+ csg_slot = &sched->csg_slots[csg_id];
+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
+ term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND,
+ CSG_STATE_MASK);
+ }
+
+ /* Update priorities on already running groups.
*/ + list_for_each_entry(group, &ctx->groups[prio], run_node) { + struct panthor_fw_csg_iface *csg_iface; + int csg_id = group->csg_id; + + if (csg_id < 0) { + new_csg_prio--; + continue; + } + + csg_slot = &sched->csg_slots[csg_id]; + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + if (csg_slot->priority == new_csg_prio) { + new_csg_prio--; + continue; + } + + panthor_fw_update_reqs(csg_iface, endpoint_req, + CSG_EP_REQ_PRIORITY(new_csg_prio), + CSG_EP_REQ_PRIORITY_MASK); + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, + csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, + CSG_ENDPOINT_CONFIG); + new_csg_prio--; + } + } + + ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); + if (ret) { + panthor_device_schedule_reset(ptdev); + ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; + return; + } + + /* Unbind evicted groups. */ + for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { + list_for_each_entry(group, &ctx->old_groups[prio], run_node) { + /* This group is gone. Process interrupts to clear + * any pending interrupts before we start the new + * group. + */ + if (group->csg_id >= 0) + sched_process_csg_irq_locked(ptdev, group->csg_id); + + group_unbind_locked(group); + } + } + + for (i = 0; i < sched->csg_slot_count; i++) { + if (!sched->csg_slots[i].group) + free_csg_slots |= BIT(i); + } + + csgs_upd_ctx_init(&upd_ctx); + new_csg_prio = MAX_CSG_PRIO; + + /* Start new groups. */ + for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { + list_for_each_entry(group, &ctx->groups[prio], run_node) { + int csg_id = group->csg_id; + struct panthor_fw_csg_iface *csg_iface; + + if (csg_id >= 0) { + new_csg_prio--; + continue; + } + + csg_id = ffs(free_csg_slots) - 1; + if (drm_WARN_ON(&ptdev->base, csg_id < 0)) + break; + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + csg_slot = &sched->csg_slots[csg_id]; + csg_mod_mask |= BIT(csg_id); + group_bind_locked(group, csg_id); + csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, + group->state == PANTHOR_CS_GROUP_SUSPENDED ? + CSG_STATE_RESUME : CSG_STATE_START, + CSG_STATE_MASK); + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, + csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, + CSG_ENDPOINT_CONFIG); + free_csg_slots &= ~BIT(csg_id); + } + } + + ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); + if (ret) { + panthor_device_schedule_reset(ptdev); + ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; + return; + } + + for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { + list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) { + list_del_init(&group->run_node); + + /* If the group has been destroyed while we were + * scheduling, ask for an immediate tick to + * re-evaluate as soon as possible and get rid of + * this dangling group. + */ + if (group->destroyed) + ctx->immediate_tick = true; + group_put(group); + } + + /* Return evicted groups to the idle or run queues. Groups + * that can no longer be run (because they've been destroyed + * or experienced an unrecoverable error) will be scheduled + * for destruction in tick_ctx_cleanup(). 
+ */ + list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) { + if (!group_can_run(group)) + continue; + + if (group_is_idle(group)) + list_move_tail(&group->run_node, &sched->groups.idle[prio]); + else + list_move_tail(&group->run_node, &sched->groups.runnable[prio]); + group_put(group); + } + } + + sched->used_csg_slot_count = ctx->group_count; + sched->might_have_idle_groups = ctx->idle_group_count > 0; +} + +static u64 +tick_ctx_update_resched_target(struct panthor_scheduler *sched, + const struct panthor_sched_tick_ctx *ctx) +{ + /* We had space left, no need to reschedule until some external event happens. */ + if (!tick_ctx_is_full(sched, ctx)) + goto no_tick; + + /* If idle groups were scheduled, no need to wake up until some external + * event happens (group unblocked, new job submitted, ...). + */ + if (ctx->idle_group_count) + goto no_tick; + + if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT)) + goto no_tick; + + /* If there are groups of the same priority waiting, we need to + * keep the scheduler ticking, otherwise, we'll just wait for + * new groups with higher priority to be queued. + */ + if (!list_empty(&sched->groups.runnable[ctx->min_priority])) { + u64 resched_target = sched->last_tick + sched->tick_period; + + if (time_before64(sched->resched_target, sched->last_tick) || + time_before64(resched_target, sched->resched_target)) + sched->resched_target = resched_target; + + return sched->resched_target - sched->last_tick; + } + +no_tick: + sched->resched_target = U64_MAX; + return U64_MAX; +} + +static void tick_work(struct work_struct *work) +{ + struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, + tick_work.work); + struct panthor_device *ptdev = sched->ptdev; + struct panthor_sched_tick_ctx ctx; + u64 remaining_jiffies = 0, resched_delay; + u64 now = get_jiffies_64(); + int prio, ret, cookie; + + if (!drm_dev_enter(&ptdev->base, &cookie)) + return; + + ret = pm_runtime_resume_and_get(ptdev->base.dev); + if (drm_WARN_ON(&ptdev->base, ret)) + goto out_dev_exit; + + if (time_before64(now, sched->resched_target)) + remaining_jiffies = sched->resched_target - now; + + mutex_lock(&sched->lock); + if (panthor_device_reset_is_pending(sched->ptdev)) + goto out_unlock; + + tick_ctx_init(sched, &ctx, remaining_jiffies != 0); + if (ctx.csg_upd_failed_mask) + goto out_cleanup_ctx; + + if (remaining_jiffies) { + /* Scheduling forced in the middle of a tick. Only RT groups + * can preempt non-RT ones. Currently running RT groups can't be + * preempted. 
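+ * In practice this means every non-idle group that is already resident
+ * keeps its slot, and only runnable RT groups may grab whatever slots
+ * remain before the normal selection below runs.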
+ */ + for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; + prio >= 0 && !tick_ctx_is_full(sched, &ctx); + prio--) { + tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], + true, true); + if (prio == PANTHOR_CSG_PRIORITY_RT) { + tick_ctx_pick_groups_from_list(sched, &ctx, + &sched->groups.runnable[prio], + true, false); + } + } + } + + /* First pick non-idle groups */ + for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; + prio >= 0 && !tick_ctx_is_full(sched, &ctx); + prio--) { + tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio], + true, false); + tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); + } + + /* If we have free CSG slots left, pick idle groups */ + for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; + prio >= 0 && !tick_ctx_is_full(sched, &ctx); + prio--) { + /* Check the old_group queue first to avoid reprogramming the slots */ + tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true); + tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio], + false, false); + } + + tick_ctx_apply(sched, &ctx); + if (ctx.csg_upd_failed_mask) + goto out_cleanup_ctx; + + if (ctx.idle_group_count == ctx.group_count) { + panthor_devfreq_record_idle(sched->ptdev); + if (sched->pm.has_ref) { + pm_runtime_put_autosuspend(ptdev->base.dev); + sched->pm.has_ref = false; + } + } else { + panthor_devfreq_record_busy(sched->ptdev); + if (!sched->pm.has_ref) { + pm_runtime_get(ptdev->base.dev); + sched->pm.has_ref = true; + } + } + + sched->last_tick = now; + resched_delay = tick_ctx_update_resched_target(sched, &ctx); + if (ctx.immediate_tick) + resched_delay = 0; + + if (resched_delay != U64_MAX) + sched_queue_delayed_work(sched, tick, resched_delay); + +out_cleanup_ctx: + tick_ctx_cleanup(sched, &ctx); + +out_unlock: + mutex_unlock(&sched->lock); + pm_runtime_mark_last_busy(ptdev->base.dev); + pm_runtime_put_autosuspend(ptdev->base.dev); + +out_dev_exit: + drm_dev_exit(cookie); +} + +static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx) +{ + struct panthor_queue *queue = group->queues[queue_idx]; + union { + struct panthor_syncobj_64b sync64; + struct panthor_syncobj_32b sync32; + } *syncobj; + bool result; + u64 value; + + syncobj = panthor_queue_get_syncwait_obj(group, queue); + if (!syncobj) + return -EINVAL; + + value = queue->syncwait.sync64 ? 
+ syncobj->sync64.seqno : + syncobj->sync32.seqno; + + if (queue->syncwait.gt) + result = value > queue->syncwait.ref; + else + result = value <= queue->syncwait.ref; + + if (result) + panthor_queue_put_syncwait_obj(queue); + + return result; +} + +static void sync_upd_work(struct work_struct *work) +{ + struct panthor_scheduler *sched = container_of(work, + struct panthor_scheduler, + sync_upd_work); + struct panthor_group *group, *tmp; + bool immediate_tick = false; + + mutex_lock(&sched->lock); + list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) { + u32 tested_queues = group->blocked_queues; + u32 unblocked_queues = 0; + + while (tested_queues) { + u32 cs_id = ffs(tested_queues) - 1; + int ret; + + ret = panthor_queue_eval_syncwait(group, cs_id); + drm_WARN_ON(&group->ptdev->base, ret < 0); + if (ret) + unblocked_queues |= BIT(cs_id); + + tested_queues &= ~BIT(cs_id); + } + + if (unblocked_queues) { + group->blocked_queues &= ~unblocked_queues; + + if (group->csg_id < 0) { + list_move(&group->run_node, + &sched->groups.runnable[group->priority]); + if (group->priority == PANTHOR_CSG_PRIORITY_RT) + immediate_tick = true; + } + } + + if (!group->blocked_queues) + list_del_init(&group->wait_node); + } + mutex_unlock(&sched->lock); + + if (immediate_tick) + sched_queue_delayed_work(sched, tick, 0); +} + +static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) +{ + struct panthor_device *ptdev = group->ptdev; + struct panthor_scheduler *sched = ptdev->scheduler; + struct list_head *queue = &sched->groups.runnable[group->priority]; + u64 delay_jiffies = 0; + bool was_idle; + u64 now; + + if (!group_can_run(group)) + return; + + /* All updated queues are blocked, no need to wake up the scheduler. */ + if ((queue_mask & group->blocked_queues) == queue_mask) + return; + + was_idle = group_is_idle(group); + group->idle_queues &= ~queue_mask; + + /* Don't mess up with the lists if we're in a middle of a reset. */ + if (atomic_read(&sched->reset.in_progress)) + return; + + if (was_idle && !group_is_idle(group)) + list_move_tail(&group->run_node, queue); + + /* RT groups are preemptive. */ + if (group->priority == PANTHOR_CSG_PRIORITY_RT) { + sched_queue_delayed_work(sched, tick, 0); + return; + } + + /* Some groups might be idle, force an immediate tick to + * re-evaluate. + */ + if (sched->might_have_idle_groups) { + sched_queue_delayed_work(sched, tick, 0); + return; + } + + /* Scheduler is ticking, nothing to do. */ + if (sched->resched_target != U64_MAX) { + /* If there are free slots, force immediating ticking. */ + if (sched->used_csg_slot_count < sched->csg_slot_count) + sched_queue_delayed_work(sched, tick, 0); + + return; + } + + /* Scheduler tick was off, recalculate the resched_target based on the + * last tick event, and queue the scheduler work. + */ + now = get_jiffies_64(); + sched->resched_target = sched->last_tick + sched->tick_period; + if (sched->used_csg_slot_count == sched->csg_slot_count && + time_before64(now, sched->resched_target)) + delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); + + sched_queue_delayed_work(sched, tick, delay_jiffies); +} + +static void queue_stop(struct panthor_queue *queue, + struct panthor_job *bad_job) +{ + drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL); +} + +static void queue_start(struct panthor_queue *queue) +{ + struct panthor_job *job; + + /* Re-assign the parent fences. 
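+ * queue_stop() detached the scheduler from these jobs' completion
+ * fences; re-pointing each job's parent fence at its done_fence lets
+ * drm_sched_start() resume completion tracking on fences we still
+ * control.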
*/ + list_for_each_entry(job, &queue->scheduler.pending_list, base.list) + job->base.s_fence->parent = dma_fence_get(job->done_fence); + + drm_sched_start(&queue->scheduler, true); +} + +static void panthor_group_stop(struct panthor_group *group) +{ + struct panthor_scheduler *sched = group->ptdev->scheduler; + + lockdep_assert_held(&sched->reset.lock); + + for (u32 i = 0; i < group->queue_count; i++) + queue_stop(group->queues[i], NULL); + + group_get(group); + list_move_tail(&group->run_node, &sched->reset.stopped_groups); +} + +static void panthor_group_start(struct panthor_group *group) +{ + struct panthor_scheduler *sched = group->ptdev->scheduler; + + lockdep_assert_held(&group->ptdev->scheduler->reset.lock); + + for (u32 i = 0; i < group->queue_count; i++) + queue_start(group->queues[i]); + + if (group_can_run(group)) { + list_move_tail(&group->run_node, + group_is_idle(group) ? + &sched->groups.idle[group->priority] : + &sched->groups.runnable[group->priority]); + } else { + list_del_init(&group->run_node); + list_del_init(&group->wait_node); + group_queue_work(group, term); + } + + group_put(group); +} + +static void panthor_sched_immediate_tick(struct panthor_device *ptdev) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + + sched_queue_delayed_work(sched, tick, 0); +} + +/** + * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler. + */ +void panthor_sched_report_mmu_fault(struct panthor_device *ptdev) +{ + /* Force a tick to immediately kill faulty groups. */ + if (ptdev->scheduler) + panthor_sched_immediate_tick(ptdev); +} + +void panthor_sched_resume(struct panthor_device *ptdev) +{ + /* Force a tick to re-evaluate after a resume. */ + panthor_sched_immediate_tick(ptdev); +} + +void panthor_sched_suspend(struct panthor_device *ptdev) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_csg_slots_upd_ctx upd_ctx; + u64 suspended_slots, faulty_slots; + struct panthor_group *group; + u32 i; + + mutex_lock(&sched->lock); + csgs_upd_ctx_init(&upd_ctx); + for (i = 0; i < sched->csg_slot_count; i++) { + struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; + + if (csg_slot->group) { + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, + CSG_STATE_SUSPEND, + CSG_STATE_MASK); + } + } + + suspended_slots = upd_ctx.update_mask; + + csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); + suspended_slots &= ~upd_ctx.timedout_mask; + faulty_slots = upd_ctx.timedout_mask; + + if (faulty_slots) { + u32 slot_mask = faulty_slots; + + drm_err(&ptdev->base, "CSG suspend failed, escalating to termination"); + csgs_upd_ctx_init(&upd_ctx); + while (slot_mask) { + u32 csg_id = ffs(slot_mask) - 1; + + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, + CSG_STATE_TERMINATE, + CSG_STATE_MASK); + slot_mask &= ~BIT(csg_id); + } + + csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); + + slot_mask = upd_ctx.timedout_mask; + while (slot_mask) { + u32 csg_id = ffs(slot_mask) - 1; + struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + + /* Terminate command timedout, but the soft-reset will + * automatically terminate all active groups, so let's + * force the state to halted here. + */ + if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) + csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; + slot_mask &= ~BIT(csg_id); + } + } + + /* Flush L2 and LSC caches to make sure suspend state is up-to-date. + * If the flush fails, flag all queues for termination. 
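+ * A failed flush means the suspend buffers may not contain the groups'
+ * latest state, so resuming them later would be unsafe; those groups are
+ * marked terminated instead.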
+ */ + if (suspended_slots) { + bool flush_caches_failed = false; + u32 slot_mask = suspended_slots; + + if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0)) + flush_caches_failed = true; + + while (slot_mask) { + u32 csg_id = ffs(slot_mask) - 1; + struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + + if (flush_caches_failed) + csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; + else + csg_slot_sync_update_locked(ptdev, csg_id); + + slot_mask &= ~BIT(csg_id); + } + + if (flush_caches_failed) + faulty_slots |= suspended_slots; + } + + for (i = 0; i < sched->csg_slot_count; i++) { + struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; + + group = csg_slot->group; + if (!group) + continue; + + group_get(group); + + if (group->csg_id >= 0) + sched_process_csg_irq_locked(ptdev, group->csg_id); + + group_unbind_locked(group); + + drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node)); + + if (group_can_run(group)) { + list_add(&group->run_node, + &sched->groups.idle[group->priority]); + } else { + /* We don't bother stopping the scheduler if the group is + * faulty, the group termination work will finish the job. + */ + list_del_init(&group->wait_node); + group_queue_work(group, term); + } + group_put(group); + } + mutex_unlock(&sched->lock); +} + +void panthor_sched_pre_reset(struct panthor_device *ptdev) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_group *group, *group_tmp; + u32 i; + + mutex_lock(&sched->reset.lock); + atomic_set(&sched->reset.in_progress, true); + + /* Cancel all scheduler works. Once this is done, these works can't be + * scheduled again until the reset operation is complete. + */ + cancel_work_sync(&sched->sync_upd_work); + cancel_delayed_work_sync(&sched->tick_work); + + panthor_sched_suspend(ptdev); + + /* Stop all groups that might still accept jobs, so we don't get passed + * new jobs while we're resetting. + */ + for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) { + /* All groups should be in the idle lists. */ + drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i])); + list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node) + panthor_group_stop(group); + } + + for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) { + list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node) + panthor_group_stop(group); + } + + mutex_unlock(&sched->reset.lock); +} + +void panthor_sched_post_reset(struct panthor_device *ptdev) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_group *group, *group_tmp; + + mutex_lock(&sched->reset.lock); + + list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) + panthor_group_start(group); + + /* We're done resetting the GPU, clear the reset.in_progress bit so we can + * kick the scheduler. 
+ */ + atomic_set(&sched->reset.in_progress, false); + mutex_unlock(&sched->reset.lock); + + sched_queue_delayed_work(sched, tick, 0); + + sched_queue_work(sched, sync_upd); +} + +static void group_sync_upd_work(struct work_struct *work) +{ + struct panthor_group *group = + container_of(work, struct panthor_group, sync_upd_work); + struct panthor_job *job, *job_tmp; + LIST_HEAD(done_jobs); + u32 queue_idx; + bool cookie; + + cookie = dma_fence_begin_signalling(); + for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { + struct panthor_queue *queue = group->queues[queue_idx]; + struct panthor_syncobj_64b *syncobj; + + if (!queue) + continue; + + syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj)); + + spin_lock(&queue->fence_ctx.lock); + list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { + if (!job->call_info.size) + continue; + + if (syncobj->seqno < job->done_fence->seqno) + break; + + list_move_tail(&job->node, &done_jobs); + dma_fence_signal_locked(job->done_fence); + } + spin_unlock(&queue->fence_ctx.lock); + } + dma_fence_end_signalling(cookie); + + list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { + list_del_init(&job->node); + panthor_job_put(&job->base); + } + + group_put(group); +} + +static struct dma_fence * +queue_run_job(struct drm_sched_job *sched_job) +{ + struct panthor_job *job = container_of(sched_job, struct panthor_job, base); + struct panthor_group *group = job->group; + struct panthor_queue *queue = group->queues[job->queue_idx]; + struct panthor_device *ptdev = group->ptdev; + struct panthor_scheduler *sched = ptdev->scheduler; + u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); + u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1); + u64 addr_reg = ptdev->csif_info.cs_reg_count - + ptdev->csif_info.unpreserved_cs_reg_count; + u64 val_reg = addr_reg + 2; + u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + + job->queue_idx * sizeof(struct panthor_syncobj_64b); + u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); + struct dma_fence *done_fence; + int ret; + + u64 call_instrs[NUM_INSTRS_PER_SLOT] = { + /* MOV32 rX+2, cs.latest_flush */ + (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush, + + /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ + (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233, + + /* MOV48 rX:rX+1, cs.start */ + (1ull << 56) | (addr_reg << 48) | job->call_info.start, + + /* MOV32 rX+2, cs.size */ + (2ull << 56) | (val_reg << 48) | job->call_info.size, + + /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ + (3ull << 56) | (1 << 16), + + /* CALL rX:rX+1, rX+2 */ + (32ull << 56) | (addr_reg << 40) | (val_reg << 32), + + /* MOV48 rX:rX+1, sync_addr */ + (1ull << 56) | (addr_reg << 48) | sync_addr, + + /* MOV48 rX+2, #1 */ + (1ull << 56) | (val_reg << 48) | 1, + + /* WAIT(all) */ + (3ull << 56) | (waitall_mask << 16), + + /* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/ + (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1, + + /* ERROR_BARRIER, so we can recover from faults at job + * boundaries. + */ + (47ull << 56), + }; + + /* Need to be cacheline aligned to please the prefetcher. */ + static_assert(sizeof(call_instrs) % 64 == 0, + "call_instrs is not aligned on a cacheline"); + + /* Stream size is zero, nothing to do => return a NULL fence and let + * drm_sched signal the parent. 
+ */ + if (!job->call_info.size) + return NULL; + + ret = pm_runtime_resume_and_get(ptdev->base.dev); + if (drm_WARN_ON(&ptdev->base, ret)) + return ERR_PTR(ret); + + mutex_lock(&sched->lock); + if (!group_can_run(group)) { + done_fence = ERR_PTR(-ECANCELED); + goto out_unlock; + } + + dma_fence_init(job->done_fence, + &panthor_queue_fence_ops, + &queue->fence_ctx.lock, + queue->fence_ctx.id, + atomic64_inc_return(&queue->fence_ctx.seqno)); + + memcpy(queue->ringbuf->kmap + ringbuf_insert, + call_instrs, sizeof(call_instrs)); + + panthor_job_get(&job->base); + spin_lock(&queue->fence_ctx.lock); + list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs); + spin_unlock(&queue->fence_ctx.lock); + + job->ringbuf.start = queue->iface.input->insert; + job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs); + + /* Make sure the ring buffer is updated before the INSERT + * register. + */ + wmb(); + + queue->iface.input->extract = queue->iface.output->extract; + queue->iface.input->insert = job->ringbuf.end; + + if (group->csg_id < 0) { + /* If the queue is blocked, we want to keep the timeout running, so we + * can detect unbounded waits and kill the group when that happens. + * Otherwise, we suspend the timeout so the time we spend waiting for + * a CSG slot is not counted. + */ + if (!(group->blocked_queues & BIT(job->queue_idx)) && + !queue->timeout_suspended) { + queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); + queue->timeout_suspended = true; + } + + group_schedule_locked(group, BIT(job->queue_idx)); + } else { + gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); + if (!sched->pm.has_ref && + !(group->blocked_queues & BIT(job->queue_idx))) { + pm_runtime_get(ptdev->base.dev); + sched->pm.has_ref = true; + } + } + + done_fence = dma_fence_get(job->done_fence); + +out_unlock: + mutex_unlock(&sched->lock); + pm_runtime_mark_last_busy(ptdev->base.dev); + pm_runtime_put_autosuspend(ptdev->base.dev); + + return done_fence; +} + +static enum drm_gpu_sched_stat +queue_timedout_job(struct drm_sched_job *sched_job) +{ + struct panthor_job *job = container_of(sched_job, struct panthor_job, base); + struct panthor_group *group = job->group; + struct panthor_device *ptdev = group->ptdev; + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_queue *queue = group->queues[job->queue_idx]; + + drm_warn(&ptdev->base, "job timeout\n"); + + drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress)); + + queue_stop(queue, job); + + mutex_lock(&sched->lock); + group->timedout = true; + if (group->csg_id >= 0) { + sched_queue_delayed_work(ptdev->scheduler, tick, 0); + } else { + /* Remove from the run queues, so the scheduler can't + * pick the group on the next tick. 
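+ * The termination work queued below will cancel every in-flight job,
+ * signalling their fences with -ETIMEDOUT (see
+ * group_term_post_processing()).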
+ */ + list_del_init(&group->run_node); + list_del_init(&group->wait_node); + + group_queue_work(group, term); + } + mutex_unlock(&sched->lock); + + queue_start(queue); + + return DRM_GPU_SCHED_STAT_NOMINAL; +} + +static void queue_free_job(struct drm_sched_job *sched_job) +{ + drm_sched_job_cleanup(sched_job); + panthor_job_put(sched_job); +} + +static const struct drm_sched_backend_ops panthor_queue_sched_ops = { + .run_job = queue_run_job, + .timedout_job = queue_timedout_job, + .free_job = queue_free_job, +}; + +static struct panthor_queue * +group_create_queue(struct panthor_group *group, + const struct drm_panthor_queue_create *args) +{ + struct drm_gpu_scheduler *drm_sched; + struct panthor_queue *queue; + int ret; + + if (args->pad[0] || args->pad[1] || args->pad[2]) + return ERR_PTR(-EINVAL); + + if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K || + !is_power_of_2(args->ringbuf_size)) + return ERR_PTR(-EINVAL); + + if (args->priority > CSF_MAX_QUEUE_PRIO) + return ERR_PTR(-EINVAL); + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return ERR_PTR(-ENOMEM); + + queue->fence_ctx.id = dma_fence_context_alloc(1); + spin_lock_init(&queue->fence_ctx.lock); + INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs); + + queue->priority = args->priority; + + queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm, + args->ringbuf_size, + DRM_PANTHOR_BO_NO_MMAP, + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, + PANTHOR_VM_KERNEL_AUTO_VA); + if (IS_ERR(queue->ringbuf)) { + ret = PTR_ERR(queue->ringbuf); + goto err_free_queue; + } + + ret = panthor_kernel_bo_vmap(queue->ringbuf); + if (ret) + goto err_free_queue; + + queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev, + &queue->iface.input, + &queue->iface.output, + &queue->iface.input_fw_va, + &queue->iface.output_fw_va); + if (IS_ERR(queue->iface.mem)) { + ret = PTR_ERR(queue->iface.mem); + goto err_free_queue; + } + + ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops, + group->ptdev->scheduler->wq, 1, + args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)), + 0, msecs_to_jiffies(JOB_TIMEOUT_MS), + group->ptdev->reset.wq, + NULL, "panthor-queue", group->ptdev->base.dev); + if (ret) + goto err_free_queue; + + drm_sched = &queue->scheduler; + ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL); + + return queue; + +err_free_queue: + group_free_queue(group, queue); + return ERR_PTR(ret); +} + +#define MAX_GROUPS_PER_POOL 128 + +int panthor_group_create(struct panthor_file *pfile, + const struct drm_panthor_group_create *group_args, + const struct drm_panthor_queue_create *queue_args) +{ + struct panthor_device *ptdev = pfile->ptdev; + struct panthor_group_pool *gpool = pfile->groups; + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0); + struct panthor_group *group = NULL; + u32 gid, i, suspend_size; + int ret; + + if (group_args->pad) + return -EINVAL; + + if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH) + return -EINVAL; + + if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) || + (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) || + (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present)) + return -EINVAL; + + if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores || + hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores || + hweight64(group_args->tiler_core_mask) < 
group_args->max_tiler_cores) + return -EINVAL; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return -ENOMEM; + + spin_lock_init(&group->fatal_lock); + kref_init(&group->refcount); + group->state = PANTHOR_CS_GROUP_CREATED; + group->csg_id = -1; + + group->ptdev = ptdev; + group->max_compute_cores = group_args->max_compute_cores; + group->compute_core_mask = group_args->compute_core_mask; + group->max_fragment_cores = group_args->max_fragment_cores; + group->fragment_core_mask = group_args->fragment_core_mask; + group->max_tiler_cores = group_args->max_tiler_cores; + group->tiler_core_mask = group_args->tiler_core_mask; + group->priority = group_args->priority; + + INIT_LIST_HEAD(&group->wait_node); + INIT_LIST_HEAD(&group->run_node); + INIT_WORK(&group->term_work, group_term_work); + INIT_WORK(&group->sync_upd_work, group_sync_upd_work); + INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); + INIT_WORK(&group->release_work, group_release_work); + + group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id); + if (!group->vm) { + ret = -EINVAL; + goto err_put_group; + } + + suspend_size = csg_iface->control->suspend_size; + group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); + if (IS_ERR(group->suspend_buf)) { + ret = PTR_ERR(group->suspend_buf); + group->suspend_buf = NULL; + goto err_put_group; + } + + suspend_size = csg_iface->control->protm_suspend_size; + group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); + if (IS_ERR(group->protm_suspend_buf)) { + ret = PTR_ERR(group->protm_suspend_buf); + group->protm_suspend_buf = NULL; + goto err_put_group; + } + + group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm, + group_args->queues.count * + sizeof(struct panthor_syncobj_64b), + DRM_PANTHOR_BO_NO_MMAP, + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, + PANTHOR_VM_KERNEL_AUTO_VA); + if (IS_ERR(group->syncobjs)) { + ret = PTR_ERR(group->syncobjs); + goto err_put_group; + } + + ret = panthor_kernel_bo_vmap(group->syncobjs); + if (ret) + goto err_put_group; + + memset(group->syncobjs->kmap, 0, + group_args->queues.count * sizeof(struct panthor_syncobj_64b)); + + for (i = 0; i < group_args->queues.count; i++) { + group->queues[i] = group_create_queue(group, &queue_args[i]); + if (IS_ERR(group->queues[i])) { + ret = PTR_ERR(group->queues[i]); + group->queues[i] = NULL; + goto err_put_group; + } + + group->queue_count++; + } + + group->idle_queues = GENMASK(group->queue_count - 1, 0); + + ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL); + if (ret) + goto err_put_group; + + mutex_lock(&sched->reset.lock); + if (atomic_read(&sched->reset.in_progress)) { + panthor_group_stop(group); + } else { + mutex_lock(&sched->lock); + list_add_tail(&group->run_node, + &sched->groups.idle[group->priority]); + mutex_unlock(&sched->lock); + } + mutex_unlock(&sched->reset.lock); + + return gid; + +err_put_group: + group_put(group); + return ret; +} + +int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) +{ + struct panthor_group_pool *gpool = pfile->groups; + struct panthor_device *ptdev = pfile->ptdev; + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_group *group; + + group = xa_erase(&gpool->xa, group_handle); + if (!group) + return -EINVAL; + + for (u32 i = 0; i < group->queue_count; i++) { + if (group->queues[i]) + drm_sched_entity_destroy(&group->queues[i]->entity); + } + + mutex_lock(&sched->reset.lock); + 
mutex_lock(&sched->lock); + group->destroyed = true; + if (group->csg_id >= 0) { + sched_queue_delayed_work(sched, tick, 0); + } else if (!atomic_read(&sched->reset.in_progress)) { + /* Remove from the run queues, so the scheduler can't + * pick the group on the next tick. + */ + list_del_init(&group->run_node); + list_del_init(&group->wait_node); + group_queue_work(group, term); + } + mutex_unlock(&sched->lock); + mutex_unlock(&sched->reset.lock); + + group_put(group); + return 0; +} + +int panthor_group_get_state(struct panthor_file *pfile, + struct drm_panthor_group_get_state *get_state) +{ + struct panthor_group_pool *gpool = pfile->groups; + struct panthor_device *ptdev = pfile->ptdev; + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_group *group; + + if (get_state->pad) + return -EINVAL; + + group = group_get(xa_load(&gpool->xa, get_state->group_handle)); + if (!group) + return -EINVAL; + + memset(get_state, 0, sizeof(*get_state)); + + mutex_lock(&sched->lock); + if (group->timedout) + get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT; + if (group->fatal_queues) { + get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; + get_state->fatal_queues = group->fatal_queues; + } + mutex_unlock(&sched->lock); + + group_put(group); + return 0; +} + +int panthor_group_pool_create(struct panthor_file *pfile) +{ + struct panthor_group_pool *gpool; + + gpool = kzalloc(sizeof(*gpool), GFP_KERNEL); + if (!gpool) + return -ENOMEM; + + xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1); + pfile->groups = gpool; + return 0; +} + +void panthor_group_pool_destroy(struct panthor_file *pfile) +{ + struct panthor_group_pool *gpool = pfile->groups; + struct panthor_group *group; + unsigned long i; + + if (IS_ERR_OR_NULL(gpool)) + return; + + xa_for_each(&gpool->xa, i, group) + panthor_group_destroy(pfile, i); + + xa_destroy(&gpool->xa); + kfree(gpool); + pfile->groups = NULL; +} + +static void job_release(struct kref *ref) +{ + struct panthor_job *job = container_of(ref, struct panthor_job, refcount); + + drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node)); + + if (job->base.s_fence) + drm_sched_job_cleanup(&job->base); + + if (job->done_fence && job->done_fence->ops) + dma_fence_put(job->done_fence); + else + dma_fence_free(job->done_fence); + + group_put(job->group); + + kfree(job); +} + +struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job) +{ + if (sched_job) { + struct panthor_job *job = container_of(sched_job, struct panthor_job, base); + + kref_get(&job->refcount); + } + + return sched_job; +} + +void panthor_job_put(struct drm_sched_job *sched_job) +{ + struct panthor_job *job = container_of(sched_job, struct panthor_job, base); + + if (sched_job) + kref_put(&job->refcount, job_release); +} + +struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job) +{ + struct panthor_job *job = container_of(sched_job, struct panthor_job, base); + + return job->group->vm; +} + +struct drm_sched_job * +panthor_job_create(struct panthor_file *pfile, + u16 group_handle, + const struct drm_panthor_queue_submit *qsubmit) +{ + struct panthor_group_pool *gpool = pfile->groups; + struct panthor_job *job; + int ret; + + if (qsubmit->pad) + return ERR_PTR(-EINVAL); + + /* If stream_addr is zero, so stream_size should be. */ + if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0)) + return ERR_PTR(-EINVAL); + + /* Make sure the address is aligned on 64-byte (cacheline) and the size is + * aligned on 8-byte (instruction size). 
+	 */
+	if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7))
+		return ERR_PTR(-EINVAL);
+
+	/* bits 24:30 must be zero. */
+	if (qsubmit->latest_flush & GENMASK(30, 24))
+		return ERR_PTR(-EINVAL);
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&job->refcount);
+	job->queue_idx = qsubmit->queue_index;
+	job->call_info.size = qsubmit->stream_size;
+	job->call_info.start = qsubmit->stream_addr;
+	job->call_info.latest_flush = qsubmit->latest_flush;
+	INIT_LIST_HEAD(&job->node);
+
+	job->group = group_get(xa_load(&gpool->xa, group_handle));
+	if (!job->group) {
+		ret = -EINVAL;
+		goto err_put_job;
+	}
+
+	if (job->queue_idx >= job->group->queue_count ||
+	    !job->group->queues[job->queue_idx]) {
+		ret = -EINVAL;
+		goto err_put_job;
+	}
+
+	job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
+	if (!job->done_fence) {
+		ret = -ENOMEM;
+		goto err_put_job;
+	}
+
+	ret = drm_sched_job_init(&job->base,
+				 &job->group->queues[job->queue_idx]->entity,
+				 1, job->group);
+	if (ret)
+		goto err_put_job;
+
+	return &job->base;
+
+err_put_job:
+	panthor_job_put(&job->base);
+	return ERR_PTR(ret);
+}
+
+void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
+{
+	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+
+	/* Still not sure why we want USAGE_WRITE for external objects, since I
+	 * was assuming this would be handled through explicit syncs being imported
+	 * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers
+	 * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason.
+	 */
+	panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
+				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
+}
+
+void panthor_sched_unplug(struct panthor_device *ptdev)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+
+	cancel_delayed_work_sync(&sched->tick_work);
+
+	mutex_lock(&sched->lock);
+	if (sched->pm.has_ref) {
+		pm_runtime_put(ptdev->base.dev);
+		sched->pm.has_ref = false;
+	}
+	mutex_unlock(&sched->lock);
+}
+
+static void panthor_sched_fini(struct drm_device *ddev, void *res)
+{
+	struct panthor_scheduler *sched = res;
+	int prio;
+
+	if (!sched || !sched->csg_slot_count)
+		return;
+
+	cancel_delayed_work_sync(&sched->tick_work);
+
+	if (sched->wq)
+		destroy_workqueue(sched->wq);
+
+	if (sched->heap_alloc_wq)
+		destroy_workqueue(sched->heap_alloc_wq);
+
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
+		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
+	}
+
+	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
+}
+
+int panthor_sched_init(struct panthor_device *ptdev)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
+	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
+	struct panthor_scheduler *sched;
+	u32 gpu_as_count, num_groups;
+	int prio, ret;
+
+	sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
+	if (!sched)
+		return -ENOMEM;
+
+	/* The highest bit in JOB_INT_* is reserved for global IRQs. That
+	 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
+ */ + num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num); + + /* The FW-side scheduler might deadlock if two groups with the same + * priority try to access a set of resources that overlaps, with part + * of the resources being allocated to one group and the other part to + * the other group, both groups waiting for the remaining resources to + * be allocated. To avoid that, it is recommended to assign each CSG a + * different priority. In theory we could allow several groups to have + * the same CSG priority if they don't request the same resources, but + * that makes the scheduling logic more complicated, so let's clamp + * the number of CSG slots to MAX_CSG_PRIO + 1 for now. + */ + num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups); + + /* We need at least one AS for the MCU and one for the GPU contexts. */ + gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1)); + if (!gpu_as_count) { + drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)", + gpu_as_count + 1); + return -EINVAL; + } + + sched->ptdev = ptdev; + sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features); + sched->csg_slot_count = num_groups; + sched->cs_slot_count = csg_iface->control->stream_num; + sched->as_slot_count = gpu_as_count; + ptdev->csif_info.csg_slot_count = sched->csg_slot_count; + ptdev->csif_info.cs_slot_count = sched->cs_slot_count; + ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count; + + sched->last_tick = 0; + sched->resched_target = U64_MAX; + sched->tick_period = msecs_to_jiffies(10); + INIT_DELAYED_WORK(&sched->tick_work, tick_work); + INIT_WORK(&sched->sync_upd_work, sync_upd_work); + INIT_WORK(&sched->fw_events_work, process_fw_events_work); + + ret = drmm_mutex_init(&ptdev->base, &sched->lock); + if (ret) + return ret; + + for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { + INIT_LIST_HEAD(&sched->groups.runnable[prio]); + INIT_LIST_HEAD(&sched->groups.idle[prio]); + } + INIT_LIST_HEAD(&sched->groups.waiting); + + ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock); + if (ret) + return ret; + + INIT_LIST_HEAD(&sched->reset.stopped_groups); + + /* sched->heap_alloc_wq will be used for heap chunk allocation on + * tiler OOM events, which means we can't use the same workqueue for + * the scheduler because works queued by the scheduler are in + * the dma-signalling path. Allocate a dedicated heap_alloc_wq to + * work around this limitation. + * + * FIXME: Ultimately, what we need is a failable/non-blocking GEM + * allocation path that we can call when a heap OOM is reported. The + * FW is smart enough to fall back on other methods if the kernel can't + * allocate memory, and fail the tiling job if none of these + * countermeasures worked. + * + * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the + * system is running out of memory. 
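The workqueue comment above is the key constraint: anything queued on sched->wq can end up in the dma-fence signalling path, so it must never wait on memory allocations, while heap growth on a tiler OOM has to allocate. Splitting the work across two queues, with WQ_MEM_RECLAIM only on the signalling one, is the standard way out. A reduced sketch of that setup with made-up names (not the driver's actual init path):

#include <linux/errno.h>
#include <linux/workqueue.h>

struct my_sched {
	struct workqueue_struct *signalling_wq; /* dma-fence signalling path */
	struct workqueue_struct *alloc_wq;      /* allowed to allocate/block */
};

static int my_sched_create_wqs(struct my_sched *sched)
{
	sched->alloc_wq = alloc_workqueue("my-heap-alloc", WQ_UNBOUND, 0);
	sched->signalling_wq = alloc_workqueue("my-sched",
					       WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!sched->alloc_wq || !sched->signalling_wq) {
		if (sched->alloc_wq)
			destroy_workqueue(sched->alloc_wq);
		if (sched->signalling_wq)
			destroy_workqueue(sched->signalling_wq);
		return -ENOMEM;
	}

	return 0;
}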
+ */ + sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0); + sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (!sched->wq || !sched->heap_alloc_wq) { + panthor_sched_fini(&ptdev->base, sched); + drm_err(&ptdev->base, "Failed to allocate the workqueues"); + return -ENOMEM; + } + + ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched); + if (ret) + return ret; + + ptdev->scheduler = sched; + return 0; +} diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h new file mode 100644 index 000000000000..66438b1f331f --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_sched.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2023 Collabora ltd. */ + +#ifndef __PANTHOR_SCHED_H__ +#define __PANTHOR_SCHED_H__ + +struct drm_exec; +struct dma_fence; +struct drm_file; +struct drm_gem_object; +struct drm_sched_job; +struct drm_panthor_group_create; +struct drm_panthor_queue_create; +struct drm_panthor_group_get_state; +struct drm_panthor_queue_submit; +struct panthor_device; +struct panthor_file; +struct panthor_group_pool; +struct panthor_job; + +int panthor_group_create(struct panthor_file *pfile, + const struct drm_panthor_group_create *group_args, + const struct drm_panthor_queue_create *queue_args); +int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle); +int panthor_group_get_state(struct panthor_file *pfile, + struct drm_panthor_group_get_state *get_state); + +struct drm_sched_job * +panthor_job_create(struct panthor_file *pfile, + u16 group_handle, + const struct drm_panthor_queue_submit *qsubmit); +struct drm_sched_job *panthor_job_get(struct drm_sched_job *job); +struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job); +void panthor_job_put(struct drm_sched_job *job); +void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); + +int panthor_group_pool_create(struct panthor_file *pfile); +void panthor_group_pool_destroy(struct panthor_file *pfile); + +int panthor_sched_init(struct panthor_device *ptdev); +void panthor_sched_unplug(struct panthor_device *ptdev); +void panthor_sched_pre_reset(struct panthor_device *ptdev); +void panthor_sched_post_reset(struct panthor_device *ptdev); +void panthor_sched_suspend(struct panthor_device *ptdev); +void panthor_sched_resume(struct panthor_device *ptdev); + +void panthor_sched_report_mmu_fault(struct panthor_device *ptdev); +void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events); + +#endif diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index 1e46b0a6e478..5893e27a7ae5 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -29,9 +29,6 @@ #include "qxl_drv.h" #include "qxl_object.h" -static int __qxl_bo_pin(struct qxl_bo *bo); -static void __qxl_bo_unpin(struct qxl_bo *bo); - static void qxl_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct qxl_bo *bo; @@ -167,13 +164,9 @@ int qxl_bo_vmap_locked(struct qxl_bo *bo, struct iosys_map *map) goto out; } - r = __qxl_bo_pin(bo); - if (r) - return r; - r = ttm_bo_vmap(&bo->tbo, &bo->map); if (r) { - __qxl_bo_unpin(bo); + qxl_bo_unpin_locked(bo); return r; } bo->map_count = 1; @@ -246,7 +239,6 @@ void qxl_bo_vunmap_locked(struct qxl_bo *bo) return; bo->kptr = NULL; ttm_bo_vunmap(&bo->tbo, &bo->map); - __qxl_bo_unpin(bo); } int qxl_bo_vunmap(struct qxl_bo *bo) @@ -290,12 +282,14 @@ struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo) 
return bo; } -static int __qxl_bo_pin(struct qxl_bo *bo) +int qxl_bo_pin_locked(struct qxl_bo *bo) { struct ttm_operation_ctx ctx = { false, false }; struct drm_device *ddev = bo->tbo.base.dev; int r; + dma_resv_assert_held(bo->tbo.base.resv); + if (bo->tbo.pin_count) { ttm_bo_pin(&bo->tbo); return 0; @@ -309,14 +303,16 @@ static int __qxl_bo_pin(struct qxl_bo *bo) return r; } -static void __qxl_bo_unpin(struct qxl_bo *bo) +void qxl_bo_unpin_locked(struct qxl_bo *bo) { + dma_resv_assert_held(bo->tbo.base.resv); + ttm_bo_unpin(&bo->tbo); } /* * Reserve the BO before pinning the object. If the BO was reserved - * beforehand, use the internal version directly __qxl_bo_pin. + * beforehand, use the internal version directly qxl_bo_pin_locked. * */ int qxl_bo_pin(struct qxl_bo *bo) @@ -327,14 +323,14 @@ int qxl_bo_pin(struct qxl_bo *bo) if (r) return r; - r = __qxl_bo_pin(bo); + r = qxl_bo_pin_locked(bo); qxl_bo_unreserve(bo); return r; } /* * Reserve the BO before pinning the object. If the BO was reserved - * beforehand, use the internal version directly __qxl_bo_unpin. + * beforehand, use the internal version directly qxl_bo_unpin_locked. * */ int qxl_bo_unpin(struct qxl_bo *bo) @@ -345,7 +341,7 @@ int qxl_bo_unpin(struct qxl_bo *bo) if (r) return r; - __qxl_bo_unpin(bo); + qxl_bo_unpin_locked(bo); qxl_bo_unreserve(bo); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h index 53392cb90eec..1cf5bc759101 100644 --- a/drivers/gpu/drm/qxl/qxl_object.h +++ b/drivers/gpu/drm/qxl/qxl_object.h @@ -67,6 +67,8 @@ void *qxl_bo_kmap_atomic_page(struct qxl_device *qdev, struct qxl_bo *bo, int pa void qxl_bo_kunmap_atomic_page(struct qxl_device *qdev, struct qxl_bo *bo, void *map); extern struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo); extern void qxl_bo_unref(struct qxl_bo **bo); +extern int qxl_bo_pin_locked(struct qxl_bo *bo); +extern void qxl_bo_unpin_locked(struct qxl_bo *bo); extern int qxl_bo_pin(struct qxl_bo *bo); extern int qxl_bo_unpin(struct qxl_bo *bo); extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain); diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c index 9169c26357d3..19bf551a7b31 100644 --- a/drivers/gpu/drm/qxl/qxl_prime.c +++ b/drivers/gpu/drm/qxl/qxl_prime.c @@ -32,14 +32,14 @@ int qxl_gem_prime_pin(struct drm_gem_object *obj) { struct qxl_bo *bo = gem_to_qxl_bo(obj); - return qxl_bo_pin(bo); + return qxl_bo_pin_locked(bo); } void qxl_gem_prime_unpin(struct drm_gem_object *obj) { struct qxl_bo *bo = gem_to_qxl_bo(obj); - qxl_bo_unpin(bo); + qxl_bo_unpin_locked(bo); } struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index f98356be0af2..18c867219a70 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -2,11 +2,13 @@ config DRM_RADEON tristate "ATI Radeon" - depends on DRM && PCI && MMU depends on AGP || !AGP + depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER + depends on PCI + depends on MMU select FW_LOADER - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_SUBALLOC_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index b3cfc99f4d7e..a77881f035e7 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -73,32 +73,21 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj) struct radeon_bo *bo 
= gem_to_radeon_bo(obj); int ret = 0; - ret = radeon_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return ret; - /* pin buffer into GTT */ ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL); if (likely(ret == 0)) bo->prime_shared_count++; - radeon_bo_unreserve(bo); return ret; } void radeon_gem_prime_unpin(struct drm_gem_object *obj) { struct radeon_bo *bo = gem_to_radeon_bo(obj); - int ret = 0; - - ret = radeon_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return; radeon_bo_unpin(bo); if (bo->prime_shared_count) bo->prime_shared_count--; - radeon_bo_unreserve(bo); } diff --git a/drivers/gpu/drm/renesas/rcar-du/Kconfig b/drivers/gpu/drm/renesas/rcar-du/Kconfig index 53c356aed5d5..2dc739db2ba3 100644 --- a/drivers/gpu/drm/renesas/rcar-du/Kconfig +++ b/drivers/gpu/drm/renesas/rcar-du/Kconfig @@ -25,8 +25,8 @@ config DRM_RCAR_CMM config DRM_RCAR_DW_HDMI tristate "R-Car Gen3 and RZ/G2 DU HDMI Encoder Support" depends on DRM && OF + depends on DRM_DW_HDMI depends on DRM_RCAR_DU || COMPILE_TEST - select DRM_DW_HDMI help Enable support for R-Car Gen3 or RZ/G2 internal HDMI encoder. diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.c index 0ae6331d6430..8643ff2eec46 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.c @@ -66,9 +66,6 @@ void rzg2l_du_vsp_disable(struct rzg2l_du_crtc *crtc) void rzg2l_du_vsp_atomic_flush(struct rzg2l_du_crtc *crtc) { struct vsp1_du_atomic_pipe_config cfg = { { 0, } }; - struct rzg2l_du_crtc_state *state; - - state = to_rzg2l_crtc_state(crtc->crtc.state); vsp1_du_atomic_flush(crtc->vsp->vsp, crtc->vsp_pipe, &cfg); } diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index 1bf3e2829cd0..4b49a14758fe 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -7,7 +7,6 @@ config DRM_ROCKCHIP select DRM_PANEL select VIDEOMODE_HELPERS select DRM_ANALOGIX_DP if ROCKCHIP_ANALOGIX_DP - select DRM_DW_HDMI if ROCKCHIP_DW_HDMI select DRM_DW_MIPI_DSI if ROCKCHIP_DW_MIPI_DSI select GENERIC_PHY if ROCKCHIP_DW_MIPI_DSI select GENERIC_PHY_MIPI_DPHY if ROCKCHIP_DW_MIPI_DSI @@ -36,9 +35,9 @@ config ROCKCHIP_VOP2 config ROCKCHIP_ANALOGIX_DP bool "Rockchip specific extensions for Analogix DP driver" + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on ROCKCHIP_VOP - select DRM_DISPLAY_HELPER - select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions for the Analogix Core DP driver. If you want to enable DP @@ -46,9 +45,9 @@ config ROCKCHIP_ANALOGIX_DP config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) - select DRM_DISPLAY_HELPER - select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions for the cdn DP driver. If you want to enable Dp on @@ -57,6 +56,7 @@ config ROCKCHIP_CDN_DP config ROCKCHIP_DW_HDMI bool "Rockchip specific extensions for Synopsys DW HDMI" + depends on DRM_DW_HDMI help This selects support for Rockchip SoC specific extensions for the Synopsys DesignWare HDMI driver. 
If you want to diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index 4741d9f6544c..5b19c7cb7b7e 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -57,8 +57,8 @@ config DRM_SUN6I_DSI config DRM_SUN8I_DW_HDMI tristate "Support for Allwinner version of DesignWare HDMI" depends on DRM_SUN4I + depends on DRM_DW_HDMI default DRM_SUN4I - select DRM_DW_HDMI help Choose this option if you have an Allwinner SoC with the DesignWare HDMI controller. SoCs that support HDMI and diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 84e7e6bc3a0c..6974caa99ece 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -4,11 +4,11 @@ config DRM_TEGRA depends on ARCH_TEGRA || COMPILE_TEST depends on COMMON_CLK depends on DRM + depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER - select DRM_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c index a0e494c806a9..f371518f8697 100644 --- a/drivers/gpu/drm/tidss/tidss_kms.c +++ b/drivers/gpu/drm/tidss/tidss_kms.c @@ -135,8 +135,7 @@ static int tidss_dispc_modeset_init(struct tidss_device *tidss) dev_dbg(dev, "no panel/bridge for port %d\n", i); continue; } else if (ret) { - dev_dbg(dev, "port %d probe returned %d\n", i, ret); - return ret; + return dev_err_probe(dev, ret, "port %d probe failed\n", i); } if (panel) { diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 96a724e8f3ff..e059b1e1b13b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -724,64 +724,36 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, return ret; } -/* - * Repeatedly evict memory from the LRU for @mem_type until we create enough - * space, or we've evicted everything and there isn't enough space. - */ -static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, - const struct ttm_place *place, - struct ttm_resource **mem, - struct ttm_operation_ctx *ctx) -{ - struct ttm_device *bdev = bo->bdev; - struct ttm_resource_manager *man; - struct ww_acquire_ctx *ticket; - int ret; - - man = ttm_manager_type(bdev, place->mem_type); - ticket = dma_resv_locking_ctx(bo->base.resv); - do { - ret = ttm_resource_alloc(bo, place, mem); - if (likely(!ret)) - break; - if (unlikely(ret != -ENOSPC)) - return ret; - ret = ttm_mem_evict_first(bdev, man, place, ctx, - ticket); - if (unlikely(ret != 0)) - return ret; - } while (1); - - return ttm_bo_add_move_fence(bo, man, *mem, ctx->no_wait_gpu); -} - /** - * ttm_bo_mem_space + * ttm_bo_alloc_resource - Allocate backing store for a BO * - * @bo: Pointer to a struct ttm_buffer_object. the data of which - * we want to allocate space for. - * @placement: Proposed new placement for the buffer object. - * @mem: A struct ttm_resource. + * @bo: Pointer to a struct ttm_buffer_object of which we want a resource for + * @placement: Proposed new placement for the buffer object * @ctx: if and how to sleep, lock buffers and alloc memory + * @force_space: If we should evict buffers to force space + * @res: The resulting struct ttm_resource. * - * Allocate memory space for the buffer object pointed to by @bo, using - * the placement flags in @placement, potentially evicting other idle buffer objects. 
- * This function may sleep while waiting for space to become available. + * Allocates a resource for the buffer object pointed to by @bo, using the + * placement flags in @placement, potentially evicting other buffer objects when + * @force_space is true. + * This function may sleep while waiting for resources to become available. * Returns: - * -EBUSY: No space available (only if no_wait == 1). + * -EBUSY: No space available (only if no_wait == true). * -ENOSPC: Could not allocate space for the buffer object, either due to * fragmentation or concurrent allocators. * -ERESTARTSYS: An interruptible sleep was interrupted by a signal. */ -int ttm_bo_mem_space(struct ttm_buffer_object *bo, - struct ttm_placement *placement, - struct ttm_resource **mem, - struct ttm_operation_ctx *ctx) +static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, + struct ttm_placement *placement, + struct ttm_operation_ctx *ctx, + bool force_space, + struct ttm_resource **res) { struct ttm_device *bdev = bo->bdev; - bool type_found = false; + struct ww_acquire_ctx *ticket; int i, ret; + ticket = dma_resv_locking_ctx(bo->base.resv); ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) return ret; @@ -790,98 +762,73 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, const struct ttm_place *place = &placement->placement[i]; struct ttm_resource_manager *man; - if (place->flags & TTM_PL_FLAG_FALLBACK) - continue; - man = ttm_manager_type(bdev, place->mem_type); if (!man || !ttm_resource_manager_used(man)) continue; - type_found = true; - ret = ttm_resource_alloc(bo, place, mem); - if (ret == -ENOSPC) + if (place->flags & (force_space ? TTM_PL_FLAG_DESIRED : + TTM_PL_FLAG_FALLBACK)) + continue; + + do { + ret = ttm_resource_alloc(bo, place, res); + if (unlikely(ret && ret != -ENOSPC)) + return ret; + if (likely(!ret) || !force_space) + break; + + ret = ttm_mem_evict_first(bdev, man, place, ctx, + ticket); + if (unlikely(ret == -EBUSY)) + break; + if (unlikely(ret)) + return ret; + } while (1); + if (ret) continue; - if (unlikely(ret)) - goto error; - ret = ttm_bo_add_move_fence(bo, man, *mem, ctx->no_wait_gpu); + ret = ttm_bo_add_move_fence(bo, man, *res, ctx->no_wait_gpu); if (unlikely(ret)) { - ttm_resource_free(bo, mem); + ttm_resource_free(bo, res); if (ret == -EBUSY) continue; - goto error; + return ret; } return 0; } - for (i = 0; i < placement->num_placement; ++i) { - const struct ttm_place *place = &placement->placement[i]; - struct ttm_resource_manager *man; - - if (place->flags & TTM_PL_FLAG_DESIRED) - continue; - - man = ttm_manager_type(bdev, place->mem_type); - if (!man || !ttm_resource_manager_used(man)) - continue; - - type_found = true; - ret = ttm_bo_mem_force_space(bo, place, mem, ctx); - if (likely(!ret)) - return 0; - - if (ret && ret != -EBUSY) - goto error; - } - - ret = -ENOSPC; - if (!type_found) { - pr_err(TTM_PFX "No compatible memory type found\n"); - ret = -EINVAL; - } - -error: - return ret; + return -ENOSPC; } -EXPORT_SYMBOL(ttm_bo_mem_space); -static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, - struct ttm_placement *placement, - struct ttm_operation_ctx *ctx) +/* + * ttm_bo_mem_space - Wrapper around ttm_bo_alloc_resource + * + * @bo: Pointer to a struct ttm_buffer_object of which we want a resource for + * @placement: Proposed new placement for the buffer object + * @res: The resulting struct ttm_resource. + * @ctx: if and how to sleep, lock buffers and alloc memory + * + * Tries both idle allocation and forcefully eviction of buffers. 
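With the split above, ttm_bo_mem_space() (and ttm_bo_validate()) reduce to a two-pass retry: a first ttm_bo_alloc_resource() call with force_space == false that skips TTM_PL_FLAG_FALLBACK placements and never evicts, then, only on -ENOSPC, a second call with force_space == true that skips TTM_PL_FLAG_DESIRED placements and may evict. The shape of that loop as a standalone sketch (try_alloc() is a made-up stand-in for ttm_bo_alloc_resource(), not the real helper):

#include <linux/errno.h>
#include <linux/types.h>

/* Made-up stand-in: pretend the cheap pass fails and the eviction pass works. */
static int try_alloc(bool force_space)
{
	return force_space ? 0 : -ENOSPC;
}

static int alloc_with_fallback(void)
{
	bool force_space = false;
	int ret;

	do {
		ret = try_alloc(force_space);
		/*
		 * After the first pass force_space flips to true, so a second
		 * iteration only happens on -ENOSPC; after the second pass it
		 * flips back to false and the loop terminates either way.
		 */
		force_space = !force_space;
	} while (ret == -ENOSPC && force_space);

	return ret;
}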
See + * ttm_bo_alloc_resource for details. + */ +int ttm_bo_mem_space(struct ttm_buffer_object *bo, + struct ttm_placement *placement, + struct ttm_resource **res, + struct ttm_operation_ctx *ctx) { - struct ttm_resource *mem; - struct ttm_place hop; + bool force_space = false; int ret; - dma_resv_assert_held(bo->base.resv); + do { + ret = ttm_bo_alloc_resource(bo, placement, ctx, + force_space, res); + force_space = !force_space; + } while (ret == -ENOSPC && force_space); - /* - * Determine where to move the buffer. - * - * If driver determines move is going to need - * an extra step then it will return -EMULTIHOP - * and the buffer will be moved to the temporary - * stop and the driver will be called to make - * the second hop. - */ - ret = ttm_bo_mem_space(bo, placement, &mem, ctx); - if (ret) - return ret; -bounce: - ret = ttm_bo_handle_move_mem(bo, mem, false, ctx, &hop); - if (ret == -EMULTIHOP) { - ret = ttm_bo_bounce_temp_buffer(bo, &mem, ctx, &hop); - if (ret) - goto out; - /* try and move to final place now. */ - goto bounce; - } -out: - if (ret) - ttm_resource_free(bo, &mem); return ret; } +EXPORT_SYMBOL(ttm_bo_mem_space); /** * ttm_bo_validate @@ -902,6 +849,9 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, struct ttm_placement *placement, struct ttm_operation_ctx *ctx) { + struct ttm_resource *res; + struct ttm_place hop; + bool force_space; int ret; dma_resv_assert_held(bo->base.resv); @@ -912,20 +862,53 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, if (!placement->num_placement) return ttm_bo_pipeline_gutting(bo); - /* Check whether we need to move buffer. */ - if (bo->resource && ttm_resource_compatible(bo->resource, placement)) - return 0; + force_space = false; + do { + /* Check whether we need to move buffer. */ + if (bo->resource && + ttm_resource_compatible(bo->resource, placement, + force_space)) + return 0; - /* Moving of pinned BOs is forbidden */ - if (bo->pin_count) - return -EINVAL; + /* Moving of pinned BOs is forbidden */ + if (bo->pin_count) + return -EINVAL; + + /* + * Determine where to move the buffer. + * + * If driver determines move is going to need + * an extra step then it will return -EMULTIHOP + * and the buffer will be moved to the temporary + * stop and the driver will be called to make + * the second hop. + */ + ret = ttm_bo_alloc_resource(bo, placement, ctx, force_space, + &res); + force_space = !force_space; + if (ret == -ENOSPC) + continue; + if (ret) + return ret; + +bounce: + ret = ttm_bo_handle_move_mem(bo, res, false, ctx, &hop); + if (ret == -EMULTIHOP) { + ret = ttm_bo_bounce_temp_buffer(bo, &res, ctx, &hop); + /* try and move to final place now. */ + if (!ret) + goto bounce; + } + if (ret) { + ttm_resource_free(bo, &res); + return ret; + } + + } while (ret && force_space); - ret = ttm_bo_move_buffer(bo, placement, ctx); /* For backward compatibility with userspace */ if (ret == -ENOSPC) return -ENOMEM; - if (ret) - return ret; /* * We might need to add a TTM. 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index fb14f7716cf8..be8d286513f9 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -105,6 +105,7 @@ static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk, pos->first = res; pos->last = res; } else { + WARN_ON(pos->first->bo->base.resv != res->bo->base.resv); ttm_lru_bulk_move_pos_tail(pos, res); } } @@ -295,11 +296,13 @@ bool ttm_resource_intersects(struct ttm_device *bdev, * * @res: the resource to check * @placement: the placement to check against + * @evicting: true if the caller is doing evictions * * Returns true if the placement is compatible. */ bool ttm_resource_compatible(struct ttm_resource *res, - struct ttm_placement *placement) + struct ttm_placement *placement, + bool evicting) { struct ttm_buffer_object *bo = res->bo; struct ttm_device *bdev = bo->bdev; @@ -315,14 +318,20 @@ bool ttm_resource_compatible(struct ttm_resource *res, if (res->mem_type != place->mem_type) continue; + if (place->flags & (evicting ? TTM_PL_FLAG_DESIRED : + TTM_PL_FLAG_FALLBACK)) + continue; + + if (place->flags & TTM_PL_FLAG_CONTIGUOUS && + !(res->placement & TTM_PL_FLAG_CONTIGUOUS)) + continue; + man = ttm_manager_type(bdev, res->mem_type); if (man->func->compatible && !man->func->compatible(man, res, place, bo->base.size)) continue; - if ((!(place->flags & TTM_PL_FLAG_CONTIGUOUS) || - (res->placement & TTM_PL_FLAG_CONTIGUOUS))) - return true; + return true; } return false; } diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 91dcf8d174d6..4801f8b64d3d 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -2,15 +2,15 @@ config DRM_VC4 tristate "Broadcom VC4 Graphics" depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST + depends on COMMON_CLK + depends on DRM + depends on DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER + depends on PM # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE. 
depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) - depends on DRM depends on SND && SND_SOC - depends on COMMON_CLK - depends on PM - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 12787bb9c111..186150f41fbc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -48,33 +48,20 @@ static void vmw_gem_object_close(struct drm_gem_object *obj, { } -static int vmw_gem_pin_private(struct drm_gem_object *obj, bool do_pin) +static int vmw_gem_object_pin(struct drm_gem_object *obj) { - struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); struct vmw_bo *vbo = to_vmw_bo(obj); - int ret; - - ret = ttm_bo_reserve(bo, false, false, NULL); - if (unlikely(ret != 0)) - goto err; - - vmw_bo_pin_reserved(vbo, do_pin); - - ttm_bo_unreserve(bo); - -err: - return ret; -} + vmw_bo_pin_reserved(vbo, true); -static int vmw_gem_object_pin(struct drm_gem_object *obj) -{ - return vmw_gem_pin_private(obj, true); + return 0; } static void vmw_gem_object_unpin(struct drm_gem_object *obj) { - vmw_gem_pin_private(obj, false); + struct vmw_bo *vbo = to_vmw_bo(obj); + + vmw_bo_pin_reserved(vbo, false); } static struct sg_table *vmw_gem_object_get_sg_table(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 1a556d087e63..bfa0e9d4bd64 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe Graphics" - depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) + depends on (m || (y && KUNIT=y)) + depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER + depends on MMU + depends on PCI select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -13,10 +20,6 @@ config DRM_XE select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_MIPI_DSI select RELAY select IRQ_WORK diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index 68ee897de9d7..41d753b14ccd 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -1,13 +1,15 @@ config DRM_ZYNQMP_DPSUB tristate "ZynqMP DisplayPort Controller Driver" depends on ARCH_ZYNQMP || COMPILE_TEST - depends on COMMON_CLK && DRM && OF + depends on COMMON_CLK depends on DMADEVICES + depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HELPER + depends on OF depends on PHY_XILINX_ZYNQMP depends on XILINX_ZYNQMP_DPDMA select DMA_ENGINE - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER select GENERIC_PHY diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 1846c4971fd8..5a40aa1d4283 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -1714,6 +1714,10 @@ int zynqmp_dp_probe(struct zynqmp_dpsub *dpsub) goto err_free; } + ret = zynqmp_dp_reset(dp, true); + if (ret < 0) + return ret; + ret = zynqmp_dp_reset(dp, false); if (ret < 0) goto err_free; |
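A pattern shared by the qxl, radeon and vmwgfx hunks above: the GEM/PRIME pin and unpin hooks are now expected to be called with the object's reservation lock already held, so the per-driver reserve/unreserve dance disappears and is replaced by an assertion, while an unlocked wrapper remains for callers that take the lock themselves. A condensed sketch of that convention for a TTM-based driver (the my_bo_* names are hypothetical, not any of the drivers touched here):

#include <linux/dma-resv.h>
#include <drm/ttm/ttm_bo.h>

/* _locked variants: the caller (e.g. the PRIME pin path) already holds resv. */
static void my_bo_pin_locked(struct ttm_buffer_object *bo)
{
	dma_resv_assert_held(bo->base.resv);
	ttm_bo_pin(bo);
}

static void my_bo_unpin_locked(struct ttm_buffer_object *bo)
{
	dma_resv_assert_held(bo->base.resv);
	ttm_bo_unpin(bo);
}

/* Unlocked wrapper for callers that do not hold the reservation yet. */
static int my_bo_pin(struct ttm_buffer_object *bo)
{
	int ret = ttm_bo_reserve(bo, false, false, NULL);

	if (ret)
		return ret;

	my_bo_pin_locked(bo);
	ttm_bo_unreserve(bo);
	return 0;
}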