-rw-r--r-- | drivers/gpu/drm/i915/gvt/execlist.c  | 294
-rw-r--r-- | drivers/gpu/drm/i915/gvt/execlist.h  |  48
-rw-r--r-- | drivers/gpu/drm/i915/gvt/gvt.h       |   5
-rw-r--r-- | drivers/gpu/drm/i915/gvt/handlers.c  |  38
-rw-r--r-- | drivers/gpu/drm/i915/gvt/reg.h       |   5
-rw-r--r-- | drivers/gpu/drm/i915/gvt/scheduler.h |  64
-rw-r--r-- | drivers/gpu/drm/i915/gvt/vgpu.c      |   1
7 files changed, 449 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 3c38e2a2dba3..f149847cbd17 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -167,7 +167,7 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
 			ring_id_to_context_switch_event(execlist->ring_id));
 }
 
-int emulate_execlist_ctx_schedule_out(
+static int emulate_execlist_ctx_schedule_out(
 		struct intel_vgpu_execlist *execlist,
 		struct execlist_ctx_descriptor_format *ctx)
 {
@@ -260,7 +260,7 @@ static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
 	return &execlist->slot[status.execlist_write_pointer];
 }
 
-int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
+static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
 		struct execlist_ctx_descriptor_format ctx[2])
 {
 	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
@@ -353,6 +353,279 @@ int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
 	return 0;
 }
 
+static void free_workload(struct intel_vgpu_workload *workload)
+{
+	intel_vgpu_unpin_mm(workload->shadow_mm);
+	intel_gvt_mm_unreference(workload->shadow_mm);
+	kmem_cache_free(workload->vgpu->workloads, workload);
+}
+
+#define get_desc_from_elsp_dwords(ed, i) \
+	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
+
+static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct execlist_ctx_descriptor_format ctx[2];
+	int ring_id = workload->ring_id;
+
+	intel_vgpu_pin_mm(workload->shadow_mm);
+	intel_vgpu_sync_oos_pages(workload->vgpu);
+	intel_vgpu_flush_post_shadow(workload->vgpu);
+	if (!workload->emulate_schedule_in)
+		return 0;
+
+	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
+	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
+
+	return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
+}
+
+static int complete_execlist_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_execlist *execlist =
+		&vgpu->execlist[workload->ring_id];
+	struct intel_vgpu_workload *next_workload;
+	struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next;
+	bool lite_restore = false;
+	int ret;
+
+	gvt_dbg_el("complete workload %p status %d\n", workload,
+			workload->status);
+
+	if (workload->status)
+		goto out;
+
+	if (!list_empty(workload_q_head(vgpu, workload->ring_id))) {
+		struct execlist_ctx_descriptor_format *this_desc, *next_desc;
+
+		next_workload = container_of(next,
+				struct intel_vgpu_workload, list);
+		this_desc = &workload->ctx_desc;
+		next_desc = &next_workload->ctx_desc;
+
+		lite_restore = same_context(this_desc, next_desc);
+	}
+
+	if (lite_restore) {
+		gvt_dbg_el("next context == current - no schedule-out\n");
+		free_workload(workload);
+		return 0;
+	}
+
+	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
+	if (ret)
+		goto err;
+out:
+	free_workload(workload);
+	return 0;
+err:
+	free_workload(workload);
+	return ret;
+}
+
+#define RING_CTX_OFF(x) \
+	offsetof(struct execlist_ring_context, x)
+
+static void read_guest_pdps(struct intel_vgpu *vgpu,
+		u64 ring_context_gpa, u32 pdp[8])
+{
+	u64 gpa;
+	int i;
+
+	gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);
+
+	for (i = 0; i < 8; i++)
+		intel_gvt_hypervisor_read_gpa(vgpu,
+				gpa + i * 8, &pdp[7 - i], 4);
+}
+
+static int prepare_mm(struct intel_vgpu_workload *workload)
+{
+	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
+	struct intel_vgpu_mm *mm;
+	int page_table_level;
+	u32 pdp[8];
+
+	if (desc->addressing_mode == 1) { /* legacy 32-bit */
+		page_table_level = 3;
+	} else if (desc->addressing_mode == 3) { /* legacy 64 bit */
+		page_table_level = 4;
+	} else {
+		gvt_err("Advanced Context mode(SVM) is not supported!\n");
+		return -EINVAL;
+	}
+
+	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);
+
+	mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
+	if (mm) {
+		intel_gvt_mm_reference(mm);
+	} else {
+
+		mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
+				pdp, page_table_level, 0);
+		if (IS_ERR(mm)) {
+			gvt_err("fail to create mm object.\n");
+			return PTR_ERR(mm);
+		}
+	}
+	workload->shadow_mm = mm;
+	return 0;
+}
+
+#define get_last_workload(q) \
+	(list_empty(q) ? NULL : container_of(q->prev, \
+	struct intel_vgpu_workload, list))
+
+static int submit_context(struct intel_vgpu *vgpu, int ring_id,
+		struct execlist_ctx_descriptor_format *desc,
+		bool emulate_schedule_in)
+{
+	struct list_head *q = workload_q_head(vgpu, ring_id);
+	struct intel_vgpu_workload *last_workload = get_last_workload(q);
+	struct intel_vgpu_workload *workload = NULL;
+	u64 ring_context_gpa;
+	u32 head, tail, start, ctl, ctx_ctl;
+	int ret;
+
+	ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
+			(u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
+	if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
+		gvt_err("invalid guest context LRCA: %x\n", desc->lrca);
+		return -EINVAL;
+	}
+
+	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(ring_header.val), &head, 4);
+
+	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(ring_tail.val), &tail, 4);
+
+	head &= RB_HEAD_OFF_MASK;
+	tail &= RB_TAIL_OFF_MASK;
+
+	if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
+		gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
+		gvt_dbg_el("ctx head %x real head %lx\n", head,
+				last_workload->rb_tail);
+		/*
+		 * cannot use guest context head pointer here,
+		 * as it might not be updated at this time
+		 */
+		head = last_workload->rb_tail;
+	}
+
+	gvt_dbg_el("ring id %d begin a new workload\n", ring_id);
+
+	workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL);
+	if (!workload)
+		return -ENOMEM;
+
+	/* record some ring buffer register values for scan and shadow */
+	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(rb_start.val), &start, 4);
+	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
+	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
+
+	INIT_LIST_HEAD(&workload->list);
+
+	init_waitqueue_head(&workload->shadow_ctx_status_wq);
+	atomic_set(&workload->shadow_ctx_active, 0);
+
+	workload->vgpu = vgpu;
+	workload->ring_id = ring_id;
+	workload->ctx_desc = *desc;
+	workload->ring_context_gpa = ring_context_gpa;
+	workload->rb_head = head;
+	workload->rb_tail = tail;
+	workload->rb_start = start;
+	workload->rb_ctl = ctl;
+	workload->prepare = prepare_execlist_workload;
+	workload->complete = complete_execlist_workload;
+	workload->status = -EINPROGRESS;
+	workload->emulate_schedule_in = emulate_schedule_in;
+
+	if (emulate_schedule_in)
+		memcpy(&workload->elsp_dwords,
+				&vgpu->execlist[ring_id].elsp_dwords,
+				sizeof(workload->elsp_dwords));
+
+	gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
+			workload, ring_id, head, tail, start, ctl);
+
+	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
+			emulate_schedule_in);
+
+	ret = prepare_mm(workload);
+	if (ret) {
+		kmem_cache_free(vgpu->workloads, workload);
+		return ret;
+	}
+
+	queue_workload(workload);
+	return 0;
+}
+
+int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
+{
+	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
+	struct execlist_ctx_descriptor_format *desc[2], valid_desc[2];
+	unsigned long valid_desc_bitmap = 0;
+	bool emulate_schedule_in = true;
+	int ret;
+	int i;
+
+	memset(valid_desc, 0, sizeof(valid_desc));
+
+	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
+	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
+
+	for (i = 0; i < 2; i++) {
+		if (!desc[i]->valid)
+			continue;
+
+		if (!desc[i]->privilege_access) {
+			gvt_err("vgpu%d: unexpected GGTT elsp submission\n",
+					vgpu->id);
+			return -EINVAL;
+		}
+
+		/* TODO: add another guest context checks here. */
+		set_bit(i, &valid_desc_bitmap);
+		valid_desc[i] = *desc[i];
+	}
+
+	if (!valid_desc_bitmap) {
+		gvt_err("vgpu%d: no valid desc in a elsp submission\n",
+				vgpu->id);
+		return -EINVAL;
+	}
+
+	if (!test_bit(0, (void *)&valid_desc_bitmap) &&
+			test_bit(1, (void *)&valid_desc_bitmap)) {
+		gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n",
+				vgpu->id);
+		return -EINVAL;
+	}
+
+	/* submit workload */
+	for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) {
+		ret = submit_context(vgpu, ring_id, &valid_desc[i],
+				emulate_schedule_in);
+		if (ret) {
+			gvt_err("vgpu%d: fail to schedule workload\n",
+					vgpu->id);
+			return ret;
+		}
+		emulate_schedule_in = false;
+	}
+	return 0;
+}
+
 static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 {
 	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
@@ -374,13 +647,28 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
 }
 
+void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
+{
+	kmem_cache_destroy(vgpu->workloads);
+}
+
 int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 {
 	int i;
 
 	/* each ring has a virtual execlist engine */
-	for (i = 0; i < I915_NUM_ENGINES; i++)
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
 		init_vgpu_execlist(vgpu, i);
+		INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
+	}
+
+	vgpu->workloads = kmem_cache_create("gvt-g vgpu workload",
+			sizeof(struct intel_vgpu_workload), 0,
+			SLAB_HWCACHE_ALIGN,
+			NULL);
+
+	if (!vgpu->workloads)
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h
index 428f54358ae8..13614fb4e572 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.h
+++ b/drivers/gpu/drm/i915/gvt/execlist.h
@@ -118,6 +118,49 @@ struct execlist_context_status_format {
 	};
 };
 
+struct execlist_mmio_pair {
+	u32 addr;
+	u32 val;
+};
+
+/* The first 52 dwords in register state context */
+struct execlist_ring_context {
+	u32 nop1;
+	u32 lri_cmd_1;
+	struct execlist_mmio_pair ctx_ctrl;
+	struct execlist_mmio_pair ring_header;
+	struct execlist_mmio_pair ring_tail;
+	struct execlist_mmio_pair rb_start;
+	struct execlist_mmio_pair rb_ctrl;
+	struct execlist_mmio_pair bb_cur_head_UDW;
+	struct execlist_mmio_pair bb_cur_head_LDW;
+	struct execlist_mmio_pair bb_state;
+	struct execlist_mmio_pair second_bb_addr_UDW;
+	struct execlist_mmio_pair second_bb_addr_LDW;
+	struct execlist_mmio_pair second_bb_state;
+	struct execlist_mmio_pair bb_per_ctx_ptr;
+	struct execlist_mmio_pair rcs_indirect_ctx;
+	struct execlist_mmio_pair rcs_indirect_ctx_offset;
+	u32 nop2;
+	u32 nop3;
+	u32 nop4;
+	u32 lri_cmd_2;
+	struct execlist_mmio_pair ctx_timestamp;
+	struct execlist_mmio_pair pdp3_UDW;
+	struct execlist_mmio_pair pdp3_LDW;
+	struct execlist_mmio_pair pdp2_UDW;
+	struct execlist_mmio_pair pdp2_LDW;
+	struct execlist_mmio_pair pdp1_UDW;
+	struct execlist_mmio_pair pdp1_LDW;
+	struct execlist_mmio_pair pdp0_UDW;
+	struct execlist_mmio_pair pdp0_LDW;
+};
+
+struct intel_vgpu_elsp_dwords {
+	u32 data[4];
+	u32 index;
+};
+
 struct intel_vgpu_execlist_slot {
 	struct execlist_ctx_descriptor_format ctx[2];
 	u32 index;
@@ -130,8 +173,13 @@ struct intel_vgpu_execlist {
 	struct execlist_ctx_descriptor_format *running_context;
 	int ring_id;
 	struct intel_vgpu *vgpu;
+	struct intel_vgpu_elsp_dwords elsp_dwords;
 };
 
+void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu);
+
 int intel_vgpu_init_execlist(struct intel_vgpu *vgpu);
 
+int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id);
+
 #endif /*_GVT_EXECLIST_H_*/
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 82f932a24e7d..414163695e07 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -42,6 +42,7 @@
 #include "display.h"
 #include "edid.h"
 #include "execlist.h"
+#include "scheduler.h"
 
 #define GVT_MAX_VGPU 8
 
@@ -147,8 +148,9 @@ struct intel_vgpu {
 	struct intel_vgpu_gtt gtt;
 	struct intel_vgpu_opregion opregion;
 	struct intel_vgpu_display display;
-	/* TODO: move the declaration of intel_gvt.h to a proper place. */
 	struct intel_vgpu_execlist execlist[I915_NUM_ENGINES];
+	struct list_head workload_q_head[I915_NUM_ENGINES];
+	struct kmem_cache *workloads;
 };
 
 struct intel_gvt_gm {
@@ -193,6 +195,7 @@ struct intel_gvt {
 	struct intel_gvt_irq irq;
 	struct intel_gvt_gtt gtt;
 	struct intel_gvt_opregion opregion;
+	struct intel_gvt_workload_scheduler scheduler;
 
 	struct task_struct *service_thread;
 	wait_queue_head_t service_thread_wq;
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 194778b374ff..970804aed381 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -128,6 +128,18 @@ static int new_mmio_info(struct intel_gvt *gvt,
 	return 0;
 }
 
+static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
+{
+	int i;
+
+	reg &= ~GENMASK(11, 0);
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		if (gvt->dev_priv->engine[i].mmio_base == reg)
+			return i;
+	}
+	return -1;
+}
+
 #define offset_to_fence_num(offset) \
 	((offset - i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) >> 3)
 
@@ -1262,6 +1274,28 @@ static int ring_timestamp_mmio_read(struct intel_vgpu *vgpu,
 	return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
 }
 
+static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset);
+	struct intel_vgpu_execlist *execlist;
+	u32 data = *(u32 *)p_data;
+	int ret;
+
+	if (WARN_ON(ring_id < 0))
+		return -EINVAL;
+
+	execlist = &vgpu->execlist[ring_id];
+
+	execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data;
+	if (execlist->elsp_dwords.index == 3)
+		ret = intel_vgpu_submit_execlist(vgpu, ring_id);
+
+	++execlist->elsp_dwords.index;
+	execlist->elsp_dwords.index &= 0x3;
+	return 0;
+}
+
 #define MMIO_F(reg, s, f, am, rm, d, r, w) do { \
 	ret = new_mmio_info(gvt, INTEL_GVT_MMIO_OFFSET(reg), \
 		f, s, am, rm, d, r, w); \
@@ -2169,8 +2203,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS);
 
 #define RING_REG(base) (base + 0x230)
-	MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, NULL);
-	MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
+	MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write);
+	MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, elsp_mmio_write);
 #undef RING_REG
 
 #define RING_REG(base) (base + 0x234)
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index 4842cb97c892..0dfe789d8f02 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -72,4 +72,9 @@
 #define FORCEWAKE_ACK_MEDIA_GEN9_REG 0x0D88
 #define FORCEWAKE_ACK_HSW_REG 0x130044
 
+#define RB_HEAD_OFF_MASK	((1U << 21) - (1U << 2))
+#define RB_TAIL_OFF_MASK	((1U << 21) - (1U << 3))
+#define RB_TAIL_SIZE_MASK	((1U << 21) - (1U << 12))
+#define _RING_CTL_BUF_SIZE(ctl)	(((ctl) & RB_TAIL_SIZE_MASK) + GTT_PAGE_SIZE)
+
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
new file mode 100644
index 000000000000..8884749f0bd4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _GVT_SCHEDULER_H_
+#define _GVT_SCHEDULER_H_
+
+struct intel_gvt_workload_scheduler {
+	struct list_head workload_q_head[I915_NUM_ENGINES];
+};
+
+struct intel_vgpu_workload {
+	struct intel_vgpu *vgpu;
+	int ring_id;
+	struct drm_i915_gem_request *req;
+	/* if this workload has been dispatched to i915? */
+	bool dispatched;
+	int status;
+
+	struct intel_vgpu_mm *shadow_mm;
+
+	/* different submission model may need different handler */
+	int (*prepare)(struct intel_vgpu_workload *);
+	int (*complete)(struct intel_vgpu_workload *);
+	struct list_head list;
+
+	/* execlist context information */
+	struct execlist_ctx_descriptor_format ctx_desc;
+	struct execlist_ring_context *ring_context;
+	unsigned long rb_head, rb_tail, rb_ctl, rb_start;
+	struct intel_vgpu_elsp_dwords elsp_dwords;
+	bool emulate_schedule_in;
+	atomic_t shadow_ctx_active;
+	wait_queue_head_t shadow_ctx_status_wq;
+	u64 ring_context_gpa;
+};
+
+#define workload_q_head(vgpu, ring_id) \
+	(&(vgpu->workload_q_head[ring_id]))
+
+#define queue_workload(workload) \
+	list_add_tail(&workload->list, \
+	workload_q_head(workload->vgpu, workload->ring_id))
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index e806b0e4035a..705a23c1ed85 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -146,6 +146,7 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
 	vgpu->active = false;
 	idr_remove(&gvt->vgpu_idr, vgpu->id);
 
+	intel_vgpu_clean_execlist(vgpu);
 	intel_vgpu_clean_display(vgpu);
 	intel_vgpu_clean_opregion(vgpu);
 	intel_vgpu_clean_gtt(vgpu);