summaryrefslogtreecommitdiffstats
path: root/include/drm/gpu_scheduler.h
diff options
context:
space:
mode:
authorAndrey Grodzovsky <andrey.grodzovsky@amd.com>2022-09-30 06:12:58 +0200
committerAndrey Grodzovsky <andrey.grodzovsky@amd.com>2022-09-30 15:12:08 +0200
commit08fb97de03aa2205c6791301bd83a095abc1949c (patch)
tree9520aa6fffc633c91add639325032ee2a22ea170 /include/drm/gpu_scheduler.h
parentdrm/panel: simple: Use dev_err_probe() to simplify code (diff)
downloadlinux-08fb97de03aa2205c6791301bd83a095abc1949c.tar.xz
linux-08fb97de03aa2205c6791301bd83a095abc1949c.zip
drm/sched: Add FIFO sched policy to run queue
When many entities are competing for the same run queue on the same scheduler, we observe unusually long wait times and some jobs get starved. This has been observed on GPUVis. The issue is due to the Round Robin policy used by schedulers to pick up the next entity's job queue for execution. Under stress of many entities and long job queues within an entity, some jobs could be stuck for a very long time in their entity's queue before being popped from the queue and executed, while for other entities with smaller job queues a job might execute earlier even though that job arrived later than the job in the long queue. Fix: Add FIFO selection policy to entities in run queue, choose next entity on run queue in such order that if a job on one entity arrived earlier than a job on another entity, the first job will start executing earlier regardless of the length of the entity's job queue. v2: Switch to rb tree structure for entities based on TS of oldest job waiting in the job queue of an entity. Improves next entity extraction to O(1). Entity TS update O(log N) where N is the number of entities in the run-queue. Drop default option in module control parameter. v3: Various cosmetic fixes and minor refactoring of fifo update function. 
(Luben) v4: Switch drm_sched_rq_select_entity_fifo to in order search (Luben) v5: Fix up drm_sched_rq_select_entity_fifo loop (Luben) v6: Add missing drm_sched_rq_remove_fifo_locked v7: Fix ts sampling bug and more cosmetic stuff (Luben) v8: Fix module parameter string (Luben) Cc: Luben Tuikov <luben.tuikov@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Direct Rendering Infrastructure - Development <dri-devel@lists.freedesktop.org> Cc: AMD Graphics <amd-gfx@lists.freedesktop.org> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Tested-by: Yunxiang Li (Teddy) <Yunxiang.Li@amd.com> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com> Reviewed-by: Luben Tuikov <luben.tuikov@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220930041258.1050247-1-luben.tuikov@amd.com
Diffstat (limited to 'include/drm/gpu_scheduler.h')
-rw-r--r--include/drm/gpu_scheduler.h32
1 files changed, 32 insertions, 0 deletions
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 599855c6a672..1f7d9dd1a444 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -50,6 +50,12 @@ enum drm_sched_priority {
DRM_SCHED_PRIORITY_UNSET = -2
};
+/* Used to choose between FIFO and RR job scheduling */
+extern int drm_sched_policy;
+
+#define DRM_SCHED_POLICY_RR 0
+#define DRM_SCHED_POLICY_FIFO 1
+
/**
* struct drm_sched_entity - A wrapper around a job queue (typically
* attached to the DRM file_priv).
@@ -196,6 +202,21 @@ struct drm_sched_entity {
* drm_sched_entity_fini().
*/
struct completion entity_idle;
+
+ /**
+ * @oldest_job_waiting:
+ *
+ * Marks earliest job waiting in SW queue
+ */
+ ktime_t oldest_job_waiting;
+
+ /**
+ * @rb_tree_node:
+ *
+ * The node used to insert this entity into time based priority queue
+ */
+ struct rb_node rb_tree_node;
+
};
/**
@@ -205,6 +226,7 @@ struct drm_sched_entity {
* @sched: the scheduler to which this rq belongs to.
* @entities: list of the entities to be scheduled.
* @current_entity: the entity which is to be scheduled.
+ * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling
*
* Run queue is a set of entities scheduling command submissions for
* one specific ring. It implements the scheduling policy that selects
@@ -215,6 +237,7 @@ struct drm_sched_rq {
struct drm_gpu_scheduler *sched;
struct list_head entities;
struct drm_sched_entity *current_entity;
+ struct rb_root_cached rb_tree_root;
};
/**
@@ -314,6 +337,13 @@ struct drm_sched_job {
/** @last_dependency: tracks @dependencies as they signal */
unsigned long last_dependency;
+
+ /**
+ * @submit_ts:
+ *
+ * When the job was pushed into the entity queue.
+ */
+ ktime_t submit_ts;
};
static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
@@ -503,6 +533,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
struct drm_sched_entity *entity);
+void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts);
+
int drm_sched_entity_init(struct drm_sched_entity *entity,
enum drm_sched_priority priority,
struct drm_gpu_scheduler **sched_list,