summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/host1x/job.c
diff options
context:
space:
mode:
authorMikko Perttunen <mperttunen@nvidia.com>2021-06-10 13:04:45 +0200
committerThierry Reding <treding@nvidia.com>2021-08-10 14:41:19 +0200
commite902585fc8b639f1a1258eaa6265e98994e34ef8 (patch)
tree58bb0fab112f61f6bfcc198d36a0e0cd5d98a516 /drivers/gpu/host1x/job.c
parentgpu: host1x: Add job release callback (diff)
downloadlinux-e902585fc8b639f1a1258eaa6265e98994e34ef8.tar.xz
linux-e902585fc8b639f1a1258eaa6265e98994e34ef8.zip
gpu: host1x: Add support for syncpoint waits in CDMA pushbuffer
Add support for inserting syncpoint waits in the CDMA pushbuffer. These waits need to be done in HOST1X class, while gather submitted by the application execute in engine class. Support is added by converting the gather list of job into a command list that can include both gathers and waits. When the job is submitted, these commands are pushed as the appropriate opcodes on the CDMA pushbuffer. Also supported are waits relative to the start of the job, which are useful for jobs doing multiple things with an engine that doesn't natively support pipelining. While at it, use 32-bit waits on chips that support them. Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com>
Diffstat (limited to 'drivers/gpu/host1x/job.c')
-rw-r--r--drivers/gpu/host1x/job.c70
1 files changed, 51 insertions, 19 deletions
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 09097e19c0d0..32619b73a2fc 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -38,7 +38,7 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
total = sizeof(struct host1x_job) +
(u64)num_relocs * sizeof(struct host1x_reloc) +
(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
- (u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
+ (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
(u64)num_unpins * sizeof(dma_addr_t) +
(u64)num_unpins * sizeof(u32 *);
if (total > ULONG_MAX)
@@ -57,8 +57,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
mem += num_relocs * sizeof(struct host1x_reloc);
job->unpins = num_unpins ? mem : NULL;
mem += num_unpins * sizeof(struct host1x_job_unpin_data);
- job->gathers = num_cmdbufs ? mem : NULL;
- mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+ job->cmds = num_cmdbufs ? mem : NULL;
+ mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
job->addr_phys = num_unpins ? mem : NULL;
job->reloc_addr_phys = job->addr_phys;
@@ -101,22 +101,38 @@ EXPORT_SYMBOL(host1x_job_put);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
unsigned int words, unsigned int offset)
{
- struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
+ struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
gather->words = words;
gather->bo = bo;
gather->offset = offset;
- job->num_gathers++;
+ job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+ bool relative, u32 next_class)
+{
+ struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
+
+ cmd->is_wait = true;
+ cmd->wait.id = id;
+ cmd->wait.threshold = thresh;
+ cmd->wait.next_class = next_class;
+ cmd->wait.relative = relative;
+
+ job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
+
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
struct host1x_client *client = job->client;
struct device *dev = client->dev;
struct host1x_job_gather *g;
struct iommu_domain *domain;
+ struct sg_table *sgt;
unsigned int i;
int err;
@@ -126,7 +142,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i];
dma_addr_t phys_addr, *phys;
- struct sg_table *sgt;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
@@ -202,17 +217,20 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
return 0;
- for (i = 0; i < job->num_gathers; i++) {
+ for (i = 0; i < job->num_cmds; i++) {
size_t gather_size = 0;
struct scatterlist *sg;
- struct sg_table *sgt;
dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
dma_addr_t *phys;
unsigned int j;
- g = &job->gathers[i];
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
+
g->bo = host1x_bo_get(g->bo);
if (!g->bo) {
err = -EINVAL;
@@ -545,8 +563,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
fw.num_relocs = job->num_relocs;
fw.class = job->class;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
size += g->words * sizeof(u32);
}
@@ -568,10 +591,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
job->gather_copy_size = size;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
void *gather;
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
+
/* Copy the gather */
gather = host1x_bo_mmap(g->bo);
memcpy(job->gather_copy_mapped + offset, gather + g->offset,
@@ -614,8 +641,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
}
/* patch gathers */
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
/* process each gather mem only once */
if (g->handled)
@@ -625,10 +656,11 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
g->base = job->gather_addr_phys[i];
- for (j = i + 1; j < job->num_gathers; j++) {
- if (job->gathers[j].bo == g->bo) {
- job->gathers[j].handled = true;
- job->gathers[j].base = g->base;
+ for (j = i + 1; j < job->num_cmds; j++) {
+ if (!job->cmds[j].is_wait &&
+ job->cmds[j].gather.bo == g->bo) {
+ job->cmds[j].gather.handled = true;
+ job->cmds[j].gather.base = g->base;
}
}