Diffstat (limited to 'drivers/gpu/host1x')
 drivers/gpu/host1x/Makefile                |   1
 drivers/gpu/host1x/cdma.c                  |  58
 drivers/gpu/host1x/fence.c                 | 168
 drivers/gpu/host1x/fence.h                 |  13
 drivers/gpu/host1x/hw/channel_hw.c         |  87
 drivers/gpu/host1x/hw/debug_hw.c           |  32
 drivers/gpu/host1x/hw/debug_hw_1x01.c      |   8
 drivers/gpu/host1x/hw/debug_hw_1x06.c      |  16
 drivers/gpu/host1x/hw/hw_host1x02_uclass.h |  12
 drivers/gpu/host1x/hw/hw_host1x04_uclass.h |  12
 drivers/gpu/host1x/hw/hw_host1x05_uclass.h |  12
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h |  12
 drivers/gpu/host1x/hw/hw_host1x07_uclass.h |  12
 drivers/gpu/host1x/intr.c                  |   9
 drivers/gpu/host1x/intr.h                  |   2
 drivers/gpu/host1x/job.c                   |  98
 drivers/gpu/host1x/job.h                   |  16
 drivers/gpu/host1x/syncpt.c                |   2
 drivers/gpu/host1x/syncpt.h                |  12
19 files changed, 510 insertions(+), 72 deletions(-)
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 096017b8789d..d2b6f7de0498 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -9,6 +9,7 @@ host1x-y = \
         job.o \
         debug.o \
         mipi.o \
+        fence.o \
         hw/host1x01.o \
         hw/host1x02.o \
         hw/host1x04.o \
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 6e6ca774f68d..765e5aa64eb6 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -312,10 +312,6 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
         bool signal = false;
         struct host1x_job *job, *n;
 
-        /* If CDMA is stopped, queue is cleared and we can return */
-        if (!cdma->running)
-                return;
-
         /*
          * Walk the sync queue, reading the sync point registers as necessary,
          * to consume as many sync queue entries as possible without blocking
@@ -324,7 +320,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
                 struct host1x_syncpt *sp = job->syncpt;
 
                 /* Check whether this syncpt has completed, and bail if not */
-                if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
+                if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
+                    !job->cancelled) {
                         /* Start timer on next pending syncpt */
                         if (job->timeout)
                                 cdma_start_timer_locked(cdma, job);
@@ -413,8 +410,11 @@ syncpt_incr:
         else
                 restart_addr = cdma->last_pos;
 
+        if (!job)
+                goto resume;
+
         /* do CPU increments for the remaining syncpts */
-        if (job) {
+        if (job->syncpt_recovery) {
                 dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
                         __func__);
 
@@ -433,8 +433,44 @@ syncpt_incr:
                 dev_dbg(dev, "%s: finished sync_queue modification\n",
                         __func__);
+        } else {
+                struct host1x_job *failed_job = job;
+
+                host1x_job_dump(dev, job);
+
+                host1x_syncpt_set_locked(job->syncpt);
+                failed_job->cancelled = true;
+
+                list_for_each_entry_continue(job, &cdma->sync_queue, list) {
+                        unsigned int i;
+
+                        if (job->syncpt != failed_job->syncpt)
+                                continue;
+
+                        for (i = 0; i < job->num_slots; i++) {
+                                unsigned int slot = (job->first_get/8 + i) %
+                                                    HOST1X_PUSHBUFFER_SLOTS;
+                                u32 *mapped = cdma->push_buffer.mapped;
+
+                                /*
+                                 * Overwrite opcodes with 0 word writes
+                                 * to offset 0xbad. This does nothing but
+                                 * has an easily detected signature in debug
+                                 * traces.
+                                 */
+                                mapped[2*slot+0] = 0x1bad0000;
+                                mapped[2*slot+1] = 0x1bad0000;
+                        }
+
+                        job->cancelled = true;
+                }
+
+                wmb();
+
+                update_cdma_locked(cdma);
         }
 
+resume:
         /* roll back DMAGET and start up channel again */
         host1x_hw_cdma_resume(host1x, cdma, restart_addr);
 }
@@ -490,6 +526,16 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
 
         mutex_lock(&cdma->lock);
 
+        /*
+         * Check if syncpoint was locked due to previous job timeout.
+         * This needs to be done within the cdma lock to avoid a race
+         * with the timeout handler.
+         */
+        if (job->syncpt->locked) {
+                mutex_unlock(&cdma->lock);
+                return -EPERM;
+        }
+
         if (job->timeout) {
                 /* init state on first submit with timeout value */
                 if (!cdma->timeout.initialized) {
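The 0x1bad0000 value written above decodes neatly — a sketch (not part of the patch), assuming the standard host1x opcode layout with the opcode in bits 31:28, the register offset in bits 27:16 and the word count in bits 15:0; the helper mirrors what the driver's own opcode builders look like:

/*
 * Sketch only: 0x1bad0000 is an INCR opcode targeting offset 0xbad
 * with a word count of zero -- the hardware writes nothing, but the
 * "1bad" pattern is easy to spot when dumping the push buffer.
 */
static inline u32 host1x_opcode_incr(unsigned int offset, unsigned int count)
{
        return (1u << 28) | ((offset & 0xfff) << 16) | (count & 0xffff);
}

/* host1x_opcode_incr(0xbad, 0) == 0x1bad0000 */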
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
new file mode 100644
index 000000000000..6941add95d0f
--- /dev/null
+++ b/drivers/gpu/host1x/fence.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Syncpoint dma_fence implementation
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/sync_file.h>
+
+#include "fence.h"
+#include "intr.h"
+#include "syncpt.h"
+
+DEFINE_SPINLOCK(lock);
+
+struct host1x_syncpt_fence {
+        struct dma_fence base;
+
+        atomic_t signaling;
+
+        struct host1x_syncpt *sp;
+        u32 threshold;
+
+        struct host1x_waitlist *waiter;
+        void *waiter_ref;
+
+        struct delayed_work timeout_work;
+};
+
+static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f)
+{
+        return "host1x";
+}
+
+static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f)
+{
+        return "syncpoint";
+}
+
+static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
+{
+        return container_of(f, struct host1x_syncpt_fence, base);
+}
+
+static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
+{
+        struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+        int err;
+
+        if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
+                return false;
+
+        dma_fence_get(f);
+
+        /*
+         * The dma_fence framework requires the fence driver to keep a
+         * reference to any fences for which 'enable_signaling' has been
+         * called (and that have not been signalled).
+         *
+         * We provide a userspace API to create arbitrary syncpoint fences,
+         * so we cannot normally guarantee that all fences get signalled.
+         * As such, setup a timeout, so that long-lasting fences will get
+         * reaped eventually.
+         */
+        schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+
+        err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold,
+                                     HOST1X_INTR_ACTION_SIGNAL_FENCE, f,
+                                     sf->waiter, &sf->waiter_ref);
+        if (err) {
+                cancel_delayed_work_sync(&sf->timeout_work);
+                dma_fence_put(f);
+                return false;
+        }
+
+        /* intr framework takes ownership of waiter */
+        sf->waiter = NULL;
+
+        /*
+         * The fence may get signalled at any time after the above call,
+         * so we need to initialize all state used by signalling
+         * before it.
+         */
+
+        return true;
+}
+
+static void host1x_syncpt_fence_release(struct dma_fence *f)
+{
+        struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+        if (sf->waiter)
+                kfree(sf->waiter);
+
+        dma_fence_free(f);
+}
+
+const struct dma_fence_ops host1x_syncpt_fence_ops = {
+        .get_driver_name = host1x_syncpt_fence_get_driver_name,
+        .get_timeline_name = host1x_syncpt_fence_get_timeline_name,
+        .enable_signaling = host1x_syncpt_fence_enable_signaling,
+        .release = host1x_syncpt_fence_release,
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *f)
+{
+        if (atomic_xchg(&f->signaling, 1))
+                return;
+
+        /*
+         * Cancel pending timeout work - if it races, it will
+         * not get 'f->signaling' and return.
+         */
+        cancel_delayed_work_sync(&f->timeout_work);
+
+        host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false);
+
+        dma_fence_signal(&f->base);
+        dma_fence_put(&f->base);
+}
+
+static void do_fence_timeout(struct work_struct *work)
+{
+        struct delayed_work *dwork = (struct delayed_work *)work;
+        struct host1x_syncpt_fence *f =
+                container_of(dwork, struct host1x_syncpt_fence, timeout_work);
+
+        if (atomic_xchg(&f->signaling, 1))
+                return;
+
+        /*
+         * Cancel pending timeout work - if it races, it will
+         * not get 'f->signaling' and return.
+         */
+        host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true);
+
+        dma_fence_set_error(&f->base, -ETIMEDOUT);
+        dma_fence_signal(&f->base);
+        dma_fence_put(&f->base);
+}
+
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
+{
+        struct host1x_syncpt_fence *fence;
+
+        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+        if (!fence)
+                return ERR_PTR(-ENOMEM);
+
+        fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL);
+        if (!fence->waiter)
+                return ERR_PTR(-ENOMEM);
+
+        fence->sp = sp;
+        fence->threshold = threshold;
+
+        dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock,
+                       dma_fence_context_alloc(1), 0);
+
+        INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
+
+        return &fence->base;
+}
+EXPORT_SYMBOL(host1x_fence_create);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
new file mode 100644
index 000000000000..70c91de82f14
--- /dev/null
+++ b/drivers/gpu/host1x/fence.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_FENCE_H
+#define HOST1X_FENCE_H
+
+struct host1x_syncpt_fence;
+
+void host1x_fence_signal(struct host1x_syncpt_fence *fence);
+
+#endif
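A hypothetical usage sketch for the new API: exporting a fence obtained from host1x_fence_create() to userspace as a sync_file fd. The helper name is illustrative and not part of the patch; 'sp' and 'threshold' are assumed to come from the submitting driver.

static int export_syncpt_fence(struct host1x_syncpt *sp, u32 threshold)
{
        struct dma_fence *fence;
        struct sync_file *file;
        int fd;

        fence = host1x_fence_create(sp, threshold);
        if (IS_ERR(fence))
                return PTR_ERR(fence);

        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0) {
                dma_fence_put(fence);
                return fd;
        }

        file = sync_file_create(fence);
        dma_fence_put(fence); /* the sync_file holds its own reference */
        if (!file) {
                put_unused_fd(fd);
                return -ENOMEM;
        }

        fd_install(fd, file->file);
        return fd;
}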
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index d4c28faf27d1..1999780a7203 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,39 +47,84 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
         }
 }
 
-static void submit_gathers(struct host1x_job *job)
+static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
+                        u32 next_class)
+{
+#if HOST1X_HW >= 2
+        host1x_cdma_push_wide(cdma,
+                host1x_opcode_setclass(
+                        HOST1X_CLASS_HOST1X,
+                        HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
+                        /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
+                        BIT(0) | BIT(2)
+                ),
+                threshold,
+                id,
+                host1x_opcode_setclass(next_class, 0, 0)
+        );
+#else
+        /* TODO add waitchk or use waitbases or other mitigation */
+        host1x_cdma_push(cdma,
+                host1x_opcode_setclass(
+                        HOST1X_CLASS_HOST1X,
+                        host1x_uclass_wait_syncpt_r(),
+                        BIT(0)
+                ),
+                host1x_class_host_wait_syncpt(id, threshold)
+        );
+        host1x_cdma_push(cdma,
+                host1x_opcode_setclass(next_class, 0, 0),
+                HOST1X_OPCODE_NOP
+        );
+#endif
+}
+
+static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
 {
         struct host1x_cdma *cdma = &job->channel->cdma;
 #if HOST1X_HW < 6
         struct device *dev = job->channel->dev;
 #endif
         unsigned int i;
+        u32 threshold;
 
-        for (i = 0; i < job->num_gathers; i++) {
-                struct host1x_job_gather *g = &job->gathers[i];
-                dma_addr_t addr = g->base + g->offset;
-                u32 op2, op3;
+        for (i = 0; i < job->num_cmds; i++) {
+                struct host1x_job_cmd *cmd = &job->cmds[i];
 
-                op2 = lower_32_bits(addr);
-                op3 = upper_32_bits(addr);
+                if (cmd->is_wait) {
+                        if (cmd->wait.relative)
+                                threshold = job_syncpt_base + cmd->wait.threshold;
+                        else
+                                threshold = cmd->wait.threshold;
 
-                trace_write_gather(cdma, g->bo, g->offset, g->words);
+                        submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
+                } else {
+                        struct host1x_job_gather *g = &cmd->gather;
+
+                        dma_addr_t addr = g->base + g->offset;
+                        u32 op2, op3;
+
+                        op2 = lower_32_bits(addr);
+                        op3 = upper_32_bits(addr);
 
-                if (op3 != 0) {
+                        trace_write_gather(cdma, g->bo, g->offset, g->words);
+
+                        if (op3 != 0) {
 #if HOST1X_HW >= 6
-                        u32 op1 = host1x_opcode_gather_wide(g->words);
-                        u32 op4 = HOST1X_OPCODE_NOP;
+                                u32 op1 = host1x_opcode_gather_wide(g->words);
+                                u32 op4 = HOST1X_OPCODE_NOP;
 
-                        host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
+                                host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
 #else
-                        dev_err(dev, "invalid gather for push buffer %pad\n",
-                                &addr);
-                        continue;
+                                dev_err(dev, "invalid gather for push buffer %pad\n",
+                                        &addr);
+                                continue;
 #endif
-                } else {
-                        u32 op1 = host1x_opcode_gather(g->words);
+                        } else {
+                                u32 op1 = host1x_opcode_gather(g->words);
 
-                        host1x_cdma_push(cdma, op1, op2);
+                                host1x_cdma_push(cdma, op1, op2);
+                        }
                 }
         }
 }
@@ -126,7 +171,7 @@ static int channel_submit(struct host1x_job *job)
         struct host1x *host = dev_get_drvdata(ch->dev->parent);
 
         trace_host1x_channel_submit(dev_name(ch->dev),
-                                    job->num_gathers, job->num_relocs,
+                                    job->num_cmds, job->num_relocs,
                                     job->syncpt->id, job->syncpt_incrs);
 
         /* before error checks, return current max */
@@ -181,7 +226,7 @@ static int channel_submit(struct host1x_job *job)
                                          host1x_opcode_setclass(job->class, 0, 0),
                                          HOST1X_OPCODE_NOP);
 
-        submit_gathers(job);
+        submit_gathers(job, syncval - user_syncpt_incrs);
 
         /* end CDMA submit & stash pinned hMems into sync queue */
         host1x_cdma_end(&ch->cdma, job);
@@ -191,7 +236,7 @@ static int channel_submit(struct host1x_job *job)
         /* schedule a submit complete interrupt */
         err = host1x_intr_add_action(host, sp, syncval,
                                      HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
-                                     completed_waiter, NULL);
+                                     completed_waiter, &job->waiter);
         completed_waiter = NULL;
         WARN(err, "Failed to set submit complete interrupt");
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index ceb48229d14b..54e31d81517b 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -156,9 +156,9 @@ static unsigned int show_channel_command(struct output *o, u32 val,
         }
 }
 
-static void show_gather(struct output *o, phys_addr_t phys_addr,
+static void show_gather(struct output *o, dma_addr_t phys_addr,
                         unsigned int words, struct host1x_cdma *cdma,
-                        phys_addr_t pin_addr, u32 *map_addr)
+                        dma_addr_t pin_addr, u32 *map_addr)
 {
         /* Map dmaget cursor to corresponding mem handle */
         u32 offset = phys_addr - pin_addr;
@@ -176,11 +176,11 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
         }
 
         for (i = 0; i < words; i++) {
-                u32 addr = phys_addr + i * 4;
+                dma_addr_t addr = phys_addr + i * 4;
                 u32 val = *(map_addr + offset / 4 + i);
 
                 if (!data_count) {
-                        host1x_debug_output(o, "%08x: %08x: ", addr, val);
+                        host1x_debug_output(o, "    %pad: %08x: ", &addr, val);
                         data_count = show_channel_command(o, val, &payload);
                 } else {
                         host1x_debug_cont(o, "%08x%s", val,
@@ -195,23 +195,25 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
         struct push_buffer *pb = &cdma->push_buffer;
         struct host1x_job *job;
 
-        host1x_debug_output(o, "PUSHBUF at %pad, %u words\n",
-                            &pb->dma, pb->size / 4);
-
-        show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped);
-
         list_for_each_entry(job, &cdma->sync_queue, list) {
                 unsigned int i;
 
-                host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
-                                    job, job->syncpt->id, job->syncpt_end,
-                                    job->first_get, job->timeout,
+                host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n",
+                                    job->syncpt->id, job->syncpt_end, job->timeout,
                                     job->num_slots, job->num_unpins);
 
-                for (i = 0; i < job->num_gathers; i++) {
-                        struct host1x_job_gather *g = &job->gathers[i];
+                show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma,
+                            pb->dma + job->first_get, pb->mapped + job->first_get);
+
+                for (i = 0; i < job->num_cmds; i++) {
+                        struct host1x_job_gather *g;
                         u32 *mapped;
 
+                        if (job->cmds[i].is_wait)
+                                continue;
+
+                        g = &job->cmds[i].gather;
+
                         if (job->gather_copy_mapped)
                                 mapped = (u32 *)job->gather_copy_mapped;
                         else
@@ -222,7 +224,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
                                 continue;
                         }
 
-                        host1x_debug_output(o, "  GATHER at %pad+%#x, %d words\n",
+                        host1x_debug_output(o, "    GATHER at %pad+%#x, %d words\n",
                                 &g->base, g->offset, g->words);
 
                         show_gather(o, g->base + g->offset, g->words, cdma,
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 02a93305ac7b..85242a59fa6a 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -16,10 +16,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
                                            struct output *o)
 {
         struct host1x_cdma *cdma = &ch->cdma;
+        dma_addr_t dmastart, dmaend;
         u32 dmaput, dmaget, dmactrl;
         u32 cbstat, cbread;
         u32 val, base, baseval;
 
+        dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+        dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
         dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
         dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
         dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -56,9 +59,10 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
                                     HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
                                     cbread);
 
-        host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+        host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+        host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
                             dmaput, dmaget, dmactrl);
-        host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+        host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat);
 
         show_channel_gathers(o, cdma);
         host1x_debug_output(o, "\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index 6d1b583aa90f..9d0667879a19 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -16,10 +16,23 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
                                            struct output *o)
 {
         struct host1x_cdma *cdma = &ch->cdma;
+        dma_addr_t dmastart = 0, dmaend = 0;
         u32 dmaput, dmaget, dmactrl;
         u32 offset, class;
         u32 ch_stat;
 
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+        dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI);
+        dmastart <<= 32;
+#endif
+        dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+        dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI);
+        dmaend <<= 32;
+#endif
+        dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
+
         dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
         dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
         dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -41,7 +54,8 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
         host1x_debug_output(o, "active class %02x, offset %04x\n",
                             class, offset);
 
-        host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+        host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+        host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
                             dmaput, dmaget, dmactrl);
         host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat);
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
index 4fc51f70496b..0a2ab8f1da6f 100644
--- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
         host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+        return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+        host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+        return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+        host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
index 9e84a4adca9f..60c692b92955 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
         host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+        return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+        host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+        return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+        host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
index aee5a4e32877..2fcc9a2ad3ef 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
         host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+        return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+        host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+        return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+        host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
index c4bacdb7155f..5f831438d19b 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
         host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+        return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+        host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+        return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+        host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
index c74070f3f203..8cd2ef087d5d 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
         host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+        return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+        host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+        return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+        host1x_uclass_wait_syncpt_32_r()
 
 #endif
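Why submit_wait() above passes BIT(0) | BIT(2) together with these new registers — a standalone sketch (plain C, not kernel code) of how a setclass mask selects consecutive register offsets:

#include <stdio.h>

int main(void)
{
        unsigned int offset = 0x4e; /* HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 */
        unsigned int mask = (1u << 0) | (1u << 2);
        unsigned int bit;

        /* Each set bit in the mask selects register (offset + bit). */
        for (bit = 0; bit < 16; bit++)
                if (mask & (1u << bit))
                        printf("data word targets register 0x%02x\n",
                               offset + bit);

        return 0; /* prints 0x4e (payload) and 0x50 (WAIT_SYNCPT_32) */
}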
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 6d1f3c0fdbe7..45b6be927ec4 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -13,6 +13,7 @@
 #include <trace/events/host1x.h>
 #include "channel.h"
 #include "dev.h"
+#include "fence.h"
 #include "intr.h"
 
 /* Wait list management */
@@ -121,12 +122,20 @@ static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
         wake_up_interruptible(wq);
 }
 
+static void action_signal_fence(struct host1x_waitlist *waiter)
+{
+        struct host1x_syncpt_fence *f = waiter->data;
+
+        host1x_fence_signal(f);
+}
+
 typedef void (*action_handler)(struct host1x_waitlist *waiter);
 
 static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
         action_submit_complete,
         action_wakeup,
         action_wakeup_interruptible,
+        action_signal_fence,
 };
 
 static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 6ea55e615e3a..e4c346099273 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -33,6 +33,8 @@ enum host1x_intr_action {
          */
         HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
 
+        HOST1X_INTR_ACTION_SIGNAL_FENCE,
+
         HOST1X_INTR_ACTION_COUNT
 };
 
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index adbdc225de8d..0eef6df7c89e 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -24,21 +24,25 @@
 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-                                    u32 num_cmdbufs, u32 num_relocs)
+                                    u32 num_cmdbufs, u32 num_relocs,
+                                    bool skip_firewall)
 {
         struct host1x_job *job = NULL;
         unsigned int num_unpins = num_relocs;
+        bool enable_firewall;
         u64 total;
         void *mem;
 
-        if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+        enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
+
+        if (!enable_firewall)
                 num_unpins += num_cmdbufs;
 
         /* Check that we're not going to overflow */
         total = sizeof(struct host1x_job) +
                 (u64)num_relocs * sizeof(struct host1x_reloc) +
                 (u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
-                (u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
+                (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
                 (u64)num_unpins * sizeof(dma_addr_t) +
                 (u64)num_unpins * sizeof(u32 *);
         if (total > ULONG_MAX)
@@ -48,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
         if (!job)
                 return NULL;
 
+        job->enable_firewall = enable_firewall;
+
         kref_init(&job->ref);
         job->channel = ch;
 
@@ -57,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
         mem += num_relocs * sizeof(struct host1x_reloc);
         job->unpins = num_unpins ? mem : NULL;
         mem += num_unpins * sizeof(struct host1x_job_unpin_data);
-        job->gathers = num_cmdbufs ? mem : NULL;
-        mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+        job->cmds = num_cmdbufs ? mem : NULL;
+        mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
         job->addr_phys = num_unpins ? mem : NULL;
 
         job->reloc_addr_phys = job->addr_phys;
@@ -79,6 +85,13 @@ static void job_free(struct kref *ref)
 {
         struct host1x_job *job = container_of(ref, struct host1x_job, ref);
 
+        if (job->release)
+                job->release(job);
+
+        if (job->waiter)
+                host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
+                                    job->waiter, false);
+
         if (job->syncpt)
                 host1x_syncpt_put(job->syncpt);
 
@@ -94,22 +107,38 @@ EXPORT_SYMBOL(host1x_job_put);
 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
                            unsigned int words, unsigned int offset)
 {
-        struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
+        struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
 
         gather->words = words;
         gather->bo = bo;
         gather->offset = offset;
 
-        job->num_gathers++;
+        job->num_cmds++;
 }
 EXPORT_SYMBOL(host1x_job_add_gather);
 
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+                         bool relative, u32 next_class)
+{
+        struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
+
+        cmd->is_wait = true;
+        cmd->wait.id = id;
+        cmd->wait.threshold = thresh;
+        cmd->wait.next_class = next_class;
+        cmd->wait.relative = relative;
+
+        job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
+
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
         struct host1x_client *client = job->client;
         struct device *dev = client->dev;
         struct host1x_job_gather *g;
         struct iommu_domain *domain;
+        struct sg_table *sgt;
         unsigned int i;
         int err;
 
@@ -119,7 +148,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
         for (i = 0; i < job->num_relocs; i++) {
                 struct host1x_reloc *reloc = &job->relocs[i];
                 dma_addr_t phys_addr, *phys;
-                struct sg_table *sgt;
 
                 reloc->target.bo = host1x_bo_get(reloc->target.bo);
                 if (!reloc->target.bo) {
@@ -192,20 +220,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
          * We will copy gathers BO content later, so there is no need to
          * hold and pin them.
          */
-        if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+        if (job->enable_firewall)
                 return 0;
 
-        for (i = 0; i < job->num_gathers; i++) {
+        for (i = 0; i < job->num_cmds; i++) {
                 size_t gather_size = 0;
                 struct scatterlist *sg;
-                struct sg_table *sgt;
                 dma_addr_t phys_addr;
                 unsigned long shift;
                 struct iova *alloc;
                 dma_addr_t *phys;
                 unsigned int j;
 
-                g = &job->gathers[i];
+                if (job->cmds[i].is_wait)
+                        continue;
+
+                g = &job->cmds[i].gather;
+
                 g->bo = host1x_bo_get(g->bo);
                 if (!g->bo) {
                         err = -EINVAL;
@@ -296,7 +327,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
                 if (cmdbuf != reloc->cmdbuf.bo)
                         continue;
 
-                if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+                if (job->enable_firewall) {
                         target = (u32 *)job->gather_copy_mapped +
                                         reloc->cmdbuf.offset / sizeof(u32) +
                                         g->offset / sizeof(u32);
@@ -538,8 +569,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
         fw.num_relocs = job->num_relocs;
         fw.class = job->class;
 
-        for (i = 0; i < job->num_gathers; i++) {
-                struct host1x_job_gather *g = &job->gathers[i];
+        for (i = 0; i < job->num_cmds; i++) {
+                struct host1x_job_gather *g;
+
+                if (job->cmds[i].is_wait)
+                        continue;
+
+                g = &job->cmds[i].gather;
 
                 size += g->words * sizeof(u32);
         }
@@ -561,10 +597,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
 
         job->gather_copy_size = size;
 
-        for (i = 0; i < job->num_gathers; i++) {
-                struct host1x_job_gather *g = &job->gathers[i];
+        for (i = 0; i < job->num_cmds; i++) {
+                struct host1x_job_gather *g;
                 void *gather;
 
+                if (job->cmds[i].is_wait)
+                        continue;
+                g = &job->cmds[i].gather;
+
                 /* Copy the gather */
                 gather = host1x_bo_mmap(g->bo);
                 memcpy(job->gather_copy_mapped + offset, gather + g->offset,
@@ -600,28 +640,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
         if (err)
                 goto out;
 
-        if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+        if (job->enable_firewall) {
                 err = copy_gathers(host->dev, job, dev);
                 if (err)
                         goto out;
         }
 
         /* patch gathers */
-        for (i = 0; i < job->num_gathers; i++) {
-                struct host1x_job_gather *g = &job->gathers[i];
+        for (i = 0; i < job->num_cmds; i++) {
+                struct host1x_job_gather *g;
+
+                if (job->cmds[i].is_wait)
+                        continue;
+                g = &job->cmds[i].gather;
 
                 /* process each gather mem only once */
                 if (g->handled)
                         continue;
 
                 /* copy_gathers() sets gathers base if firewall is enabled */
-                if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+                if (!job->enable_firewall)
                         g->base = job->gather_addr_phys[i];
 
-                for (j = i + 1; j < job->num_gathers; j++) {
-                        if (job->gathers[j].bo == g->bo) {
-                                job->gathers[j].handled = true;
-                                job->gathers[j].base = g->base;
+                for (j = i + 1; j < job->num_cmds; j++) {
+                        if (!job->cmds[j].is_wait &&
+                            job->cmds[j].gather.bo == g->bo) {
+                                job->cmds[j].gather.handled = true;
+                                job->cmds[j].gather.base = g->base;
                         }
                 }
 
@@ -649,8 +694,7 @@ void host1x_job_unpin(struct host1x_job *job)
                 struct device *dev = unpin->dev ?: host->dev;
                 struct sg_table *sgt = unpin->sgt;
 
-                if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
-                    unpin->size && host->domain) {
+                if (!job->enable_firewall && unpin->size && host->domain) {
                         iommu_unmap(host->domain, job->addr_phys[i],
                                     unpin->size);
                         free_iova(&host->iova,
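With gathers and waits unified in job->cmds[], a submitting driver might interleave them like this (a hypothetical sketch; 'channel', 'bo', 'num_words', 'wait_id' and 'wait_thresh' are placeholders, not names from the patch):

        struct host1x_job *job;

        job = host1x_job_alloc(channel, 2 /* cmds */, 1 /* relocs */,
                               false /* skip_firewall */);
        if (!job)
                return -ENOMEM;

        /* Stall the channel on another syncpoint before running the gather. */
        host1x_job_add_wait(job, wait_id, wait_thresh, false /* absolute */,
                            job->class);
        host1x_job_add_gather(job, bo, num_words, 0 /* offset */);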
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 94bc2e4ae241..b4428c5495c9 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -18,6 +18,22 @@ struct host1x_job_gather {
         bool handled;
 };
 
+struct host1x_job_wait {
+        u32 id;
+        u32 threshold;
+        u32 next_class;
+        bool relative;
+};
+
+struct host1x_job_cmd {
+        bool is_wait;
+
+        union {
+                struct host1x_job_gather gather;
+                struct host1x_job_wait wait;
+        };
+};
+
 struct host1x_job_unpin_data {
         struct host1x_bo *bo;
         struct sg_table *sgt;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index e648ebbb2027..d198a10848c6 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -407,6 +407,8 @@ static void syncpt_release(struct kref *ref)
 
         atomic_set(&sp->max_val, host1x_syncpt_read(sp));
 
+        sp->locked = false;
+
         mutex_lock(&sp->host->syncpt_mutex);
 
         host1x_syncpt_base_free(sp->base);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index a6766f8d55ee..95cd29b79d6d 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -40,6 +40,13 @@ struct host1x_syncpt {
 
         /* interrupt data */
         struct host1x_syncpt_intr intr;
+
+        /*
+         * If a submission incrementing this syncpoint fails, lock it so that
+         * further submissions cannot be made until the application has
+         * handled the failure.
+         */
+        bool locked;
 };
 
 /* Initialize sync point array */
@@ -115,4 +122,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
         return sp->id < host1x_syncpt_nb_pts(sp->host);
 }
 
+static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp)
+{
+        sp->locked = true;
+}
+
 #endif