From b12f601fe5f0e0cf37c4efd00c41a033dff67664 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Apr 2017 20:24:41 +0200 Subject: gpu: host1x: select IOMMU_IOVA When IOMMU_IOVA is not built-in but host1x is, we get a link error: drivers/gpu/host1x/dev.o: In function `host1x_remove': dev.c:(.text.host1x_remove+0x50): undefined reference to `put_iova_domain' drivers/gpu/host1x/dev.o: In function `host1x_probe': dev.c:(.text.host1x_probe+0x31c): undefined reference to `init_iova_domain' dev.c:(.text.host1x_probe+0x38c): undefined reference to `put_iova_domain' drivers/gpu/host1x/cdma.o: In function `host1x_cdma_init': cdma.c:(.text.host1x_cdma_init+0x238): undefined reference to `alloc_iova' cdma.c:(.text.host1x_cdma_init+0x2c0): undefined reference to `__free_iova' drivers/gpu/host1x/cdma.o: In function `host1x_cdma_deinit': cdma.c:(.text.host1x_cdma_deinit+0xb0): undefined reference to `free_iova' This adds the same select statement that we have for drm_tegra. Fixes: 404bfb78daf3 ("gpu: host1x: Add IOMMU support") Signed-off-by: Arnd Bergmann Reviewed-by: Mikko Perttunen Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/20170419182449.885312-1-arnd@arndb.de --- drivers/gpu/host1x/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index b2fd029d67b3..91916326957f 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -1,6 +1,7 @@ config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" depends on ARCH_TEGRA || (ARM && COMPILE_TEST) + select IOMMU_IOVA if IOMMU_SUPPORT help Driver for the NVIDIA Tegra host1x hardware. -- cgit v1.2.3 From 466749f13e33d892cf9263d7efbc0ea713c23ed7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 10 Apr 2017 12:27:01 +0200 Subject: gpu: host1x: Flesh out kerneldoc Improve kerneldoc for the public parts of the host1x infrastructure in preparation for adding driver-specific part to the GPU documentation. Acked-by: Daniel Vetter Signed-off-by: Thierry Reding --- drivers/gpu/host1x/bus.c | 75 +++++++++++++++++++++++++++++++++++++++++ drivers/gpu/host1x/syncpt.c | 81 +++++++++++++++++++++++++++++++++++++++------ include/linux/host1x.h | 25 ++++++++++++++ 3 files changed, 170 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 561831e1ae2c..a048e3ac523d 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -40,6 +40,9 @@ struct host1x_subdev { /** * host1x_subdev_add() - add a new subdevice with an associated device node + * @device: host1x device to add the subdevice to + * @driver: host1x driver + * @np: device node */ static int host1x_subdev_add(struct host1x_device *device, struct device_node *np) @@ -62,6 +65,7 @@ static int host1x_subdev_add(struct host1x_device *device, /** * host1x_subdev_del() - remove subdevice + * @subdev: subdevice to remove */ static void host1x_subdev_del(struct host1x_subdev *subdev) { @@ -72,6 +76,8 @@ static void host1x_subdev_del(struct host1x_subdev *subdev) /** * host1x_device_parse_dt() - scan device tree and add matching subdevices + * @device: host1x logical device + * @driver: host1x driver */ static int host1x_device_parse_dt(struct host1x_device *device, struct host1x_driver *driver) @@ -166,6 +172,16 @@ static void host1x_subdev_unregister(struct host1x_device *device, mutex_unlock(&device->subdevs_lock); } +/** + * host1x_device_init() - initialize a host1x logical device + * @device: host1x logical device + * + * The driver for the host1x logical device can call this during execution of + * its &host1x_driver.probe implementation to initialize each of its clients. + * The client drivers access the subsystem specific driver data using the + * &host1x_client.parent field and driver data associated with it (usually by + * calling dev_get_drvdata()). + */ int host1x_device_init(struct host1x_device *device) { struct host1x_client *client; @@ -192,6 +208,15 @@ int host1x_device_init(struct host1x_device *device) } EXPORT_SYMBOL(host1x_device_init); +/** + * host1x_device_exit() - uninitialize host1x logical device + * @device: host1x logical device + * + * When the driver for a host1x logical device is unloaded, it can call this + * function to tear down each of its clients. Typically this is done after a + * subsystem-specific data structure is removed and the functionality can no + * longer be used. + */ int host1x_device_exit(struct host1x_device *device) { struct host1x_client *client; @@ -446,6 +471,14 @@ static void host1x_detach_driver(struct host1x *host1x, mutex_unlock(&host1x->devices_lock); } +/** + * host1x_register() - register a host1x controller + * @host1x: host1x controller + * + * The host1x controller driver uses this to register a host1x controller with + * the infrastructure. Note that all Tegra SoC generations have only ever come + * with a single host1x instance, so this function is somewhat academic. + */ int host1x_register(struct host1x *host1x) { struct host1x_driver *driver; @@ -464,6 +497,13 @@ int host1x_register(struct host1x *host1x) return 0; } +/** + * host1x_unregister() - unregister a host1x controller + * @host1x: host1x controller + * + * The host1x controller driver uses this to remove a host1x controller from + * the infrastructure. + */ int host1x_unregister(struct host1x *host1x) { struct host1x_driver *driver; @@ -513,6 +553,16 @@ static void host1x_device_shutdown(struct device *dev) driver->shutdown(device); } +/** + * host1x_driver_register_full() - register a host1x driver + * @driver: host1x driver + * @owner: owner module + * + * Drivers for host1x logical devices call this function to register a driver + * with the infrastructure. Note that since these drive logical devices, the + * registration of the driver actually triggers tho logical device creation. + * A logical device will be created for each host1x instance. + */ int host1x_driver_register_full(struct host1x_driver *driver, struct module *owner) { @@ -541,6 +591,13 @@ int host1x_driver_register_full(struct host1x_driver *driver, } EXPORT_SYMBOL(host1x_driver_register_full); +/** + * host1x_driver_unregister() - unregister a host1x driver + * @driver: host1x driver + * + * Unbinds the driver from each of the host1x logical devices that it is + * bound to, effectively removing the subsystem devices that they represent. + */ void host1x_driver_unregister(struct host1x_driver *driver) { driver_unregister(&driver->driver); @@ -551,6 +608,17 @@ void host1x_driver_unregister(struct host1x_driver *driver) } EXPORT_SYMBOL(host1x_driver_unregister); +/** + * host1x_client_register() - register a host1x client + * @client: host1x client + * + * Registers a host1x client with each host1x controller instance. Note that + * each client will only match their parent host1x controller and will only be + * associated with that instance. Once all clients have been registered with + * their parent host1x controller, the infrastructure will set up the logical + * device and call host1x_device_init(), which will in turn call each client's + * &host1x_client_ops.init implementation. + */ int host1x_client_register(struct host1x_client *client) { struct host1x *host1x; @@ -576,6 +644,13 @@ int host1x_client_register(struct host1x_client *client) } EXPORT_SYMBOL(host1x_client_register); +/** + * host1x_client_unregister() - unregister a host1x client + * @client: host1x client + * + * Removes a host1x client from its host1x controller instance. If a logical + * device has already been initialized, it will be torn down. + */ int host1x_client_unregister(struct host1x_client *client) { struct host1x_client *c; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 0ac026cdc30c..048ac9e344ce 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -99,14 +99,24 @@ unlock: return NULL; } +/** + * host1x_syncpt_id() - retrieve syncpoint ID + * @sp: host1x syncpoint + * + * Given a pointer to a struct host1x_syncpt, retrieves its ID. This ID is + * often used as a value to program into registers that control how hardware + * blocks interact with syncpoints. + */ u32 host1x_syncpt_id(struct host1x_syncpt *sp) { return sp->id; } EXPORT_SYMBOL(host1x_syncpt_id); -/* - * Updates the value sent to hardware. +/** + * host1x_syncpt_incr_max() - update the value sent to hardware + * @sp: host1x syncpoint + * @incrs: number of increments */ u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs) { @@ -175,8 +185,9 @@ u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp) return sp->base_val; } -/* - * Increment syncpoint value from cpu, updating cache +/** + * host1x_syncpt_incr() - increment syncpoint value from CPU, updating cache + * @sp: host1x syncpoint */ int host1x_syncpt_incr(struct host1x_syncpt *sp) { @@ -195,8 +206,12 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) return host1x_syncpt_is_expired(sp, thresh); } -/* - * Main entrypoint for syncpoint value waits. +/** + * host1x_syncpt_wait() - wait for a syncpoint to reach a given value + * @sp: host1x syncpoint + * @thresh: threshold + * @timeout: maximum time to wait for the syncpoint to reach the given value + * @value: return location for the syncpoint value */ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value) @@ -402,6 +417,16 @@ int host1x_syncpt_init(struct host1x *host) return 0; } +/** + * host1x_syncpt_request() - request a syncpoint + * @dev: device requesting the syncpoint + * @flags: flags + * + * host1x client drivers can use this function to allocate a syncpoint for + * subsequent use. A syncpoint returned by this function will be reserved for + * use by the client exclusively. When no longer using a syncpoint, a host1x + * client driver needs to release it using host1x_syncpt_free(). + */ struct host1x_syncpt *host1x_syncpt_request(struct device *dev, unsigned long flags) { @@ -411,6 +436,16 @@ struct host1x_syncpt *host1x_syncpt_request(struct device *dev, } EXPORT_SYMBOL(host1x_syncpt_request); +/** + * host1x_syncpt_free() - free a requested syncpoint + * @sp: host1x syncpoint + * + * Release a syncpoint previously allocated using host1x_syncpt_request(). A + * host1x client driver should call this when the syncpoint is no longer in + * use. Note that client drivers must ensure that the syncpoint doesn't remain + * under the control of hardware after calling this function, otherwise two + * clients may end up trying to access the same syncpoint concurrently. + */ void host1x_syncpt_free(struct host1x_syncpt *sp) { if (!sp) @@ -438,9 +473,12 @@ void host1x_syncpt_deinit(struct host1x *host) kfree(sp->name); } -/* - * Read max. It indicates how many operations there are in queue, either in - * channel or in a software thread. +/** + * host1x_syncpt_read_max() - read maximum syncpoint value + * @sp: host1x syncpoint + * + * The maximum syncpoint value indicates how many operations there are in + * queue, either in channel or in a software thread. */ u32 host1x_syncpt_read_max(struct host1x_syncpt *sp) { @@ -450,8 +488,12 @@ u32 host1x_syncpt_read_max(struct host1x_syncpt *sp) } EXPORT_SYMBOL(host1x_syncpt_read_max); -/* - * Read min, which is a shadow of the current sync point value in hardware. +/** + * host1x_syncpt_read_min() - read minimum syncpoint value + * @sp: host1x syncpoint + * + * The minimum syncpoint value is a shadow of the current sync point value in + * hardware. */ u32 host1x_syncpt_read_min(struct host1x_syncpt *sp) { @@ -461,6 +503,10 @@ u32 host1x_syncpt_read_min(struct host1x_syncpt *sp) } EXPORT_SYMBOL(host1x_syncpt_read_min); +/** + * host1x_syncpt_read() - read the current syncpoint value + * @sp: host1x syncpoint + */ u32 host1x_syncpt_read(struct host1x_syncpt *sp) { return host1x_syncpt_load(sp); @@ -482,6 +528,11 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host) return host->info->nb_mlocks; } +/** + * host1x_syncpt_get() - obtain a syncpoint by ID + * @host: host1x controller + * @id: syncpoint ID + */ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id) { if (id >= host->info->nb_pts) @@ -491,12 +542,20 @@ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id) } EXPORT_SYMBOL(host1x_syncpt_get); +/** + * host1x_syncpt_get_base() - obtain the wait base associated with a syncpoint + * @sp: host1x syncpoint + */ struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp) { return sp ? sp->base : NULL; } EXPORT_SYMBOL(host1x_syncpt_get_base); +/** + * host1x_syncpt_base_id() - retrieve the ID of a syncpoint wait base + * @base: host1x syncpoint wait base + */ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base) { return base->id; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 3d04aa1dc83e..840a8ad627b2 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -32,11 +32,27 @@ enum host1x_class { struct host1x_client; +/** + * struct host1x_client_ops - host1x client operations + * @init: host1x client initialization code + * @exit: host1x client tear down code + */ struct host1x_client_ops { int (*init)(struct host1x_client *client); int (*exit)(struct host1x_client *client); }; +/** + * struct host1x_client - host1x client structure + * @list: list node for the host1x client + * @parent: pointer to struct device representing the host1x controller + * @dev: pointer to struct device backing this host1x client + * @ops: host1x client operations + * @class: host1x class represented by this client + * @channel: host1x channel associated with this client + * @syncpts: array of syncpoints requested for this client + * @num_syncpts: number of syncpoints requested for this client + */ struct host1x_client { struct list_head list; struct device *parent; @@ -251,6 +267,15 @@ void host1x_job_unpin(struct host1x_job *job); struct host1x_device; +/** + * struct host1x_driver - host1x logical device driver + * @driver: core driver + * @subdevs: table of OF device IDs matching subdevices for this driver + * @list: list node for the driver + * @probe: called when the host1x logical device is probed + * @remove: called when the host1x logical device is removed + * @shutdown: called when the host1x logical device is shut down + */ struct host1x_driver { struct device_driver driver; -- cgit v1.2.3 From d0fbbdff2e19aabccc1107b7e12ab9f3cbf626ef Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:27 +0300 Subject: drm/tegra: Correct copying of waitchecks and disable them in the 'submit' IOCTL The waitchecks along with multiple syncpoints per submit are not ready for use yet, let's forbid them for now. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 60 ++++++++++++++++++++++++++++++++++++++++++--- drivers/gpu/host1x/job.h | 7 ------ include/linux/host1x.h | 7 ++++++ 3 files changed, 63 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 51e20e015053..0928f2bb4203 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -349,6 +349,36 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, return 0; } +static int host1x_waitchk_copy_from_user(struct host1x_waitchk *dest, + struct drm_tegra_waitchk __user *src, + struct drm_file *file) +{ + u32 cmdbuf; + int err; + + err = get_user(cmdbuf, &src->handle); + if (err < 0) + return err; + + err = get_user(dest->offset, &src->offset); + if (err < 0) + return err; + + err = get_user(dest->syncpt_id, &src->syncpt); + if (err < 0) + return err; + + err = get_user(dest->thresh, &src->thresh); + if (err < 0) + return err; + + dest->bo = host1x_bo_lookup(file, cmdbuf); + if (!dest->bo) + return -ENOENT; + + return 0; +} + int tegra_drm_submit(struct tegra_drm_context *context, struct drm_tegra_submit *args, struct drm_device *drm, struct drm_file *file) @@ -370,6 +400,10 @@ int tegra_drm_submit(struct tegra_drm_context *context, if (args->num_syncpts != 1) return -EINVAL; + /* We don't yet support waitchks */ + if (args->num_waitchks != 0) + return -EINVAL; + job = host1x_job_alloc(context->channel, args->num_cmdbufs, args->num_relocs, args->num_waitchks); if (!job) @@ -458,10 +492,28 @@ int tegra_drm_submit(struct tegra_drm_context *context, } } - if (copy_from_user(job->waitchk, waitchks, - sizeof(*waitchks) * num_waitchks)) { - err = -EFAULT; - goto fail; + /* copy and resolve waitchks from submit */ + while (num_waitchks--) { + struct host1x_waitchk *wait = &job->waitchk[num_waitchks]; + struct tegra_bo *obj; + + err = host1x_waitchk_copy_from_user(wait, + &waitchks[num_waitchks], + file); + if (err < 0) + goto fail; + + obj = host1x_to_tegra_bo(wait->bo); + + /* + * The unaligned offset will cause an unaligned write during + * of the waitchks patching, corrupting the commands stream. + */ + if (wait->offset & 3 || + wait->offset >= obj->gem.size) { + err = -EINVAL; + goto fail; + } } if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts, diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 878239c476d2..0debd93a1849 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -34,13 +34,6 @@ struct host1x_cmdbuf { u32 pad; }; -struct host1x_waitchk { - struct host1x_bo *bo; - u32 offset; - u32 syncpt_id; - u32 thresh; -}; - struct host1x_job_unpin_data { struct host1x_bo *bo; struct sg_table *sgt; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 840a8ad627b2..ba0b245da732 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -193,6 +193,13 @@ struct host1x_reloc { unsigned long shift; }; +struct host1x_waitchk { + struct host1x_bo *bo; + u32 offset; + u32 syncpt_id; + u32 thresh; +}; + struct host1x_job { /* When refcount goes to zero, job can be freed */ struct kref ref; -- cgit v1.2.3 From 3833d16f1677e19098eb8cc0f0a54f6e209406f4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:32 +0300 Subject: gpu: host1x: Initialize firewall class to the job's one The commands stream is prepended by the jobs class on the CDMA submission, so that explicitly setting a module class in the commands stream isn't necessary. The firewall initializes its class to 0 and the command stream that doesn't explicitly specify the class effectively bypasses the firewall. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Reviewed-by: Erik Faye-Lund Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 5f5f8ee6143d..d9933828fe87 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -504,7 +504,7 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) fw.dev = dev; fw.reloc = job->relocarray; fw.num_relocs = job->num_relocs; - fw.class = 0; + fw.class = job->class; for (i = 0; i < job->num_gathers; i++) { struct host1x_job_gather *g = &job->gathers[i]; -- cgit v1.2.3 From e5855aa3e681bc417165604212c061e1c4b7cbda Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:33 +0300 Subject: gpu: host1x: Correct host1x_job_pin() error handling In case of relocations / waitchecks patching failure the jobs pins stay referenced till DRM file get closed, wasting memory. Add the missed unpinning. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index d9933828fe87..14f3f957ffab 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -592,22 +592,20 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) err = do_relocs(job, g->bo); if (err) - break; + goto out; err = do_waitchks(job, host, g->bo); if (err) - break; + goto out; } - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err) { - err = copy_gathers(job, dev); - if (err) { - host1x_job_unpin(job); - return err; - } - } + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + goto out; + err = copy_gathers(job, dev); out: + if (err) + host1x_job_unpin(job); wmb(); return err; -- cgit v1.2.3 From 47f89c10ddc439638bc0ea51a7f9872e1b7734ce Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:34 +0300 Subject: gpu: host1x: Do not leak BO's phys address to userspace Perform gathers coping before patching them, so that original gathers are left untouched. That's not as bad as leaking kernel addresses, but still doesn't feel right. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 14f3f957ffab..4208329ca2af 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -137,8 +137,9 @@ static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp, * avoid a wrap condition in the HW). */ static int do_waitchks(struct host1x_job *job, struct host1x *host, - struct host1x_bo *patch) + struct host1x_job_gather *g) { + struct host1x_bo *patch = g->bo; int i; /* compare syncpt vs wait threshold */ @@ -165,7 +166,8 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host, wait->syncpt_id, sp->name, wait->thresh, host1x_syncpt_read_min(sp)); - host1x_syncpt_patch_offset(sp, patch, wait->offset); + host1x_syncpt_patch_offset(sp, patch, + g->offset + wait->offset); } wait->bo = NULL; @@ -269,11 +271,12 @@ unpin: return err; } -static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) +static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) { int i = 0; u32 last_page = ~0; void *cmdbuf_page_addr = NULL; + struct host1x_bo *cmdbuf = g->bo; /* pin & patch the relocs for one gather */ for (i = 0; i < job->num_relocs; i++) { @@ -286,6 +289,13 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) if (cmdbuf != reloc->cmdbuf.bo) continue; + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + target = (u32 *)job->gather_copy_mapped + + reloc->cmdbuf.offset / sizeof(u32) + + g->offset / sizeof(u32); + goto patch_reloc; + } + if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) { if (cmdbuf_page_addr) host1x_bo_kunmap(cmdbuf, last_page, @@ -302,6 +312,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) } target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK); +patch_reloc: *target = reloc_addr; } @@ -573,6 +584,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (err) goto out; + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + err = copy_gathers(job, dev); + if (err) + goto out; + } + /* patch gathers */ for (i = 0; i < job->num_gathers; i++) { struct host1x_job_gather *g = &job->gathers[i]; @@ -581,7 +598,9 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (g->handled) continue; - g->base = job->gather_addr_phys[i]; + /* copy_gathers() sets gathers base if firewall is enabled */ + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + g->base = job->gather_addr_phys[i]; for (j = i + 1; j < job->num_gathers; j++) { if (job->gathers[j].bo == g->bo) { @@ -590,19 +609,15 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) } } - err = do_relocs(job, g->bo); + err = do_relocs(job, g); if (err) - goto out; + break; - err = do_waitchks(job, host, g->bo); + err = do_waitchks(job, host, g); if (err) - goto out; + break; } - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) - goto out; - - err = copy_gathers(job, dev); out: if (err) host1x_job_unpin(job); -- cgit v1.2.3 From 571cbf70c117664d142b34fa2b3b915d8374a327 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:35 +0300 Subject: gpu: host1x: Forbid relocation address shifting in the firewall Incorrectly shifted relocation address will cause a lower memory corruption and likely a hang on a write or a read of an arbitrary data in case of IOMMU absence. As of now, there is no known use for the address shifting and adding a proper shifts / sizes validation is a much more work. Let's forbid shifts in the firewall till a proper validation is implemented. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 4208329ca2af..a911de6386b4 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -330,6 +330,10 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset) return false; + /* relocation shift value validation isn't implemented yet */ + if (reloc->shift) + return false; + return true; } -- cgit v1.2.3 From ef81624994845a50e90c6a0ff57f17d1c96bff78 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:36 +0300 Subject: gpu: host1x: Forbid RESTART opcode in the firewall The RESTART opcode terminates the gather and restarts the CDMA fetching from a specified word << 2 relative to the CDMA start address. That shouldn't be allowed to be done by userspace. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index a911de6386b4..54230ec4f81e 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -495,7 +495,6 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) goto out; break; case 4: - case 5: case 14: break; default: -- cgit v1.2.3 From 0f563a4bf66e5182f0882efee398f7e6bc0bb1be Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:37 +0300 Subject: gpu: host1x: Forbid unrelated SETCLASS opcode in the firewall Several channels could be made to write the same unit concurrently via the SETCLASS opcode, trusting userspace is a bad idea. It should be possible to drop the per-client channel reservation and add a per-unit locking by inserting MLOCK's to the command stream to re-allow the SETCLASS opcode, but it will be much more work. Let's forbid the unit-unrelated class changes for now. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 1 + drivers/gpu/drm/tegra/drm.h | 1 + drivers/gpu/drm/tegra/gr2d.c | 7 +++++++ drivers/gpu/host1x/job.c | 24 ++++++++++++++++++++---- include/linux/host1x.h | 3 +++ 5 files changed, 32 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index b44f1eddb570..4a46ba846a0f 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -532,6 +532,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, } job->is_addr_reg = context->client->ops->is_addr_reg; + job->is_valid_class = context->client->ops->is_valid_class; job->syncpt_incrs = syncpt.incrs; job->syncpt_id = syncpt.id; job->timeout = 10000; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 85aa2e3d9d4e..6d6da01282f3 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -83,6 +83,7 @@ struct tegra_drm_client_ops { struct tegra_drm_context *context); void (*close_channel)(struct tegra_drm_context *context); int (*is_addr_reg)(struct device *dev, u32 class, u32 offset); + int (*is_valid_class)(u32 class); int (*submit)(struct tegra_drm_context *context, struct drm_tegra_submit *args, struct drm_device *drm, struct drm_file *file); diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index 02cd3e37a6ec..fbe0b8b25b42 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -109,10 +109,17 @@ static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 offset) return 0; } +static int gr2d_is_valid_class(u32 class) +{ + return (class == HOST1X_CLASS_GR2D || + class == HOST1X_CLASS_GR2D_SB); +} + static const struct tegra_drm_client_ops gr2d_ops = { .open_channel = gr2d_open_channel, .close_channel = gr2d_close_channel, .is_addr_reg = gr2d_is_addr_reg, + .is_valid_class = gr2d_is_valid_class, .submit = tegra_drm_submit, }; diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 54230ec4f81e..ef746f7afb88 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -356,6 +356,9 @@ struct host1x_firewall { static int check_register(struct host1x_firewall *fw, unsigned long offset) { + if (!fw->job->is_addr_reg) + return 0; + if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) { if (!fw->num_relocs) return -EINVAL; @@ -370,6 +373,19 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset) return 0; } +static int check_class(struct host1x_firewall *fw, u32 class) +{ + if (!fw->job->is_valid_class) { + if (fw->class != class) + return -EINVAL; + } else { + if (!fw->job->is_valid_class(fw->class)) + return -EINVAL; + } + + return 0; +} + static int check_mask(struct host1x_firewall *fw) { u32 mask = fw->mask; @@ -443,11 +459,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) { u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped + (g->offset / sizeof(u32)); + u32 job_class = fw->class; int err = 0; - if (!fw->job->is_addr_reg) - return 0; - fw->words = g->words; fw->cmdbuf = g->bo; fw->offset = 0; @@ -467,7 +481,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) fw->class = word >> 6 & 0x3ff; fw->mask = word & 0x3f; fw->reg = word >> 16 & 0xfff; - err = check_mask(fw); + err = check_class(fw, job_class); + if (!err) + err = check_mask(fw); if (err) goto out; break; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index ba0b245da732..b5358f855d9e 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -251,6 +251,9 @@ struct host1x_job { /* Check if register is marked as an address reg */ int (*is_addr_reg)(struct device *dev, u32 reg, u32 class); + /* Check if class belongs to the unit */ + int (*is_valid_class)(u32 class); + /* Request a SETCLASS to this class */ u32 class; -- cgit v1.2.3 From a47ac10e6e628740dd122d650afd193941f4770b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:39 +0300 Subject: gpu: host1x: Check waits in the firewall Check waits in the firewall in a way it is done for relocations. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Reviewed-by: Erik Faye-Lund Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index ef746f7afb88..f32ae69a68c7 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -31,6 +31,8 @@ #include "job.h" #include "syncpt.h" +#define HOST1X_WAIT_SYNCPT_OFFSET 0x8 + struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, u32 num_cmdbufs, u32 num_relocs, u32 num_waitchks) @@ -337,6 +339,17 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, return true; } +static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf, + unsigned int offset) +{ + offset *= sizeof(u32); + + if (wait->bo != cmdbuf || wait->offset != offset) + return false; + + return true; +} + struct host1x_firewall { struct host1x_job *job; struct device *dev; @@ -344,6 +357,9 @@ struct host1x_firewall { unsigned int num_relocs; struct host1x_reloc *reloc; + unsigned int num_waitchks; + struct host1x_waitchk *waitchk; + struct host1x_bo *cmdbuf; unsigned int offset; @@ -370,6 +386,20 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset) fw->reloc++; } + if (offset == HOST1X_WAIT_SYNCPT_OFFSET) { + if (fw->class != HOST1X_CLASS_HOST1X) + return -EINVAL; + + if (!fw->num_waitchks) + return -EINVAL; + + if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset)) + return -EINVAL; + + fw->num_waitchks--; + fw->waitchk++; + } + return 0; } @@ -534,6 +564,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) fw.dev = dev; fw.reloc = job->relocarray; fw.num_relocs = job->num_relocs; + fw.waitchk = job->waitchk; + fw.num_waitchks = job->num_waitchk; fw.class = job->class; for (i = 0; i < job->num_gathers; i++) { @@ -572,8 +604,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) offset += g->words * sizeof(u32); } - /* No relocs should remain at this point */ - if (fw.num_relocs) + /* No relocs and waitchks should remain at this point */ + if (fw.num_relocs || fw.num_waitchks) return -EINVAL; return 0; -- cgit v1.2.3 From 03ebcaa3de198655ff57197eeabb3958c0d4b73e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:40 +0300 Subject: gpu: host1x: Remove unused 'struct host1x_cmdbuf' The struct host1x_cmdbuf is unused, let's remove it. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 0debd93a1849..4bda51d503ec 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -27,13 +27,6 @@ struct host1x_job_gather { bool handled; }; -struct host1x_cmdbuf { - u32 handle; - u32 offset; - u32 words; - u32 pad; -}; - struct host1x_job_unpin_data { struct host1x_bo *bo; struct sg_table *sgt; -- cgit v1.2.3 From 03f0de770eda7a3f2e3950ca9f15d73e1dfd4596 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:41 +0300 Subject: gpu: host1x: Remove unused host1x_cdma_stop() definition There is no host1x_cdma_stop() in the code, let's remove its definition from the header file. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/cdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index ec170a78f4e1..286d49386be9 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -88,7 +88,6 @@ struct host1x_cdma { int host1x_cdma_init(struct host1x_cdma *cdma); int host1x_cdma_deinit(struct host1x_cdma *cdma); -void host1x_cdma_stop(struct host1x_cdma *cdma); int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job); void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2); void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job); -- cgit v1.2.3 From 8474b02531c4881a762c52ef869c52429e38633f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 15 Jun 2017 02:18:42 +0300 Subject: gpu: host1x: Refactor channel allocation code This is largely a rewrite of the Host1x channel allocation code, bringing several changes: - The previous code could deadlock due to an interaction between the 'reflock' mutex and CDMA timeout handling. This gets rid of the mutex. - Support for more than 32 channels, required for Tegra186 - General refactoring, including better encapsulation of channel ownership handling into channel.c Signed-off-by: Mikko Perttunen Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/gr2d.c | 4 +- drivers/gpu/drm/tegra/gr3d.c | 4 +- drivers/gpu/drm/tegra/vic.c | 4 +- drivers/gpu/host1x/channel.c | 147 +++++++++++++++++++++++-------------- drivers/gpu/host1x/channel.h | 21 ++++-- drivers/gpu/host1x/debug.c | 47 +++++------- drivers/gpu/host1x/dev.c | 7 +- drivers/gpu/host1x/dev.h | 6 +- drivers/gpu/host1x/hw/channel_hw.c | 4 - include/linux/host1x.h | 1 - 10 files changed, 135 insertions(+), 110 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index fbe0b8b25b42..6ea070da7718 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -38,7 +38,7 @@ static int gr2d_init(struct host1x_client *client) client->syncpts[0] = host1x_syncpt_request(client->dev, flags); if (!client->syncpts[0]) { - host1x_channel_free(gr2d->channel); + host1x_channel_put(gr2d->channel); return -ENOMEM; } @@ -57,7 +57,7 @@ static int gr2d_exit(struct host1x_client *client) return err; host1x_syncpt_free(client->syncpts[0]); - host1x_channel_free(gr2d->channel); + host1x_channel_put(gr2d->channel); return 0; } diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 13f0d1b7cd98..cee2ab645cde 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -48,7 +48,7 @@ static int gr3d_init(struct host1x_client *client) client->syncpts[0] = host1x_syncpt_request(client->dev, flags); if (!client->syncpts[0]) { - host1x_channel_free(gr3d->channel); + host1x_channel_put(gr3d->channel); return -ENOMEM; } @@ -67,7 +67,7 @@ static int gr3d_exit(struct host1x_client *client) return err; host1x_syncpt_free(client->syncpts[0]); - host1x_channel_free(gr3d->channel); + host1x_channel_put(gr3d->channel); return 0; } diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index cd804e404a11..47cb1aaa58b1 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -182,7 +182,7 @@ static int vic_init(struct host1x_client *client) free_syncpt: host1x_syncpt_free(client->syncpts[0]); free_channel: - host1x_channel_free(vic->channel); + host1x_channel_put(vic->channel); detach_device: if (tegra->domain) iommu_detach_device(tegra->domain, vic->dev); @@ -203,7 +203,7 @@ static int vic_exit(struct host1x_client *client) return err; host1x_syncpt_free(client->syncpts[0]); - host1x_channel_free(vic->channel); + host1x_channel_put(vic->channel); if (vic->domain) { iommu_detach_device(vic->domain, vic->dev); diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index 8f437d924c10..db9b91d1384c 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -24,19 +24,33 @@ #include "job.h" /* Constructor for the host1x device list */ -int host1x_channel_list_init(struct host1x *host) +int host1x_channel_list_init(struct host1x_channel_list *chlist, + unsigned int num_channels) { - INIT_LIST_HEAD(&host->chlist.list); - mutex_init(&host->chlist_mutex); - - if (host->info->nb_channels > BITS_PER_LONG) { - WARN(1, "host1x hardware has more channels than supported by the driver\n"); - return -ENOSYS; + chlist->channels = kcalloc(num_channels, sizeof(struct host1x_channel), + GFP_KERNEL); + if (!chlist->channels) + return -ENOMEM; + + chlist->allocated_channels = + kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long), + GFP_KERNEL); + if (!chlist->allocated_channels) { + kfree(chlist->channels); + return -ENOMEM; } + bitmap_zero(chlist->allocated_channels, num_channels); + return 0; } +void host1x_channel_list_free(struct host1x_channel_list *chlist) +{ + kfree(chlist->allocated_channels); + kfree(chlist->channels); +} + int host1x_job_submit(struct host1x_job *job) { struct host1x *host = dev_get_drvdata(job->channel->dev->parent); @@ -47,86 +61,107 @@ EXPORT_SYMBOL(host1x_job_submit); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel) { - int err = 0; + kref_get(&channel->refcount); - mutex_lock(&channel->reflock); + return channel; +} +EXPORT_SYMBOL(host1x_channel_get); - if (channel->refcount == 0) - err = host1x_cdma_init(&channel->cdma); +/** + * host1x_channel_get_index() - Attempt to get channel reference by index + * @host: Host1x device object + * @index: Index of channel + * + * If channel number @index is currently allocated, increase its refcount + * and return a pointer to it. Otherwise, return NULL. + */ +struct host1x_channel *host1x_channel_get_index(struct host1x *host, + unsigned int index) +{ + struct host1x_channel *ch = &host->channel_list.channels[index]; - if (!err) - channel->refcount++; + if (!kref_get_unless_zero(&ch->refcount)) + return NULL; - mutex_unlock(&channel->reflock); + return ch; +} + +static void release_channel(struct kref *kref) +{ + struct host1x_channel *channel = + container_of(kref, struct host1x_channel, refcount); + struct host1x *host = dev_get_drvdata(channel->dev->parent); + struct host1x_channel_list *chlist = &host->channel_list; + + host1x_hw_cdma_stop(host, &channel->cdma); + host1x_cdma_deinit(&channel->cdma); - return err ? NULL : channel; + clear_bit(channel->id, chlist->allocated_channels); } -EXPORT_SYMBOL(host1x_channel_get); void host1x_channel_put(struct host1x_channel *channel) { - mutex_lock(&channel->reflock); + kref_put(&channel->refcount, release_channel); +} +EXPORT_SYMBOL(host1x_channel_put); - if (channel->refcount == 1) { - struct host1x *host = dev_get_drvdata(channel->dev->parent); +static struct host1x_channel *acquire_unused_channel(struct host1x *host) +{ + struct host1x_channel_list *chlist = &host->channel_list; + unsigned int max_channels = host->info->nb_channels; + unsigned int index; - host1x_hw_cdma_stop(host, &channel->cdma); - host1x_cdma_deinit(&channel->cdma); + index = find_first_zero_bit(chlist->allocated_channels, max_channels); + if (index >= max_channels) { + dev_err(host->dev, "failed to find free channel\n"); + return NULL; } - channel->refcount--; + chlist->channels[index].id = index; - mutex_unlock(&channel->reflock); + set_bit(index, chlist->allocated_channels); + + return &chlist->channels[index]; } -EXPORT_SYMBOL(host1x_channel_put); +/** + * host1x_channel_request() - Allocate a channel + * @device: Host1x unit this channel will be used to send commands to + * + * Allocates a new host1x channel for @device. If there are no free channels, + * this will sleep until one becomes available. May return NULL if CDMA + * initialization fails. + */ struct host1x_channel *host1x_channel_request(struct device *dev) { struct host1x *host = dev_get_drvdata(dev->parent); - unsigned int max_channels = host->info->nb_channels; - struct host1x_channel *channel = NULL; - unsigned long index; + struct host1x_channel_list *chlist = &host->channel_list; + struct host1x_channel *channel; int err; - mutex_lock(&host->chlist_mutex); + channel = acquire_unused_channel(host); + if (!channel) + return NULL; - index = find_first_zero_bit(&host->allocated_channels, max_channels); - if (index >= max_channels) - goto fail; + kref_init(&channel->refcount); + mutex_init(&channel->submitlock); + channel->dev = dev; - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) + err = host1x_hw_channel_init(host, channel, channel->id); + if (err < 0) goto fail; - err = host1x_hw_channel_init(host, channel, index); + err = host1x_cdma_init(&channel->cdma); if (err < 0) goto fail; - /* Link device to host1x_channel */ - channel->dev = dev; - - /* Add to channel list */ - list_add_tail(&channel->list, &host->chlist.list); - - host->allocated_channels |= BIT(index); - - mutex_unlock(&host->chlist_mutex); return channel; fail: - dev_err(dev, "failed to init channel\n"); - kfree(channel); - mutex_unlock(&host->chlist_mutex); - return NULL; -} -EXPORT_SYMBOL(host1x_channel_request); + clear_bit(channel->id, chlist->allocated_channels); -void host1x_channel_free(struct host1x_channel *channel) -{ - struct host1x *host = dev_get_drvdata(channel->dev->parent); + dev_err(dev, "failed to initialize channel\n"); - host->allocated_channels &= ~BIT(channel->id); - list_del(&channel->list); - kfree(channel); + return NULL; } -EXPORT_SYMBOL(host1x_channel_free); +EXPORT_SYMBOL(host1x_channel_request); diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h index df767cf90d51..7068e42d42df 100644 --- a/drivers/gpu/host1x/channel.h +++ b/drivers/gpu/host1x/channel.h @@ -20,17 +20,21 @@ #define __HOST1X_CHANNEL_H #include +#include #include "cdma.h" struct host1x; +struct host1x_channel; -struct host1x_channel { - struct list_head list; +struct host1x_channel_list { + struct host1x_channel *channels; + unsigned long *allocated_channels; +}; - unsigned int refcount; +struct host1x_channel { + struct kref refcount; unsigned int id; - struct mutex reflock; struct mutex submitlock; void __iomem *regs; struct device *dev; @@ -38,9 +42,10 @@ struct host1x_channel { }; /* channel list operations */ -int host1x_channel_list_init(struct host1x *host); - -#define host1x_for_each_channel(host, channel) \ - list_for_each_entry(channel, &host->chlist.list, list) +int host1x_channel_list_init(struct host1x_channel_list *chlist, + unsigned int num_channels); +void host1x_channel_list_free(struct host1x_channel_list *chlist); +struct host1x_channel *host1x_channel_get_index(struct host1x *host, + unsigned int index); #endif diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index d9330fcc62ad..2aae0e63214c 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -43,24 +43,19 @@ void host1x_debug_output(struct output *o, const char *fmt, ...) o->fn(o->ctx, o->buf, len); } -static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo) +static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) { struct host1x *m = dev_get_drvdata(ch->dev->parent); struct output *o = data; - mutex_lock(&ch->reflock); + mutex_lock(&ch->cdma.lock); - if (ch->refcount) { - mutex_lock(&ch->cdma.lock); + if (show_fifo) + host1x_hw_show_channel_fifo(m, ch, o); - if (show_fifo) - host1x_hw_show_channel_fifo(m, ch, o); + host1x_hw_show_channel_cdma(m, ch, o); - host1x_hw_show_channel_cdma(m, ch, o); - mutex_unlock(&ch->cdma.lock); - } - - mutex_unlock(&ch->reflock); + mutex_unlock(&ch->cdma.lock); return 0; } @@ -94,28 +89,22 @@ static void show_syncpts(struct host1x *m, struct output *o) host1x_debug_output(o, "\n"); } -static void show_all(struct host1x *m, struct output *o) +static void show_all(struct host1x *m, struct output *o, bool show_fifo) { - struct host1x_channel *ch; + int i; host1x_hw_show_mlocks(m, o); show_syncpts(m, o); host1x_debug_output(o, "---- channels ----\n"); - host1x_for_each_channel(m, ch) - show_channels(ch, o, true); -} - -static void show_all_no_fifo(struct host1x *host1x, struct output *o) -{ - struct host1x_channel *ch; - - host1x_hw_show_mlocks(host1x, o); - show_syncpts(host1x, o); - host1x_debug_output(o, "---- channels ----\n"); + for (i = 0; i < m->info->nb_channels; ++i) { + struct host1x_channel *ch = host1x_channel_get_index(m, i); - host1x_for_each_channel(host1x, ch) - show_channels(ch, o, false); + if (ch) { + show_channel(ch, o, show_fifo); + host1x_channel_put(ch); + } + } } static int host1x_debug_show_all(struct seq_file *s, void *unused) @@ -125,7 +114,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused) .ctx = s }; - show_all(s->private, &o); + show_all(s->private, &o, true); return 0; } @@ -137,7 +126,7 @@ static int host1x_debug_show(struct seq_file *s, void *unused) .ctx = s }; - show_all_no_fifo(s->private, &o); + show_all(s->private, &o, false); return 0; } @@ -216,7 +205,7 @@ void host1x_debug_dump(struct host1x *host1x) .fn = write_to_printk }; - show_all(host1x, &o); + show_all(host1x, &o, true); } void host1x_debug_dump_syncpts(struct host1x *host1x) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index f05ebb14fa63..5c1c711a21af 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -198,7 +198,8 @@ static int host1x_probe(struct platform_device *pdev) host->iova_end = geometry->aperture_end; } - err = host1x_channel_list_init(host); + err = host1x_channel_list_init(&host->channel_list, + host->info->nb_channels); if (err) { dev_err(&pdev->dev, "failed to initialize channel list\n"); goto fail_detach_device; @@ -207,7 +208,7 @@ static int host1x_probe(struct platform_device *pdev) err = clk_prepare_enable(host->clk); if (err < 0) { dev_err(&pdev->dev, "failed to enable clock\n"); - goto fail_detach_device; + goto fail_free_channels; } err = reset_control_deassert(host->rst); @@ -244,6 +245,8 @@ fail_reset_assert: reset_control_assert(host->rst); fail_unprepare_disable: clk_disable_unprepare(host->clk); +fail_free_channels: + host1x_channel_list_free(&host->channel_list); fail_detach_device: if (host->domain) { put_iova_domain(&host->iova); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 229d08b6a45e..ffdbc15b749b 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -129,10 +129,8 @@ struct host1x { struct host1x_syncpt *nop_sp; struct mutex syncpt_mutex; - struct mutex chlist_mutex; - struct host1x_channel chlist; - unsigned long allocated_channels; - unsigned int num_allocated_channels; + + struct host1x_channel_list channel_list; struct dentry *debugfs; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 5e8df78b7acd..8447a56c41ca 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -181,10 +181,6 @@ error: static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, unsigned int index) { - ch->id = index; - mutex_init(&ch->reflock); - mutex_init(&ch->submitlock); - ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE; return 0; } diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 476da0e06bb2..630b1a98ab58 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -172,7 +172,6 @@ struct host1x_channel; struct host1x_job; struct host1x_channel *host1x_channel_request(struct device *dev); -void host1x_channel_free(struct host1x_channel *channel); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); void host1x_channel_put(struct host1x_channel *channel); int host1x_job_submit(struct host1x_job *job); -- cgit v1.2.3 From 43240bbd871e2c8f89584d369278a3d18680d9ea Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:43 +0300 Subject: gpu: host1x: At first try a non-blocking allocation for the gather copy The blocking gather copy allocation is a major performance downside of the Host1x firewall, it may take hundreds milliseconds which is unacceptable for the real-time graphics operations. Let's try a non-blocking allocation first as a least invasive solution, it makes opentegra (Xorg driver) performance indistinguishable with/without the firewall. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index f32ae69a68c7..bee504406cfc 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -574,12 +574,20 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) size += g->words * sizeof(u32); } + /* + * Try a non-blocking allocation from a higher priority pools first, + * as awaiting for the allocation here is a major performance hit. + */ job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, - GFP_KERNEL); - if (!job->gather_copy_mapped) { - job->gather_copy_mapped = NULL; + GFP_NOWAIT); + + /* the higher priority allocation failed, try the generic-blocking */ + if (!job->gather_copy_mapped) + job->gather_copy_mapped = dma_alloc_wc(dev, size, + &job->gather_copy, + GFP_KERNEL); + if (!job->gather_copy_mapped) return -ENOMEM; - } job->gather_copy_size = size; -- cgit v1.2.3