From 603e4922f1c81fc2ed3a87b4f91a8d3aafc7e093 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 May 2021 10:25:26 +0300 Subject: remove the raw driver The raw driver used to provide direct unbuffered access to block devices before O_DIRECT was invented. It has been obsolete for more than a decade. Acked-by: Greg Kroah-Hartman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/Pine.LNX.4.64.0703180754060.6605@CPE00045a9c397f-CM001225dbafb6/ Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210531072526.97052-1-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/raw.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/uapi/linux/raw.h (limited to 'include/uapi') diff --git a/include/uapi/linux/raw.h b/include/uapi/linux/raw.h deleted file mode 100644 index 47874919d0b9..000000000000 --- a/include/uapi/linux/raw.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_RAW_H -#define __LINUX_RAW_H - -#include <linux/types.h> - -#define RAW_SETBIND _IO( 0xac, 0 ) -#define RAW_GETBIND _IO( 0xac, 1 ) - -struct raw_config_request -{ - int raw_minor; - __u64 block_major; - __u64 block_minor; -}; - -#endif /* __LINUX_RAW_H */ -- cgit v1.2.3 From 8e8125f192288802267157f613c0ca654dfbde8e Mon Sep 17 00:00:00 2001 From: Yuri Nudelman Date: Tue, 25 May 2021 14:49:52 +0300 Subject: habanalabs: add debug flag to prevent failure on timeout Sometimes it is useful to allow a command to continue running even though its timeout has expired, in order to differentiate between commands that are really stuck and commands that are just very time-consuming. This can be achieved by passing a new debug flag alongside the cs, HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT. If the timeout does occur, a warning print shall still be issued, but this shall not fail the submission. 
Signed-off-by: Yuri Nudelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 25 +++++++++++++++++----- drivers/misc/habanalabs/common/habanalabs.h | 5 +++++ include/uapi/misc/habanalabs.h | 1 + 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index ecd96fbe3150..6d51f54030c1 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -556,6 +556,13 @@ out: else if (!cs->submitted) cs->fence->error = -EBUSY; + if (unlikely(cs->skip_reset_on_timeout)) { + dev_err(hdev->dev, + "Command submission %llu completed after %llu (s)\n", + cs->sequence, + div_u64(jiffies - cs->submission_time_jiffies, HZ)); + } + if (cs->timestamp) cs->fence->timestamp = ktime_get(); complete_all(&cs->fence->completion); @@ -571,6 +578,8 @@ static void cs_timedout(struct work_struct *work) int rc; struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); + bool skip_reset_on_timeout = cs->skip_reset_on_timeout; + rc = cs_get_unless_zero(cs); if (!rc) return; @@ -581,7 +590,8 @@ static void cs_timedout(struct work_struct *work) } /* Mark the CS is timed out so we won't try to cancel its TDR */ - cs->timedout = true; + if (likely(!skip_reset_on_timeout)) + cs->timedout = true; hdev = cs->ctx->hdev; @@ -613,10 +623,12 @@ static void cs_timedout(struct work_struct *work) cs_put(cs); - if (hdev->reset_on_lockup) - hl_device_reset(hdev, HL_RESET_TDR); - else - hdev->needs_reset = true; + if (likely(!skip_reset_on_timeout)) { + if (hdev->reset_on_lockup) + hl_device_reset(hdev, HL_RESET_TDR); + else + hdev->needs_reset = true; + } } static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, @@ -650,6 +662,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->type = cs_type; cs->timestamp = 
!!(flags & HL_CS_FLAGS_TIMESTAMP); cs->timeout_jiffies = timeout; + cs->skip_reset_on_timeout = + !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT); + cs->submission_time_jiffies = jiffies; INIT_LIST_HEAD(&cs->job_list); INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); kref_init(&cs->refcount); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index e751868b3ed3..56d2f41f8893 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1421,6 +1421,7 @@ struct hl_userptr { * @staged_sequence: the sequence of the staged submission this CS is part of, * relevant only if staged_cs is set. * @timeout_jiffies: cs timeout in jiffies. + * @submission_time_jiffies: submission time of the cs * @type: CS_TYPE_*. * @submitted: true if CS was submitted to H/W. * @completed: true if CS was completed by device. @@ -1433,6 +1434,8 @@ struct hl_userptr { * @staged_first: true if this is the first staged CS and we need to receive * timeout for this CS. * @staged_cs: true if this CS is part of a staged submission. + * @skip_reset_on_timeout: true if we shall not reset the device in case + * timeout occurs (debug scenario). 
*/ struct hl_cs { u16 *jobs_in_queue_cnt; @@ -1450,6 +1453,7 @@ struct hl_cs { u64 sequence; u64 staged_sequence; u64 timeout_jiffies; + u64 submission_time_jiffies; enum hl_cs_type type; u8 submitted; u8 completed; @@ -1460,6 +1464,7 @@ struct hl_cs { u8 staged_last; u8 staged_first; u8 staged_cs; + u8 skip_reset_on_timeout; }; /** diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 6d2d34c9f375..a47485a8d411 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -664,6 +664,7 @@ struct hl_cs_chunk { #define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 #define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100 #define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 +#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 #define HL_CS_STATUS_SUCCESS 0 -- cgit v1.2.3 From e307b302be8beb7fb59aa16621d5081b69397076 Mon Sep 17 00:00:00 2001 From: Yuri Nudelman Date: Mon, 24 May 2021 11:25:21 +0300 Subject: habanalabs: added open_stats info ioctl In a system with multiple ASICs, there is a need to provide monitoring tools with information on how long a device was opened and how many times a device was opened. Therefore, we add a new opcode to the INFO ioctl to provide that information. 
Signed-off-by: Yuri Nudelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 3 +++ drivers/misc/habanalabs/common/habanalabs.h | 8 ++++++++ drivers/misc/habanalabs/common/habanalabs_drv.c | 3 +++ drivers/misc/habanalabs/common/habanalabs_ioctl.c | 21 +++++++++++++++++++++ include/uapi/misc/habanalabs.h | 12 ++++++++++++ 5 files changed, 47 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index e56f5170e338..37ce38d9a1a7 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -132,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp) dev_warn(hdev->dev, "Device is still in use because there are live CS and/or memory mappings\n"); + hdev->last_open_session_duration_jif = + jiffies - hdev->last_successful_open_jif; + return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 244fbf209d34..6c9a81c2cfe7 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2137,6 +2137,11 @@ struct hl_mmu_funcs { * the error will be ignored by the driver during * device initialization. Mainly used to debug and * workaround firmware bugs + * @last_successful_open_jif: timestamp (jiffies) of the last successful + * device open. + * @last_open_session_duration_jif: duration (jiffies) of the last device open + * session. + * @open_counter: number of successful device open operations. * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @card_type: Various ASICs have several card types. 
This indicates the card @@ -2259,6 +2264,9 @@ struct hl_device { u64 max_power; u64 clock_gating_mask; u64 boot_error_status_mask; + u64 last_successful_open_jif; + u64 last_open_session_duration_jif; + u64 open_counter; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; enum cpucp_card_types card_type; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 4d377a39df13..4194cda2d04c 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -187,6 +187,9 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); + hdev->open_counter++; + hdev->last_successful_open_jif = jiffies; + return 0; out_err: diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 6604d30246e6..f4dda7b4acdd 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -460,6 +460,24 @@ static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args) min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0; } +static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_open_stats_info open_stats_info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + open_stats_info.last_open_period_ms = jiffies64_to_msecs( + hdev->last_open_session_duration_jif); + open_stats_info.open_counter = hdev->open_counter; + + return copy_to_user(out, &open_stats_info, + min((size_t) max_size, sizeof(open_stats_info))) ? 
-EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -543,6 +561,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_POWER: return power_info(hpriv, args); + case HL_INFO_OPEN_STATS: + return open_stats_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a47485a8d411..a47a731e4527 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -313,6 +313,7 @@ enum hl_device_status { * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency + * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -331,6 +332,7 @@ enum hl_device_status { #define HL_INFO_TOTAL_ENERGY 15 #define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_POWER 17 +#define HL_INFO_OPEN_STATS 18 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -444,6 +446,16 @@ struct hl_pll_frequency_info { __u16 output[HL_PLL_NUM_OUTPUTS]; }; +/** + * struct hl_open_stats_info - device open statistics information + * @open_counter: ever growing counter, increased on each successful dev open + * @last_open_period_ms: duration (ms) device was open last time + */ +struct hl_open_stats_info { + __u64 open_counter; + __u64 last_open_period_ms; +}; + /** * struct hl_power_info - power information * @power: power consumption -- cgit v1.2.3