summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2023-10-23 20:06:58 +0200
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2023-10-23 20:06:58 +0200
commit8aa49284f3673879dcabfb116f7cac265b4cb0f5 (patch)
treec55b0d4c3f3f4dfe758ea5ddfb51b86ef2cdd3fe
parentMerge branch 'thermal-core' (diff)
parentselftests/thermel/intel: Add test to read power floor status (diff)
downloadlinux-8aa49284f3673879dcabfb116f7cac265b4cb0f5.tar.xz
linux-8aa49284f3673879dcabfb116f7cac265b4cb0f5.zip
Merge branch 'thermal-intel'
Merge changes in Intel thermal control drivers for 6.7-rc1: - Add power floor notifications support to the int340x thermal control driver (Srinivas Pandruvada). - Rework updating trip points in the int340x thermal driver so that it does not access thermal zone internals directly (Rafael Wysocki). - Use param_get_byte() instead of param_get_int() as the max_idle module parameter .get() callback in the Intel powerclamp thermal driver to avoid possible out-of-bounds access (David Arcari). - Add workload hints support to the the int340x thermal driver (Srinivas Pandruvada). * thermal-intel: selftests/thermel/intel: Add test to read power floor status thermal: int340x: processor_thermal: Enable power floor support thermal: int340x: processor_thermal: Handle power floor interrupts thermal: int340x: processor_thermal: Support power floor notifications thermal: int340x: processor_thermal: Set feature mask before proc_thermal_add thermal: int340x: processor_thermal: Common function to clear SOC interrupt thermal: int340x: processor_thermal: Move interrupt status MMIO offset to common header thermal: intel: powerclamp: fix mismatch in get function for max_idle thermal: int340x: Use thermal_zone_for_each_trip() thermal: int340x: processor_thermal: Ack all PCI interrupts thermal: int340x: Add ArrowLake-S PCI ID selftests/thermel/intel: Add test to read workload hint thermal: int340x: Handle workload hint interrupts thermal: int340x: processor_thermal: Add workload type hint interface thermal: int340x: Remove PROC_THERMAL_FEATURE_WLT_REQ for Meteor Lake thermal: int340x: processor_thermal: Use non MSI interrupts by default thermal: int340x: processor_thermal: Add interrupt configuration function thermal: int340x: processor_thermal: Move mailbox code to common module
-rw-r--r--Documentation/driver-api/thermal/intel_dptf.rst64
-rw-r--r--drivers/thermal/intel/int340x_thermal/Makefile3
-rw-r--r--drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c78
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device.c85
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device.h33
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c121
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c3
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c179
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_power_floor.c126
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_wt_hint.c255
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_wt_req.c136
-rw-r--r--drivers/thermal/intel/intel_powerclamp.c2
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c108
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c157
17 files changed, 1170 insertions, 206 deletions
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
index 9ab4316322a1..8fb8c5b2d685 100644
--- a/Documentation/driver-api/thermal/intel_dptf.rst
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -164,6 +164,16 @@ ABI.
``power_limit_1_tmax_us`` (RO)
Maximum powercap sysfs constraint_1_time_window_us for Intel RAPL
+``power_floor_status`` (RO)
+ When set to 1, the power floor of the system in the current
+ configuration has been reached. It needs to be reconfigured to allow
+ power to be reduced any further.
+
+``power_floor_enable`` (RW)
+ When set to 1, enable reading and notification of the power floor
+ status. Notifications are triggered for the power_floor_status
+ attribute value changes.
+
:file:`/sys/bus/pci/devices/0000\:00\:04.0/`
``tcc_offset_degree_celsius`` (RW)
@@ -315,3 +325,57 @@ DPTF Fan Control
----------------------------------------
Refer to Documentation/admin-guide/acpi/fan_performance_states.rst
+
+Workload Type Hints
+----------------------------------------
+
+The firmware in Meteor Lake processor generation is capable of identifying
+workload type and passing hints regarding it to the OS. A special sysfs
+interface is provided to allow user space to obtain workload type hints from
+the firmware and control the rate at which they are provided.
+
+User space can poll attribute "workload_type_index" for the current hint or
+can receive a notification whenever the value of this attribute is updated.
+
+file:`/sys/bus/pci/devices/0000:00:04.0/workload_hint/`
+Segment 0, bus 0, device 4, function 0 is reserved for the processor thermal
+device on all Intel client processors. So, the above path doesn't change
+based on the processor generation.
+
+``workload_hint_enable`` (RW)
+ Enable firmware to send workload type hints to user space.
+
+``notification_delay_ms`` (RW)
+ Minimum delay in milliseconds before firmware will notify OS. This is
+ for the rate control of notifications. This delay is between changing
+ the workload type prediction in the firmware and notifying the OS about
+ the change. The default delay is 1024 ms. The delay of 0 is invalid.
+ The delay is rounded up to the nearest power of 2 to simplify firmware
+ programming of the delay value. The read of notification_delay_ms
+ attribute shows the effective value used.
+
+``workload_type_index`` (RO)
+ Predicted workload type index. User space can get notification of
+ change via existing sysfs attribute change notification mechanism.
+
+ The supported index values and their meaning for the Meteor Lake
+ processor generation are as follows:
+
+ 0 - Idle: System performs no tasks, power and idle residency are
+ consistently low for long periods of time.
+
+ 1 – Battery Life: Power is relatively low, but the processor may
+ still be actively performing a task, such as video playback for
+ a long period of time.
+
+ 2 – Sustained: Power level that is relatively high for a long period
+ of time, with very few to no periods of idleness, which will
+ eventually exhaust RAPL Power Limit 1 and 2.
+
+ 3 – Bursty: Consumes a relatively constant average amount of power, but
+ periods of relative idleness are interrupted by bursts of
+ activity. The bursts are relatively short and the periods of
+ relative idleness between them typically prevent RAPL Power
+ Limit 1 from being exhausted.
+
+ 4 – Unknown: Can't classify.
diff --git a/drivers/thermal/intel/int340x_thermal/Makefile b/drivers/thermal/intel/int340x_thermal/Makefile
index 4e852ce4a5d5..fe3f43924525 100644
--- a/drivers/thermal/intel/int340x_thermal/Makefile
+++ b/drivers/thermal/intel/int340x_thermal/Makefile
@@ -10,5 +10,8 @@ obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_device_pci.o
obj-$(CONFIG_PROC_THERMAL_MMIO_RAPL) += processor_thermal_rapl.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_rfim.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_mbox.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_wt_req.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_wt_hint.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_power_floor.o
obj-$(CONFIG_INT3406_THERMAL) += int3406_thermal.o
obj-$(CONFIG_ACPI_THERMAL_REL) += acpi_thermal_rel.o
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
index 89cf007146ea..a03b67579dd9 100644
--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
@@ -67,6 +67,16 @@ static struct thermal_zone_device_ops int340x_thermal_zone_ops = {
.critical = int340x_thermal_critical,
};
+static inline void *int_to_trip_priv(int i)
+{
+ return (void *)(long)i;
+}
+
+static inline int trip_priv_to_int(const struct thermal_trip *trip)
+{
+ return (long)trip->priv;
+}
+
static int int340x_thermal_read_trips(struct acpi_device *zone_adev,
struct thermal_trip *zone_trips,
int trip_cnt)
@@ -101,6 +111,7 @@ static int int340x_thermal_read_trips(struct acpi_device *zone_adev,
break;
zone_trips[trip_cnt].type = THERMAL_TRIP_ACTIVE;
+ zone_trips[trip_cnt].priv = int_to_trip_priv(i);
trip_cnt++;
}
@@ -212,45 +223,40 @@ void int340x_thermal_zone_remove(struct int34x_thermal_zone *int34x_zone)
}
EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove);
-void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone)
+static int int340x_update_one_trip(struct thermal_trip *trip, void *arg)
{
- struct acpi_device *zone_adev = int34x_zone->adev;
- struct thermal_trip *zone_trips = int34x_zone->trips;
- int trip_cnt = int34x_zone->zone->num_trips;
- int act_trip_nr = 0;
- int i;
-
- mutex_lock(&int34x_zone->zone->lock);
-
- for (i = int34x_zone->aux_trip_nr; i < trip_cnt; i++) {
- int temp, err;
-
- switch (zone_trips[i].type) {
- case THERMAL_TRIP_CRITICAL:
- err = thermal_acpi_critical_trip_temp(zone_adev, &temp);
- break;
- case THERMAL_TRIP_HOT:
- err = thermal_acpi_hot_trip_temp(zone_adev, &temp);
- break;
- case THERMAL_TRIP_PASSIVE:
- err = thermal_acpi_passive_trip_temp(zone_adev, &temp);
- break;
- case THERMAL_TRIP_ACTIVE:
- err = thermal_acpi_active_trip_temp(zone_adev, act_trip_nr++,
- &temp);
- break;
- default:
- err = -ENODEV;
- }
- if (err) {
- zone_trips[i].temperature = THERMAL_TEMP_INVALID;
- continue;
- }
-
- zone_trips[i].temperature = temp;
+ struct acpi_device *zone_adev = arg;
+ int temp, err;
+
+ switch (trip->type) {
+ case THERMAL_TRIP_CRITICAL:
+ err = thermal_acpi_critical_trip_temp(zone_adev, &temp);
+ break;
+ case THERMAL_TRIP_HOT:
+ err = thermal_acpi_hot_trip_temp(zone_adev, &temp);
+ break;
+ case THERMAL_TRIP_PASSIVE:
+ err = thermal_acpi_passive_trip_temp(zone_adev, &temp);
+ break;
+ case THERMAL_TRIP_ACTIVE:
+ err = thermal_acpi_active_trip_temp(zone_adev,
+ trip_priv_to_int(trip),
+ &temp);
+ break;
+ default:
+ err = -ENODEV;
}
+ if (err)
+ temp = THERMAL_TEMP_INVALID;
- mutex_unlock(&int34x_zone->zone->lock);
+ trip->temperature = temp;
+ return 0;
+}
+
+void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone)
+{
+ thermal_zone_for_each_trip(int34x_zone->zone, int340x_update_one_trip,
+ int34x_zone->adev);
}
EXPORT_SYMBOL_GPL(int340x_thermal_update_trips);
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
index 3ca0a2f5937f..649f67fdf345 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
@@ -26,6 +26,48 @@ static ssize_t power_limit_##index##_##suffix##_show(struct device *dev, \
(unsigned long)proc_dev->power_limits[index].suffix * 1000); \
}
+static ssize_t power_floor_status_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct proc_thermal_device *proc_dev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = proc_thermal_read_power_floor_status(proc_dev);
+
+ return sysfs_emit(buf, "%d\n", ret);
+}
+
+static ssize_t power_floor_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct proc_thermal_device *proc_dev = dev_get_drvdata(dev);
+ bool ret;
+
+ ret = proc_thermal_power_floor_get_state(proc_dev);
+
+ return sysfs_emit(buf, "%d\n", ret);
+}
+
+static ssize_t power_floor_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct proc_thermal_device *proc_dev = dev_get_drvdata(dev);
+ u8 state;
+ int ret;
+
+ if (kstrtou8(buf, 0, &state))
+ return -EINVAL;
+
+ ret = proc_thermal_power_floor_set_state(proc_dev, !!state);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
POWER_LIMIT_SHOW(0, min_uw)
POWER_LIMIT_SHOW(0, max_uw)
POWER_LIMIT_SHOW(0, step_uw)
@@ -50,6 +92,9 @@ static DEVICE_ATTR_RO(power_limit_1_step_uw);
static DEVICE_ATTR_RO(power_limit_1_tmin_us);
static DEVICE_ATTR_RO(power_limit_1_tmax_us);
+static DEVICE_ATTR_RO(power_floor_status);
+static DEVICE_ATTR_RW(power_floor_enable);
+
static struct attribute *power_limit_attrs[] = {
&dev_attr_power_limit_0_min_uw.attr,
&dev_attr_power_limit_1_min_uw.attr,
@@ -61,12 +106,30 @@ static struct attribute *power_limit_attrs[] = {
&dev_attr_power_limit_1_tmin_us.attr,
&dev_attr_power_limit_0_tmax_us.attr,
&dev_attr_power_limit_1_tmax_us.attr,
+ &dev_attr_power_floor_status.attr,
+ &dev_attr_power_floor_enable.attr,
NULL
};
+static umode_t power_limit_attr_visible(struct kobject *kobj, struct attribute *attr, int unused)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct proc_thermal_device *proc_dev;
+
+ if (attr != &dev_attr_power_floor_status.attr && attr != &dev_attr_power_floor_enable.attr)
+ return attr->mode;
+
+ proc_dev = dev_get_drvdata(dev);
+ if (!proc_dev || !(proc_dev->mmio_feature_mask & PROC_THERMAL_FEATURE_POWER_FLOOR))
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group power_limit_attribute_group = {
.attrs = power_limit_attrs,
- .name = "power_limits"
+ .name = "power_limits",
+ .is_visible = power_limit_attr_visible,
};
static ssize_t tcc_offset_degree_celsius_show(struct device *dev,
@@ -346,12 +409,18 @@ int proc_thermal_mmio_add(struct pci_dev *pdev,
}
}
- if (feature_mask & PROC_THERMAL_FEATURE_MBOX) {
- ret = proc_thermal_mbox_add(pdev, proc_priv);
+ if (feature_mask & PROC_THERMAL_FEATURE_WT_REQ) {
+ ret = proc_thermal_wt_req_add(pdev, proc_priv);
if (ret) {
dev_err(&pdev->dev, "failed to add MBOX interface\n");
goto err_rem_rfim;
}
+ } else if (feature_mask & PROC_THERMAL_FEATURE_WT_HINT) {
+ ret = proc_thermal_wt_hint_add(pdev, proc_priv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to add WT Hint\n");
+ goto err_rem_rfim;
+ }
}
return 0;
@@ -374,12 +443,18 @@ void proc_thermal_mmio_remove(struct pci_dev *pdev, struct proc_thermal_device *
proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DVFS)
proc_thermal_rfim_remove(pdev);
- if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_MBOX)
- proc_thermal_mbox_remove(pdev);
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_POWER_FLOOR)
+ proc_thermal_power_floor_set_state(proc_priv, false);
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_WT_REQ)
+ proc_thermal_wt_req_remove(pdev);
+ else if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_WT_HINT)
+ proc_thermal_wt_hint_remove(pdev);
}
EXPORT_SYMBOL_GPL(proc_thermal_mmio_remove);
MODULE_IMPORT_NS(INTEL_TCC);
+MODULE_IMPORT_NS(INT340X_THERMAL);
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
index 7acaa8f1b896..95c6013a33fb 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
@@ -10,6 +10,7 @@
#include <linux/intel_rapl.h>
#define PCI_DEVICE_ID_INTEL_ADL_THERMAL 0x461d
+#define PCI_DEVICE_ID_INTEL_ARL_S_THERMAL 0xAD03
#define PCI_DEVICE_ID_INTEL_BDW_THERMAL 0x1603
#define PCI_DEVICE_ID_INTEL_BSW_THERMAL 0x22DC
@@ -59,8 +60,10 @@ struct rapl_mmio_regs {
#define PROC_THERMAL_FEATURE_RAPL 0x01
#define PROC_THERMAL_FEATURE_FIVR 0x02
#define PROC_THERMAL_FEATURE_DVFS 0x04
-#define PROC_THERMAL_FEATURE_MBOX 0x08
+#define PROC_THERMAL_FEATURE_WT_REQ 0x08
#define PROC_THERMAL_FEATURE_DLVR 0x10
+#define PROC_THERMAL_FEATURE_WT_HINT 0x20
+#define PROC_THERMAL_FEATURE_POWER_FLOOR 0x40
#if IS_ENABLED(CONFIG_PROC_THERMAL_MMIO_RAPL)
int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
@@ -80,13 +83,37 @@ static void __maybe_unused proc_thermal_rapl_remove(void)
int proc_thermal_rfim_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
void proc_thermal_rfim_remove(struct pci_dev *pdev);
-int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
-void proc_thermal_mbox_remove(struct pci_dev *pdev);
+int proc_thermal_wt_req_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+void proc_thermal_wt_req_remove(struct pci_dev *pdev);
+
+#define MBOX_CMD_WORKLOAD_TYPE_READ 0x0E
+#define MBOX_CMD_WORKLOAD_TYPE_WRITE 0x0F
+
+#define MBOX_DATA_BIT_AC_DC 30
+#define MBOX_DATA_BIT_VALID 31
+
+#define SOC_WT_RES_INT_STATUS_OFFSET 0x5B18
+#define SOC_WT_RES_INT_STATUS_MASK GENMASK_ULL(3, 2)
+
+int proc_thermal_read_power_floor_status(struct proc_thermal_device *proc_priv);
+int proc_thermal_power_floor_set_state(struct proc_thermal_device *proc_priv, bool enable);
+bool proc_thermal_power_floor_get_state(struct proc_thermal_device *proc_priv);
+void proc_thermal_power_floor_intr_callback(struct pci_dev *pdev,
+ struct proc_thermal_device *proc_priv);
+bool proc_thermal_check_power_floor_intr(struct proc_thermal_device *proc_priv);
int processor_thermal_send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp);
int processor_thermal_send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data);
+int processor_thermal_mbox_interrupt_config(struct pci_dev *pdev, bool enable, int enable_bit,
+ int time_window);
int proc_thermal_add(struct device *dev, struct proc_thermal_device *priv);
void proc_thermal_remove(struct proc_thermal_device *proc_priv);
+
+int proc_thermal_wt_hint_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+void proc_thermal_wt_hint_remove(struct pci_dev *pdev);
+void proc_thermal_wt_intr_callback(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+bool proc_thermal_check_wt_intr(struct proc_thermal_device *proc_priv);
+
int proc_thermal_suspend(struct device *dev);
int proc_thermal_resume(struct device *dev);
int proc_thermal_mmio_add(struct pci_dev *pdev,
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index 0d1e98007270..d7495571dd5d 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -15,6 +15,11 @@
#define DRV_NAME "proc_thermal_pci"
+static bool use_msi;
+module_param(use_msi, bool, 0644);
+MODULE_PARM_DESC(use_msi,
+ "Use PCI MSI based interrupts for processor thermal device.");
+
struct proc_thermal_pci {
struct pci_dev *pdev;
struct proc_thermal_device *proc_priv;
@@ -117,20 +122,64 @@ static void pkg_thermal_schedule_work(struct delayed_work *work)
schedule_delayed_work(work, ms);
}
+static void proc_thermal_clear_soc_int_status(struct proc_thermal_device *proc_priv)
+{
+ u64 status;
+
+ if (!(proc_priv->mmio_feature_mask &
+ (PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR)))
+ return;
+
+ status = readq(proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+ writeq(status & ~SOC_WT_RES_INT_STATUS_MASK,
+ proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+}
+
+static irqreturn_t proc_thermal_irq_thread_handler(int irq, void *devid)
+{
+ struct proc_thermal_pci *pci_info = devid;
+
+ proc_thermal_wt_intr_callback(pci_info->pdev, pci_info->proc_priv);
+ proc_thermal_power_floor_intr_callback(pci_info->pdev, pci_info->proc_priv);
+ proc_thermal_clear_soc_int_status(pci_info->proc_priv);
+
+ return IRQ_HANDLED;
+}
+
static irqreturn_t proc_thermal_irq_handler(int irq, void *devid)
{
struct proc_thermal_pci *pci_info = devid;
+ struct proc_thermal_device *proc_priv;
+ int ret = IRQ_HANDLED;
u32 status;
+ proc_priv = pci_info->proc_priv;
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_WT_HINT) {
+ if (proc_thermal_check_wt_intr(pci_info->proc_priv))
+ ret = IRQ_WAKE_THREAD;
+ }
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_POWER_FLOOR) {
+ if (proc_thermal_check_power_floor_intr(pci_info->proc_priv))
+ ret = IRQ_WAKE_THREAD;
+ }
+
+ /*
+ * Since now there are two sources of interrupts: one from thermal threshold
+ * and another from workload hint, add a check if there was really a threshold
+ * interrupt before scheduling work function for thermal threshold.
+ */
proc_thermal_mmio_read(pci_info, PROC_THERMAL_MMIO_INT_STATUS_0, &status);
+ if (status) {
+ /* Disable enable interrupt flag */
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
+ pkg_thermal_schedule_work(&pci_info->work);
+ }
- /* Disable enable interrupt flag */
- proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
pci_write_config_byte(pci_info->pdev, 0xdc, 0x01);
- pkg_thermal_schedule_work(&pci_info->work);
-
- return IRQ_HANDLED;
+ return ret;
}
static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
@@ -203,6 +252,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
struct proc_thermal_device *proc_priv;
struct proc_thermal_pci *pci_info;
int irq_flag = 0, irq, ret;
+ bool msi_irq = false;
proc_priv = devm_kzalloc(&pdev->dev, sizeof(*proc_priv), GFP_KERNEL);
if (!proc_priv)
@@ -223,19 +273,19 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
INIT_DELAYED_WORK(&pci_info->work, proc_thermal_threshold_work_fn);
- ret = proc_thermal_add(&pdev->dev, proc_priv);
- if (ret) {
- dev_err(&pdev->dev, "error: proc_thermal_add, will continue\n");
- pci_info->no_legacy = 1;
- }
-
proc_priv->priv_data = pci_info;
pci_info->proc_priv = proc_priv;
pci_set_drvdata(pdev, proc_priv);
ret = proc_thermal_mmio_add(pdev, proc_priv, id->driver_data);
if (ret)
- goto err_ret_thermal;
+ return ret;
+
+ ret = proc_thermal_add(&pdev->dev, proc_priv);
+ if (ret) {
+ dev_err(&pdev->dev, "error: proc_thermal_add, will continue\n");
+ pci_info->no_legacy = 1;
+ }
psv_trip.temperature = get_trip_temp(pci_info);
@@ -245,21 +295,26 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
&tzone_params, 0, 0);
if (IS_ERR(pci_info->tzone)) {
ret = PTR_ERR(pci_info->tzone);
- goto err_ret_mmio;
+ goto err_del_legacy;
}
- /* request and enable interrupt */
- ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
- if (ret < 0) {
- dev_err(&pdev->dev, "Failed to allocate vectors!\n");
- goto err_ret_tzone;
- }
- if (!pdev->msi_enabled && !pdev->msix_enabled)
+ if (use_msi && (pdev->msi_enabled || pdev->msix_enabled)) {
+ /* request and enable interrupt */
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to allocate vectors!\n");
+ goto err_ret_tzone;
+ }
+
+ irq = pci_irq_vector(pdev, 0);
+ msi_irq = true;
+ } else {
irq_flag = IRQF_SHARED;
+ irq = pdev->irq;
+ }
- irq = pci_irq_vector(pdev, 0);
ret = devm_request_threaded_irq(&pdev->dev, irq,
- proc_thermal_irq_handler, NULL,
+ proc_thermal_irq_handler, proc_thermal_irq_thread_handler,
irq_flag, KBUILD_MODNAME, pci_info);
if (ret) {
dev_err(&pdev->dev, "Request IRQ %d failed\n", pdev->irq);
@@ -273,14 +328,14 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
return 0;
err_free_vectors:
- pci_free_irq_vectors(pdev);
+ if (msi_irq)
+ pci_free_irq_vectors(pdev);
err_ret_tzone:
thermal_zone_device_unregister(pci_info->tzone);
-err_ret_mmio:
- proc_thermal_mmio_remove(pdev, proc_priv);
-err_ret_thermal:
+err_del_legacy:
if (!pci_info->no_legacy)
proc_thermal_remove(proc_priv);
+ proc_thermal_mmio_remove(pdev, proc_priv);
pci_disable_device(pdev);
return ret;
@@ -350,9 +405,15 @@ static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend,
proc_thermal_pci_resume);
static const struct pci_device_id proc_thermal_pci_ids[] = {
- { PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
- { PCI_DEVICE_DATA(INTEL, MTLP_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX | PROC_THERMAL_FEATURE_DLVR) },
- { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
+ { PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL |
+ PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_REQ) },
+ { PCI_DEVICE_DATA(INTEL, MTLP_THERMAL, PROC_THERMAL_FEATURE_RAPL |
+ PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_DLVR |
+ PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR) },
+ { PCI_DEVICE_DATA(INTEL, ARL_S_THERMAL, PROC_THERMAL_FEATURE_RAPL |
+ PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_WT_HINT) },
+ { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL |
+ PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_REQ) },
{ },
};
@@ -368,6 +429,8 @@ static struct pci_driver proc_thermal_pci_driver = {
module_pci_driver(proc_thermal_pci_driver);
+MODULE_IMPORT_NS(INT340X_THERMAL);
+
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
index 16fd9df5f36d..ccfdd2f9d973 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
@@ -137,7 +137,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, ICL_THERMAL, PROC_THERMAL_FEATURE_RAPL) },
{ PCI_DEVICE_DATA(INTEL, JSL_THERMAL, 0) },
{ PCI_DEVICE_DATA(INTEL, SKL_THERMAL, PROC_THERMAL_FEATURE_RAPL) },
- { PCI_DEVICE_DATA(INTEL, TGL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_MBOX) },
+ { PCI_DEVICE_DATA(INTEL, TGL_THERMAL, PROC_THERMAL_FEATURE_RAPL |
+ PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_WT_REQ) },
{ },
};
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
index 0b89a4340ff4..4d3bd32ff9ea 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
@@ -10,18 +10,12 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include "processor_thermal_device.h"
-#define MBOX_CMD_WORKLOAD_TYPE_READ 0x0E
-#define MBOX_CMD_WORKLOAD_TYPE_WRITE 0x0F
-
#define MBOX_OFFSET_DATA 0x5810
#define MBOX_OFFSET_INTERFACE 0x5818
#define MBOX_BUSY_BIT 31
#define MBOX_RETRY_COUNT 100
-#define MBOX_DATA_BIT_VALID 31
-#define MBOX_DATA_BIT_AC_DC 30
-
static DEFINE_MUTEX(mbox_lock);
static int wait_for_mbox_ready(struct proc_thermal_device *proc_priv)
@@ -51,23 +45,16 @@ static int send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data)
int ret;
proc_priv = pci_get_drvdata(pdev);
-
- mutex_lock(&mbox_lock);
-
ret = wait_for_mbox_ready(proc_priv);
if (ret)
- goto unlock_mbox;
+ return ret;
writel(data, (proc_priv->mmio_base + MBOX_OFFSET_DATA));
/* Write command register */
reg_data = BIT_ULL(MBOX_BUSY_BIT) | id;
writel(reg_data, (proc_priv->mmio_base + MBOX_OFFSET_INTERFACE));
- ret = wait_for_mbox_ready(proc_priv);
-
-unlock_mbox:
- mutex_unlock(&mbox_lock);
- return ret;
+ return wait_for_mbox_ready(proc_priv);
}
static int send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
@@ -77,12 +64,9 @@ static int send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
int ret;
proc_priv = pci_get_drvdata(pdev);
-
- mutex_lock(&mbox_lock);
-
ret = wait_for_mbox_ready(proc_priv);
if (ret)
- goto unlock_mbox;
+ return ret;
/* Write command register */
reg_data = BIT_ULL(MBOX_BUSY_BIT) | id;
@@ -90,152 +74,85 @@ static int send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
ret = wait_for_mbox_ready(proc_priv);
if (ret)
- goto unlock_mbox;
+ return ret;
if (id == MBOX_CMD_WORKLOAD_TYPE_READ)
*resp = readl(proc_priv->mmio_base + MBOX_OFFSET_DATA);
else
*resp = readq(proc_priv->mmio_base + MBOX_OFFSET_DATA);
-unlock_mbox:
- mutex_unlock(&mbox_lock);
- return ret;
+ return 0;
}
int processor_thermal_send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
{
- return send_mbox_read_cmd(pdev, id, resp);
+ int ret;
+
+ mutex_lock(&mbox_lock);
+ ret = send_mbox_read_cmd(pdev, id, resp);
+ mutex_unlock(&mbox_lock);
+
+ return ret;
}
EXPORT_SYMBOL_NS_GPL(processor_thermal_send_mbox_read_cmd, INT340X_THERMAL);
int processor_thermal_send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data)
{
- return send_mbox_write_cmd(pdev, id, data);
-}
-EXPORT_SYMBOL_NS_GPL(processor_thermal_send_mbox_write_cmd, INT340X_THERMAL);
-
-/* List of workload types */
-static const char * const workload_types[] = {
- "none",
- "idle",
- "semi_active",
- "bursty",
- "sustained",
- "battery_life",
- NULL
-};
-
-static ssize_t workload_available_types_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- int i = 0;
- int ret = 0;
-
- while (workload_types[i] != NULL)
- ret += sprintf(&buf[ret], "%s ", workload_types[i++]);
+ int ret;
- ret += sprintf(&buf[ret], "\n");
+ mutex_lock(&mbox_lock);
+ ret = send_mbox_write_cmd(pdev, id, data);
+ mutex_unlock(&mbox_lock);
return ret;
}
+EXPORT_SYMBOL_NS_GPL(processor_thermal_send_mbox_write_cmd, INT340X_THERMAL);
-static DEVICE_ATTR_RO(workload_available_types);
-
-static ssize_t workload_type_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- char str_preference[15];
- u32 data = 0;
- ssize_t ret;
-
- ret = sscanf(buf, "%14s", str_preference);
- if (ret != 1)
- return -EINVAL;
-
- ret = match_string(workload_types, -1, str_preference);
- if (ret < 0)
- return ret;
-
- ret &= 0xff;
-
- if (ret)
- data = BIT(MBOX_DATA_BIT_VALID) | BIT(MBOX_DATA_BIT_AC_DC);
-
- data |= ret;
-
- ret = send_mbox_write_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_WRITE, data);
- if (ret)
- return false;
-
- return count;
-}
+#define MBOX_CAMARILLO_RD_INTR_CONFIG 0x1E
+#define MBOX_CAMARILLO_WR_INTR_CONFIG 0x1F
+#define WLT_TW_MASK GENMASK_ULL(30, 24)
+#define SOC_PREDICTION_TW_SHIFT 24
-static ssize_t workload_type_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+int processor_thermal_mbox_interrupt_config(struct pci_dev *pdev, bool enable,
+ int enable_bit, int time_window)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- u64 cmd_resp;
+ u64 data;
int ret;
- ret = send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
- if (ret)
- return false;
-
- cmd_resp &= 0xff;
-
- if (cmd_resp > ARRAY_SIZE(workload_types) - 1)
- return -EINVAL;
+ if (!pdev)
+ return -ENODEV;
- return sprintf(buf, "%s\n", workload_types[cmd_resp]);
-}
-
-static DEVICE_ATTR_RW(workload_type);
+ mutex_lock(&mbox_lock);
-static struct attribute *workload_req_attrs[] = {
- &dev_attr_workload_available_types.attr,
- &dev_attr_workload_type.attr,
- NULL
-};
+ /* Do read modify write for MBOX_CAMARILLO_RD_INTR_CONFIG */
-static const struct attribute_group workload_req_attribute_group = {
- .attrs = workload_req_attrs,
- .name = "workload_request"
-};
+ ret = send_mbox_read_cmd(pdev, MBOX_CAMARILLO_RD_INTR_CONFIG, &data);
+ if (ret) {
+ dev_err(&pdev->dev, "MBOX_CAMARILLO_RD_INTR_CONFIG failed\n");
+ goto unlock;
+ }
-static bool workload_req_created;
+ if (time_window >= 0) {
+ data &= ~WLT_TW_MASK;
-int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
-{
- u64 cmd_resp;
- int ret;
+ /* Program notification delay */
+ data |= ((u64)time_window << SOC_PREDICTION_TW_SHIFT) & WLT_TW_MASK;
+ }
- /* Check if there is a mailbox support, if fails return success */
- ret = send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
- if (ret)
- return 0;
+ if (enable)
+ data |= BIT(enable_bit);
+ else
+ data &= ~BIT(enable_bit);
- ret = sysfs_create_group(&pdev->dev.kobj, &workload_req_attribute_group);
+ ret = send_mbox_write_cmd(pdev, MBOX_CAMARILLO_WR_INTR_CONFIG, data);
if (ret)
- return ret;
+ dev_err(&pdev->dev, "MBOX_CAMARILLO_WR_INTR_CONFIG failed\n");
- workload_req_created = true;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(proc_thermal_mbox_add);
-
-void proc_thermal_mbox_remove(struct pci_dev *pdev)
-{
- if (workload_req_created)
- sysfs_remove_group(&pdev->dev.kobj, &workload_req_attribute_group);
-
- workload_req_created = false;
+unlock:
+ mutex_unlock(&mbox_lock);
+ return ret;
}
-EXPORT_SYMBOL_GPL(proc_thermal_mbox_remove);
+EXPORT_SYMBOL_NS_GPL(processor_thermal_mbox_interrupt_config, INT340X_THERMAL);
MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_power_floor.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_power_floor.c
new file mode 100644
index 000000000000..a1a108407f0f
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_power_floor.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Processor thermal device module for registering and processing
+ * power floor. When the hardware reduces the power to the minimum
+ * possible, the power floor is notified via an interrupt.
+ *
+ * Operation:
+ * When user space enables power floor reporting:
+ * - Use mailbox to:
+ * Enable processor thermal device interrupt
+ *
+ * - Current status of power floor is read from offset 0x5B18
+ * bit 39.
+ *
+ * Two interface functions are provided to call when there is a
+ * thermal device interrupt:
+ * - proc_thermal_power_floor_intr():
+ * Check if the interrupt is for change in power floor.
+ * Called from interrupt context.
+ *
+ * - proc_thermal_power_floor_intr_callback():
+ * Callback for interrupt processing in thread context. This involves
+ * sending notification to user space that there is a change in the
+ * power floor status.
+ *
+ * Copyright (c) 2023, Intel Corporation.
+ */
+
+#include <linux/pci.h>
+#include "processor_thermal_device.h"
+
+#define SOC_POWER_FLOOR_STATUS BIT(39)
+#define SOC_POWER_FLOOR_SHIFT 39
+
+#define SOC_POWER_FLOOR_INT_ENABLE_BIT 31
+#define SOC_POWER_FLOOR_INT_ACTIVE BIT(3)
+
+int proc_thermal_read_power_floor_status(struct proc_thermal_device *proc_priv)
+{
+ u64 status = 0;
+
+ status = readq(proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+ return (status & SOC_POWER_FLOOR_STATUS) >> SOC_POWER_FLOOR_SHIFT;
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_read_power_floor_status, INT340X_THERMAL);
+
+static bool enable_state;
+static DEFINE_MUTEX(pf_lock);
+
+int proc_thermal_power_floor_set_state(struct proc_thermal_device *proc_priv, bool enable)
+{
+ int ret = 0;
+
+ mutex_lock(&pf_lock);
+ if (enable_state == enable)
+ goto pf_unlock;
+
+ /*
+ * Time window parameter is not applicable to power floor interrupt configuration.
+ * Hence use -1 for time window.
+ */
+ ret = processor_thermal_mbox_interrupt_config(to_pci_dev(proc_priv->dev), enable,
+ SOC_POWER_FLOOR_INT_ENABLE_BIT, -1);
+ if (!ret)
+ enable_state = enable;
+
+pf_unlock:
+ mutex_unlock(&pf_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_power_floor_set_state, INT340X_THERMAL);
+
+bool proc_thermal_power_floor_get_state(struct proc_thermal_device *proc_priv)
+{
+ return enable_state;
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_power_floor_get_state, INT340X_THERMAL);
+
+/**
+ * proc_thermal_check_power_floor_intr() - Check power floor interrupt.
+ * @proc_priv: Processor thermal device instance.
+ *
+ * Callback to check if the interrupt for power floor is active.
+ *
+ * Context: Called from interrupt context.
+ *
+ * Return: true if power floor is active, false when not active.
+ */
+bool proc_thermal_check_power_floor_intr(struct proc_thermal_device *proc_priv)
+{
+ u64 int_status;
+
+ int_status = readq(proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+ return !!(int_status & SOC_POWER_FLOOR_INT_ACTIVE);
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_check_power_floor_intr, INT340X_THERMAL);
+
+/**
+ * proc_thermal_power_floor_intr_callback() - Process power floor notification
+ * @pdev: PCI device instance
+ * @proc_priv: Processor thermal device instance.
+ *
+ * Check if the power floor interrupt is active, if active send notification to
+ * user space for the attribute "power_limits", so that user can read the attribute
+ * and take action.
+ *
+ * Context: Called from interrupt thread context.
+ *
+ * Return: None.
+ */
+void proc_thermal_power_floor_intr_callback(struct pci_dev *pdev,
+ struct proc_thermal_device *proc_priv)
+{
+ u64 status;
+
+ status = readq(proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+ if (!(status & SOC_POWER_FLOOR_INT_ACTIVE))
+ return;
+
+ sysfs_notify(&pdev->dev.kobj, "power_limits", "power_floor_status");
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_power_floor_intr_callback, INT340X_THERMAL);
+
+MODULE_IMPORT_NS(INT340X_THERMAL);
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_wt_hint.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_wt_hint.c
new file mode 100644
index 000000000000..9d5e4c169d1b
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_wt_hint.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * processor thermal device interface for reading workload type hints
+ * from the user space. The hints are provided by the firmware.
+ *
+ * Operation:
+ * When user space enables workload type prediction:
+ * - Use mailbox to:
+ * Configure notification delay
+ * Enable processor thermal device interrupt
+ *
+ * - The predicted workload type can be read from MMIO:
+ * Offset 0x5B18 shows if there was an interrupt
+ * active for change in workload type and also
+ * predicted workload type.
+ *
+ * Two interface functions are provided to call when there is a
+ * thermal device interrupt:
+ * - proc_thermal_check_wt_intr():
+ * Check if the interrupt is for change in workload type. Called from
+ * interrupt context.
+ *
+ * - proc_thermal_wt_intr_callback():
+ * Callback for interrupt processing in thread context. This involves
+ * sending notification to user space that there is a change in the
+ * workload type.
+ *
+ * Copyright (c) 2023, Intel Corporation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/pci.h>
+#include "processor_thermal_device.h"
+
+#define SOC_WT GENMASK_ULL(47, 40)
+
+#define SOC_WT_PREDICTION_INT_ENABLE_BIT 23
+
+#define SOC_WT_PREDICTION_INT_ACTIVE BIT(2)
+
+/*
+ * Closest possible to 1 Second is 1024 ms with programmed time delay
+ * of 0x0A.
+ */
+static u8 notify_delay = 0x0A;
+static u16 notify_delay_ms = 1024;
+
+static DEFINE_MUTEX(wt_lock);
+static u8 wt_enable;
+
+/* Show current predicted workload type index */
+static ssize_t workload_type_index_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct proc_thermal_device *proc_priv;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u64 status = 0;
+ int wt;
+
+ mutex_lock(&wt_lock);
+ if (!wt_enable) {
+ mutex_unlock(&wt_lock);
+ return -ENODATA;
+ }
+
+ proc_priv = pci_get_drvdata(pdev);
+
+ status = readq(proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+
+ mutex_unlock(&wt_lock);
+
+ wt = FIELD_GET(SOC_WT, status);
+
+ return sysfs_emit(buf, "%d\n", wt);
+}
+
+static DEVICE_ATTR_RO(workload_type_index);
+
+static ssize_t workload_hint_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", wt_enable);
+}
+
+static ssize_t workload_hint_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u8 mode;
+ int ret;
+
+ if (kstrtou8(buf, 10, &mode) || mode > 1)
+ return -EINVAL;
+
+ mutex_lock(&wt_lock);
+
+ if (mode)
+ ret = processor_thermal_mbox_interrupt_config(pdev, true,
+ SOC_WT_PREDICTION_INT_ENABLE_BIT,
+ notify_delay);
+ else
+ ret = processor_thermal_mbox_interrupt_config(pdev, false,
+ SOC_WT_PREDICTION_INT_ENABLE_BIT, 0);
+
+ if (ret)
+ goto ret_enable_store;
+
+ ret = size;
+ wt_enable = mode;
+
+ret_enable_store:
+ mutex_unlock(&wt_lock);
+
+ return ret;
+}
+
+static DEVICE_ATTR_RW(workload_hint_enable);
+
+static ssize_t notification_delay_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", notify_delay_ms);
+}
+
+static ssize_t notification_delay_ms_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 new_tw;
+ int ret;
+ u8 tm;
+
+ /*
+ * Time window register value:
+ * Formula: (1 + x/4) * power(2,y)
+ * x = 2 msbs, that is [30:29] y = 5 [28:24]
+ * in INTR_CONFIG register.
+ * The result will be in milli seconds.
+ * Here, just keep x = 0, and just change y.
+ * First round up the user value to power of 2 and
+ * then take log2, to get "y" value to program.
+ */
+ ret = kstrtou16(buf, 10, &new_tw);
+ if (ret)
+ return ret;
+
+ if (!new_tw)
+ return -EINVAL;
+
+ new_tw = roundup_pow_of_two(new_tw);
+ tm = ilog2(new_tw);
+ if (tm > 31)
+ return -EINVAL;
+
+ mutex_lock(&wt_lock);
+
+ /* If the workload hint was already enabled, then update with the new delay */
+ if (wt_enable)
+ ret = processor_thermal_mbox_interrupt_config(pdev, true,
+ SOC_WT_PREDICTION_INT_ENABLE_BIT,
+ tm);
+
+ if (!ret) {
+ ret = size;
+ notify_delay = tm;
+ notify_delay_ms = new_tw;
+ }
+
+ mutex_unlock(&wt_lock);
+
+ return ret;
+}
+
+static DEVICE_ATTR_RW(notification_delay_ms);
+
+static struct attribute *workload_hint_attrs[] = {
+ &dev_attr_workload_type_index.attr,
+ &dev_attr_workload_hint_enable.attr,
+ &dev_attr_notification_delay_ms.attr,
+ NULL
+};
+
+static const struct attribute_group workload_hint_attribute_group = {
+ .attrs = workload_hint_attrs,
+ .name = "workload_hint"
+};
+
+/*
+ * Callback to check if the interrupt for prediction is active.
+ * Caution: Called from the interrupt context.
+ */
+bool proc_thermal_check_wt_intr(struct proc_thermal_device *proc_priv)
+{
+ u64 int_status;
+
+ int_status = readq(proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+ if (int_status & SOC_WT_PREDICTION_INT_ACTIVE)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_check_wt_intr, INT340X_THERMAL);
+
+/* Callback to notify user space */
+void proc_thermal_wt_intr_callback(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ u64 status;
+
+ status = readq(proc_priv->mmio_base + SOC_WT_RES_INT_STATUS_OFFSET);
+ if (!(status & SOC_WT_PREDICTION_INT_ACTIVE))
+ return;
+
+ sysfs_notify(&pdev->dev.kobj, "workload_hint", "workload_type_index");
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_wt_intr_callback, INT340X_THERMAL);
+
+static bool workload_hint_created;
+
+int proc_thermal_wt_hint_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ int ret;
+
+ ret = sysfs_create_group(&pdev->dev.kobj, &workload_hint_attribute_group);
+ if (ret)
+ return ret;
+
+ workload_hint_created = true;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_wt_hint_add, INT340X_THERMAL);
+
+void proc_thermal_wt_hint_remove(struct pci_dev *pdev)
+{
+ mutex_lock(&wt_lock);
+ if (wt_enable)
+ processor_thermal_mbox_interrupt_config(pdev, false,
+ SOC_WT_PREDICTION_INT_ENABLE_BIT,
+ 0);
+ mutex_unlock(&wt_lock);
+
+ if (workload_hint_created)
+ sysfs_remove_group(&pdev->dev.kobj, &workload_hint_attribute_group);
+
+ workload_hint_created = false;
+}
+EXPORT_SYMBOL_NS_GPL(proc_thermal_wt_hint_remove, INT340X_THERMAL);
+
+MODULE_IMPORT_NS(INT340X_THERMAL);
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_wt_req.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_wt_req.c
new file mode 100644
index 000000000000..711c4f761c9a
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_wt_req.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * processor thermal device for Workload type hints
+ * update from user space
+ *
+ * Copyright (c) 2020-2023, Intel Corporation.
+ */
+
+#include <linux/pci.h>
+#include "processor_thermal_device.h"
+
+/* List of workload types */
+static const char * const workload_types[] = {
+ "none",
+ "idle",
+ "semi_active",
+ "bursty",
+ "sustained",
+ "battery_life",
+ NULL
+};
+
+static ssize_t workload_available_types_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int i = 0;
+ int ret = 0;
+
+ while (workload_types[i] != NULL)
+ ret += sprintf(&buf[ret], "%s ", workload_types[i++]);
+
+ ret += sprintf(&buf[ret], "\n");
+
+ return ret;
+}
+
+static DEVICE_ATTR_RO(workload_available_types);
+
+static ssize_t workload_type_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ char str_preference[15];
+ u32 data = 0;
+ ssize_t ret;
+
+ ret = sscanf(buf, "%14s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = match_string(workload_types, -1, str_preference);
+ if (ret < 0)
+ return ret;
+
+ ret &= 0xff;
+
+ if (ret)
+ data = BIT(MBOX_DATA_BIT_VALID) | BIT(MBOX_DATA_BIT_AC_DC);
+
+ data |= ret;
+
+ ret = processor_thermal_send_mbox_write_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_WRITE, data);
+ if (ret)
+ return false;
+
+ return count;
+}
+
+static ssize_t workload_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u64 cmd_resp;
+ int ret;
+
+ ret = processor_thermal_send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
+ if (ret)
+ return false;
+
+ cmd_resp &= 0xff;
+
+ if (cmd_resp > ARRAY_SIZE(workload_types) - 1)
+ return -EINVAL;
+
+ return sprintf(buf, "%s\n", workload_types[cmd_resp]);
+}
+
+static DEVICE_ATTR_RW(workload_type);
+
+static struct attribute *workload_req_attrs[] = {
+ &dev_attr_workload_available_types.attr,
+ &dev_attr_workload_type.attr,
+ NULL
+};
+
+static const struct attribute_group workload_req_attribute_group = {
+ .attrs = workload_req_attrs,
+ .name = "workload_request"
+};
+
+static bool workload_req_created;
+
+int proc_thermal_wt_req_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ u64 cmd_resp;
+ int ret;
+
+ /* Check if there is a mailbox support, if fails return success */
+ ret = processor_thermal_send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
+ if (ret)
+ return 0;
+
+ ret = sysfs_create_group(&pdev->dev.kobj, &workload_req_attribute_group);
+ if (ret)
+ return ret;
+
+ workload_req_created = true;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_wt_req_add);
+
+void proc_thermal_wt_req_remove(struct pci_dev *pdev)
+{
+ if (workload_req_created)
+ sysfs_remove_group(&pdev->dev.kobj, &workload_req_attribute_group);
+
+ workload_req_created = false;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_wt_req_remove);
+
+MODULE_IMPORT_NS(INT340X_THERMAL);
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 36243a3972fd..5ac5cb60bae6 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -256,7 +256,7 @@ skip_limit_set:
static const struct kernel_param_ops max_idle_ops = {
.set = max_idle_set,
- .get = param_get_int,
+ .get = param_get_byte,
};
module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 42806add0114..c99de76fb20d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -85,6 +85,8 @@ TARGETS += syscall_user_dispatch
TARGETS += sysctl
TARGETS += tc-testing
TARGETS += tdx
+TARGETS += thermal/intel/power_floor
+TARGETS += thermal/intel/workload_hint
TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
diff --git a/tools/testing/selftests/thermal/intel/power_floor/Makefile b/tools/testing/selftests/thermal/intel/power_floor/Makefile
new file mode 100644
index 000000000000..9b88e57dbba5
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := power_floor_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
new file mode 100644
index 000000000000..0326b39a11b9
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define POWER_FLOOR_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_enable"
+#define POWER_FLOOR_STATUS_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_status"
+
+void power_floor_exit(int signum)
+{
+ int fd;
+
+ /* Disable feature via sysfs knob */
+
+ fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open power floor enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "0\n", 2) < 0) {
+ perror("Can' disable power floor notifications\n");
+ exit(1);
+ }
+
+ printf("Disabled power floor notifications\n");
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ struct pollfd ufd;
+ char status_str[3];
+ int fd, ret;
+
+ if (signal(SIGINT, power_floor_exit) == SIG_IGN)
+ signal(SIGINT, SIG_IGN);
+ if (signal(SIGHUP, power_floor_exit) == SIG_IGN)
+ signal(SIGHUP, SIG_IGN);
+ if (signal(SIGTERM, power_floor_exit) == SIG_IGN)
+ signal(SIGTERM, SIG_IGN);
+
+ /* Enable feature via sysfs knob */
+ fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open power floor enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "1\n", 2) < 0) {
+ perror("Can' enable power floor notifications\n");
+ exit(1);
+ }
+
+ close(fd);
+
+ printf("Enabled power floor notifications\n");
+
+ while (1) {
+ fd = open(POWER_FLOOR_STATUS_ATTRIBUTE, O_RDONLY);
+ if (fd < 0) {
+ perror("Unable to power floor status file\n");
+ exit(1);
+ }
+
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, status_str, sizeof(status_str)) < 0) {
+ fprintf(stderr, "Failed to read from:%s\n",
+ POWER_FLOOR_STATUS_ATTRIBUTE);
+ exit(1);
+ }
+
+ ufd.fd = fd;
+ ufd.events = POLLPRI;
+
+ ret = poll(&ufd, 1, -1);
+ if (ret < 0) {
+ perror("poll error");
+ exit(1);
+ } else if (ret == 0) {
+ printf("Poll Timeout\n");
+ } else {
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, status_str, sizeof(status_str)) < 0)
+ exit(0);
+
+ printf("power floor status: %s\n", status_str);
+ }
+
+ close(fd);
+ }
+}
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/Makefile b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
new file mode 100644
index 000000000000..37ff3286283b
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := workload_hint_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
new file mode 100644
index 000000000000..217c3a641c53
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/notification_delay_ms"
+#define WORKLOAD_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_hint_enable"
+#define WORKLOAD_TYPE_INDEX_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_type_index"
+
+static const char * const workload_types[] = {
+ "idle",
+ "battery_life",
+ "sustained",
+ "bursty",
+ NULL
+};
+
+#define WORKLOAD_TYPE_MAX_INDEX 3
+
+void workload_hint_exit(int signum)
+{
+ int fd;
+
+ /* Disable feature via sysfs knob */
+
+ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload type feature enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "0\n", 2) < 0) {
+ perror("Can' disable workload hints\n");
+ exit(1);
+ }
+
+ printf("Disabled workload type prediction\n");
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ struct pollfd ufd;
+ char index_str[4];
+ int fd, ret, index;
+ char delay_str[64];
+ int delay = 0;
+
+ printf("Usage: workload_hint_test [notification delay in milli seconds]\n");
+
+ if (argc > 1) {
+ ret = sscanf(argv[1], "%d", &delay);
+ if (ret < 0) {
+ printf("Invalid delay\n");
+ exit(1);
+ }
+
+ printf("Setting notification delay to %d ms\n", delay);
+ if (delay < 0)
+ exit(1);
+
+ sprintf(delay_str, "%s\n", argv[1]);
+
+ sprintf(delay_str, "%s\n", argv[1]);
+ fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload notification delay\n");
+ exit(1);
+ }
+
+ if (write(fd, delay_str, strlen(delay_str)) < 0) {
+ perror("Can't set delay\n");
+ exit(1);
+ }
+
+ close(fd);
+ }
+
+ if (signal(SIGINT, workload_hint_exit) == SIG_IGN)
+ signal(SIGINT, SIG_IGN);
+ if (signal(SIGHUP, workload_hint_exit) == SIG_IGN)
+ signal(SIGHUP, SIG_IGN);
+ if (signal(SIGTERM, workload_hint_exit) == SIG_IGN)
+ signal(SIGTERM, SIG_IGN);
+
+ /* Enable feature via sysfs knob */
+ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload type feature enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "1\n", 2) < 0) {
+ perror("Can' enable workload hints\n");
+ exit(1);
+ }
+
+ close(fd);
+
+ printf("Enabled workload type prediction\n");
+
+ while (1) {
+ fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY);
+ if (fd < 0) {
+ perror("Unable to open workload type file\n");
+ exit(1);
+ }
+
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, index_str, sizeof(index_str)) < 0) {
+ fprintf(stderr, "Failed to read from:%s\n",
+ WORKLOAD_TYPE_INDEX_ATTRIBUTE);
+ exit(1);
+ }
+
+ ufd.fd = fd;
+ ufd.events = POLLPRI;
+
+ ret = poll(&ufd, 1, -1);
+ if (ret < 0) {
+ perror("poll error");
+ exit(1);
+ } else if (ret == 0) {
+ printf("Poll Timeout\n");
+ } else {
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, index_str, sizeof(index_str)) < 0)
+ exit(0);
+
+ ret = sscanf(index_str, "%d", &index);
+ if (ret < 0)
+ break;
+ if (index > WORKLOAD_TYPE_MAX_INDEX)
+ printf("Invalid workload type index\n");
+ else
+ printf("workload type:%s\n", workload_types[index]);
+ }
+
+ close(fd);
+ }
+}