summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
authorHeiko Carstens <hca@linux.ibm.com>2023-02-07 17:39:42 +0100
committerHeiko Carstens <hca@linux.ibm.com>2023-02-09 20:11:23 +0100
commitbe76ea61446095c045641404128bc6862545cda1 (patch)
treed715d7fc8d110f5d41dcce2caef12cc586a8a898 /arch/s390
parents390/vx: use simple assignments to access __vector128 members (diff)
downloadlinux-be76ea61446095c045641404128bc6862545cda1.tar.xz
linux-be76ea61446095c045641404128bc6862545cda1.zip
s390/idle: remove arch_cpu_idle_time() and corresponding code
arch_cpu_idle_time() returns the idle time of any given cpu if it is in idle, or zero if not. All if this is racy and partially incorrect. Time stamps taken with store clock extended and store clock fast from different cpus are compared, while the architecture states that this is nothing which can be relied on (see Principles of Operation; Chapter 4, "Setting and Inspecting the Clock"). A more fundamental problem is that the timestamp when a cpu is leaving idle is taken early in the assembler part of the interrupt handler, and this value is only transferred many cycles later to the cpu's per-cpu idle data structure. This per cpu data structure is read by arch_cpu_idle() to tell for which period of time a remote cpu is idle: if only an idle_enter value is present, the assumed idle time of the cpu is calculated by taking a local timestamp and returning the difference of the local timestamp and the idle_enter value. This is potentially incorrect, since the remote cpu may have already left idle, but the taken timestamp may not have been transferred to the per-cpu data structure. This in turn means that too much idle time may be reported for a cpu, and a subsequent calculation of system idle time may result in a smaller value. Instead of coming up with even more complex code trying to fix this, just remove this code, and only account idle time of a cpu, after idle state is left. Another minor bug is that it is assumed that timestamps are non-zero, which is not necessarily the case for timestamps taken with store clock fast. This however is just a very minor problem, since this can only happen when the epoch increases. Reviewed-by: Sven Schnelle <svens@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/include/asm/cputime.h4
-rw-r--r--arch/s390/include/asm/idle.h4
-rw-r--r--arch/s390/kernel/idle.c77
3 files changed, 11 insertions, 74 deletions
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 7f9284e2a7db..30bb3ec4e5fc 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -16,10 +16,6 @@
*/
#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
-u64 arch_cpu_idle_time(int cpu);
-
-#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
-
void account_idle_time_irq(void);
#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 5cea629c548e..af72a2b35758 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -10,16 +10,12 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/seqlock.h>
struct s390_idle_data {
- seqcount_t seqcount;
unsigned long idle_count;
unsigned long idle_time;
unsigned long clock_idle_enter;
- unsigned long clock_idle_exit;
unsigned long timer_idle_enter;
- unsigned long timer_idle_exit;
unsigned long mt_cycles_enter[8];
};
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 0f4585022e08..dd8351e76539 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -35,24 +35,18 @@ void account_idle_time_irq(void)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}
- idle->clock_idle_exit = S390_lowcore.int_clock;
- idle->timer_idle_exit = S390_lowcore.sys_enter_timer;
+ idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = idle->clock_idle_exit;
+ S390_lowcore.last_update_clock = S390_lowcore.int_clock;
S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
- S390_lowcore.last_update_timer = idle->timer_idle_exit;
+ S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
/* Account time spent with enabled wait psw loaded as idle time. */
- raw_write_seqcount_begin(&idle->seqcount);
- idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
- idle->clock_idle_enter = 0;
- idle->clock_idle_exit = 0;
- idle->idle_time += idle_time;
- idle->idle_count++;
+ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
+ WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
account_idle_time(cputime_to_nsecs(idle_time));
- raw_write_seqcount_end(&idle->seqcount);
}
void noinstr arch_cpu_idle(void)
@@ -71,71 +65,22 @@ void noinstr arch_cpu_idle(void)
}
static ssize_t show_idle_count(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long idle_count;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_count = READ_ONCE(idle->idle_count);
- if (READ_ONCE(idle->clock_idle_enter))
- idle_count++;
- } while (read_seqcount_retry(&idle->seqcount, seq));
- return sprintf(buf, "%lu\n", idle_count);
+
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
}
DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
- unsigned long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_time = READ_ONCE(idle->idle_time);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- idle_time += in_idle;
- return sprintf(buf, "%lu\n", idle_time >> 12);
-}
-DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-u64 arch_cpu_idle_time(int cpu)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long now, idle_enter, idle_exit, in_idle;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- return cputime_to_nsecs(in_idle);
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
void arch_cpu_idle_enter(void)
{