summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2014-07-14 18:26:47 +0200
committerH. Peter Anvin <hpa@zytor.com>2014-07-14 18:27:25 +0200
commitcbd4ebcba16c31abc341e1810e6e0fb4c35117b3 (patch)
tree49a3b54c709d11efbee631a85cca5fb550515005 /include
parentx86, MCE: Robustify mcheck_init_device (diff)
parentRAS, extlog: Adjust init flow (diff)
downloadlinux-cbd4ebcba16c31abc341e1810e6e0fb4c35117b3.tar.xz
linux-cbd4ebcba16c31abc341e1810e6e0fb4c35117b3.zip
Merge tag 'please-pull-extlog-trace' into x86/ras
Report extended error information ("extlog") using a trace/event. Provide a mechanism for a smart daemon collecting this information to tell the kernel to skip logging corrected errors to the console. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'include')
-rw-r--r--include/linux/cper.h32
-rw-r--r--include/linux/ras.h14
-rw-r--r--include/ras/ras_event.h128
-rw-r--r--include/trace/events/ras.h77
-rw-r--r--include/trace/ftrace.h33
-rw-r--r--include/trace/syscall.h15
6 files changed, 222 insertions, 77 deletions
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 2fc0ec3d89cc..76abba4b238e 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -22,6 +22,7 @@
#define LINUX_CPER_H
#include <linux/uuid.h>
+#include <linux/trace_seq.h>
/* CPER record signature and the size */
#define CPER_SIG_RECORD "CPER"
@@ -36,6 +37,13 @@
#define CPER_RECORD_REV 0x0100
/*
+ * CPER record length contains the CPER fields which are relevant for further
+ * handling of a memory error in userspace (we don't carry all the fields
+ * defined in the UEFI spec because some of them don't make any sense.)
+ * Currently, a length of 256 should be more than enough.
+ */
+#define CPER_REC_LEN 256
+/*
* Severity difinition for error_severity in struct cper_record_header
* and section_severity in struct cper_section_descriptor
*/
@@ -356,6 +364,24 @@ struct cper_sec_mem_err {
__u16 mem_dev_handle; /* module handle in UEFI 2.4 */
};
+struct cper_mem_err_compact {
+ __u64 validation_bits;
+ __u16 node;
+ __u16 card;
+ __u16 module;
+ __u16 bank;
+ __u16 device;
+ __u16 row;
+ __u16 column;
+ __u16 bit_pos;
+ __u64 requestor_id;
+ __u64 responder_id;
+ __u64 target_id;
+ __u16 rank;
+ __u16 mem_array_handle;
+ __u16 mem_dev_handle;
+};
+
struct cper_sec_pcie {
__u64 validation_bits;
__u32 port_type;
@@ -395,7 +421,13 @@ struct cper_sec_pcie {
#pragma pack()
u64 cper_next_record_id(void);
+const char *cper_severity_str(unsigned int);
+const char *cper_mem_err_type_str(unsigned int);
void cper_print_bits(const char *prefix, unsigned int bits,
const char * const strs[], unsigned int strs_size);
+void cper_mem_err_pack(const struct cper_sec_mem_err *,
+ struct cper_mem_err_compact *);
+const char *cper_mem_err_unpack(struct trace_seq *,
+ struct cper_mem_err_compact *);
#endif
diff --git a/include/linux/ras.h b/include/linux/ras.h
new file mode 100644
index 000000000000..2aceeafd6fe5
--- /dev/null
+++ b/include/linux/ras.h
@@ -0,0 +1,14 @@
+#ifndef __RAS_H__
+#define __RAS_H__
+
+#ifdef CONFIG_DEBUG_FS
+int ras_userspace_consumers(void);
+void ras_debugfs_init(void);
+int ras_add_daemon_trace(void);
+#else
+static inline int ras_userspace_consumers(void) { return 0; }
+static inline void ras_debugfs_init(void) { return; }
+static inline int ras_add_daemon_trace(void) { return 0; }
+#endif
+
+#endif
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 21cdb0b7b0fb..47da53c27ffa 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -8,6 +8,71 @@
#include <linux/tracepoint.h>
#include <linux/edac.h>
#include <linux/ktime.h>
+#include <linux/aer.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ */
+
+/* memory trace event */
+
+#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
+TRACE_EVENT(extlog_mem_event,
+ TP_PROTO(struct cper_sec_mem_err *mem,
+ u32 err_seq,
+ const uuid_le *fru_id,
+ const char *fru_text,
+ u8 sev),
+
+ TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
+
+ TP_STRUCT__entry(
+ __field(u32, err_seq)
+ __field(u8, etype)
+ __field(u8, sev)
+ __field(u64, pa)
+ __field(u8, pa_mask_lsb)
+ __field_struct(uuid_le, fru_id)
+ __string(fru_text, fru_text)
+ __field_struct(struct cper_mem_err_compact, data)
+ ),
+
+ TP_fast_assign(
+ __entry->err_seq = err_seq;
+ if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+ __entry->etype = mem->error_type;
+ else
+ __entry->etype = ~0;
+ __entry->sev = sev;
+ if (mem->validation_bits & CPER_MEM_VALID_PA)
+ __entry->pa = mem->physical_addr;
+ else
+ __entry->pa = ~0ull;
+
+ if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+ __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
+ else
+ __entry->pa_mask_lsb = ~0;
+ __entry->fru_id = *fru_id;
+ __assign_str(fru_text, fru_text);
+ cper_mem_err_pack(mem, &__entry->data);
+ ),
+
+ TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
+ __entry->err_seq,
+ cper_severity_str(__entry->sev),
+ cper_mem_err_type_str(__entry->etype),
+ __entry->pa,
+ __entry->pa_mask_lsb,
+ cper_mem_err_unpack(p, &__entry->data),
+ &__entry->fru_id,
+ __get_str(fru_text))
+);
+#endif
/*
* Hardware Events Report
@@ -94,6 +159,69 @@ TRACE_EVENT(mc_event,
__get_str(driver_detail))
);
+/*
+ * PCIe AER Trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event on a PCIe device. The event report has
+ * the following structure:
+ *
+ * char * dev_name - The name of the slot where the device resides
+ * ([domain:]bus:device.function).
+ * u32 status - Either the correctable or uncorrectable register
+ * indicating what error or errors have been seen
+ * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
+ */
+
+#define aer_correctable_errors \
+ {BIT(0), "Receiver Error"}, \
+ {BIT(6), "Bad TLP"}, \
+ {BIT(7), "Bad DLLP"}, \
+ {BIT(8), "RELAY_NUM Rollover"}, \
+ {BIT(12), "Replay Timer Timeout"}, \
+ {BIT(13), "Advisory Non-Fatal"}
+
+#define aer_uncorrectable_errors \
+ {BIT(4), "Data Link Protocol"}, \
+ {BIT(12), "Poisoned TLP"}, \
+ {BIT(13), "Flow Control Protocol"}, \
+ {BIT(14), "Completion Timeout"}, \
+ {BIT(15), "Completer Abort"}, \
+ {BIT(16), "Unexpected Completion"}, \
+ {BIT(17), "Receiver Overflow"}, \
+ {BIT(18), "Malformed TLP"}, \
+ {BIT(19), "ECRC"}, \
+ {BIT(20), "Unsupported Request"}
+
+TRACE_EVENT(aer_event,
+ TP_PROTO(const char *dev_name,
+ const u32 status,
+ const u8 severity),
+
+ TP_ARGS(dev_name, status, severity),
+
+ TP_STRUCT__entry(
+ __string( dev_name, dev_name )
+ __field( u32, status )
+ __field( u8, severity )
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name);
+ __entry->status = status;
+ __entry->severity = severity;
+ ),
+
+ TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
+ __get_str(dev_name),
+ __entry->severity == AER_CORRECTABLE ? "Corrected" :
+ __entry->severity == AER_FATAL ?
+ "Fatal" : "Uncorrected, non-fatal",
+ __entry->severity == AER_CORRECTABLE ?
+ __print_flags(__entry->status, "|", aer_correctable_errors) :
+ __print_flags(__entry->status, "|", aer_uncorrectable_errors))
+);
+
#endif /* _TRACE_HW_EVENT_MC_H */
/* This part must be outside protection */
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
deleted file mode 100644
index 1c875ad1ee5f..000000000000
--- a/include/trace/events/ras.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM ras
-
-#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_AER_H
-
-#include <linux/tracepoint.h>
-#include <linux/aer.h>
-
-
-/*
- * PCIe AER Trace event
- *
- * These events are generated when hardware detects a corrected or
- * uncorrected event on a PCIe device. The event report has
- * the following structure:
- *
- * char * dev_name - The name of the slot where the device resides
- * ([domain:]bus:device.function).
- * u32 status - Either the correctable or uncorrectable register
- * indicating what error or errors have been seen
- * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
- */
-
-#define aer_correctable_errors \
- {BIT(0), "Receiver Error"}, \
- {BIT(6), "Bad TLP"}, \
- {BIT(7), "Bad DLLP"}, \
- {BIT(8), "RELAY_NUM Rollover"}, \
- {BIT(12), "Replay Timer Timeout"}, \
- {BIT(13), "Advisory Non-Fatal"}
-
-#define aer_uncorrectable_errors \
- {BIT(4), "Data Link Protocol"}, \
- {BIT(12), "Poisoned TLP"}, \
- {BIT(13), "Flow Control Protocol"}, \
- {BIT(14), "Completion Timeout"}, \
- {BIT(15), "Completer Abort"}, \
- {BIT(16), "Unexpected Completion"}, \
- {BIT(17), "Receiver Overflow"}, \
- {BIT(18), "Malformed TLP"}, \
- {BIT(19), "ECRC"}, \
- {BIT(20), "Unsupported Request"}
-
-TRACE_EVENT(aer_event,
- TP_PROTO(const char *dev_name,
- const u32 status,
- const u8 severity),
-
- TP_ARGS(dev_name, status, severity),
-
- TP_STRUCT__entry(
- __string( dev_name, dev_name )
- __field( u32, status )
- __field( u8, severity )
- ),
-
- TP_fast_assign(
- __assign_str(dev_name, dev_name);
- __entry->status = status;
- __entry->severity = severity;
- ),
-
- TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
- __get_str(dev_name),
- __entry->severity == AER_CORRECTABLE ? "Corrected" :
- __entry->severity == AER_FATAL ?
- "Fatal" : "Uncorrected, non-fatal",
- __entry->severity == AER_CORRECTABLE ?
- __print_flags(__entry->status, "|", aer_correctable_errors) :
- __print_flags(__entry->status, "|", aer_uncorrectable_errors))
-);
-
-#endif /* _TRACE_AER_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0fd06fef9fac..26b4f2e13275 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -44,6 +44,12 @@
#undef __field_ext
#define __field_ext(type, item, filter_type) type item;
+#undef __field_struct
+#define __field_struct(type, item) type item;
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type) type item;
+
#undef __array
#define __array(type, item, len) type item[len];
@@ -122,6 +128,12 @@
#undef __field_ext
#define __field_ext(type, item, filter_type)
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
#undef __array
#define __array(type, item, len)
@@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \
if (ret) \
return ret;
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type) \
+ ret = trace_define_field(event_call, #type, #item, \
+ offsetof(typeof(field), item), \
+ sizeof(field.item), \
+ 0, filter_type); \
+ if (ret) \
+ return ret;
+
#undef __field
#define __field(type, item) __field_ext(type, item, FILTER_OTHER)
+#undef __field_struct
+#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER)
+
#undef __array
#define __array(type, item, len) \
do { \
@@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
#undef __field_ext
#define __field_ext(type, item, filter_type)
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
#undef __array
#define __array(type, item, len)
@@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call( \
#undef __field
#define __field(type, item)
+#undef __field_struct
+#define __field_struct(type, item)
+
#undef __array
#define __array(type, item, len)
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index fed853f3d7aa..9674145e2f6a 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -4,6 +4,7 @@
#include <linux/tracepoint.h>
#include <linux/unistd.h>
#include <linux/ftrace_event.h>
+#include <linux/thread_info.h>
#include <asm/ptrace.h>
@@ -32,4 +33,18 @@ struct syscall_metadata {
struct ftrace_event_call *exit_event;
};
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
+static inline void syscall_tracepoint_update(struct task_struct *p)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+ else
+ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+}
+#else
+static inline void syscall_tracepoint_update(struct task_struct *p)
+{
+}
+#endif
+
#endif /* _TRACE_SYSCALL_H */