diff options
author | H. Peter Anvin <hpa@zytor.com> | 2014-07-14 18:26:47 +0200 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2014-07-14 18:27:25 +0200 |
commit | cbd4ebcba16c31abc341e1810e6e0fb4c35117b3 (patch) | |
tree | 49a3b54c709d11efbee631a85cca5fb550515005 /include | |
parent | x86, MCE: Robustify mcheck_init_device (diff) | |
parent | RAS, extlog: Adjust init flow (diff) | |
download | linux-cbd4ebcba16c31abc341e1810e6e0fb4c35117b3.tar.xz linux-cbd4ebcba16c31abc341e1810e6e0fb4c35117b3.zip |
Merge tag 'please-pull-extlog-trace' into x86/ras
Report extended error information ("extlog") using
a trace/event. Provide a mechanism for a smart
daemon collecting this information to tell the kernel
to skip logging corrected errors to the console.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/cper.h | 32 | ||||
-rw-r--r-- | include/linux/ras.h | 14 | ||||
-rw-r--r-- | include/ras/ras_event.h | 128 | ||||
-rw-r--r-- | include/trace/events/ras.h | 77 | ||||
-rw-r--r-- | include/trace/ftrace.h | 33 | ||||
-rw-r--r-- | include/trace/syscall.h | 15 |
6 files changed, 222 insertions, 77 deletions
diff --git a/include/linux/cper.h b/include/linux/cper.h index 2fc0ec3d89cc..76abba4b238e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -22,6 +22,7 @@ #define LINUX_CPER_H #include <linux/uuid.h> +#include <linux/trace_seq.h> /* CPER record signature and the size */ #define CPER_SIG_RECORD "CPER" @@ -36,6 +37,13 @@ #define CPER_RECORD_REV 0x0100 /* + * CPER record length contains the CPER fields which are relevant for further + * handling of a memory error in userspace (we don't carry all the fields + * defined in the UEFI spec because some of them don't make any sense.) + * Currently, a length of 256 should be more than enough. + */ +#define CPER_REC_LEN 256 +/* * Severity difinition for error_severity in struct cper_record_header * and section_severity in struct cper_section_descriptor */ @@ -356,6 +364,24 @@ struct cper_sec_mem_err { __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ }; +struct cper_mem_err_compact { + __u64 validation_bits; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u16 rank; + __u16 mem_array_handle; + __u16 mem_dev_handle; +}; + struct cper_sec_pcie { __u64 validation_bits; __u32 port_type; @@ -395,7 +421,13 @@ struct cper_sec_pcie { #pragma pack() u64 cper_next_record_id(void); +const char *cper_severity_str(unsigned int); +const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); +void cper_mem_err_pack(const struct cper_sec_mem_err *, + struct cper_mem_err_compact *); +const char *cper_mem_err_unpack(struct trace_seq *, + struct cper_mem_err_compact *); #endif diff --git a/include/linux/ras.h b/include/linux/ras.h new file mode 100644 index 000000000000..2aceeafd6fe5 --- /dev/null +++ b/include/linux/ras.h @@ -0,0 +1,14 @@ +#ifndef __RAS_H__ +#define __RAS_H__ + +#ifdef CONFIG_DEBUG_FS +int ras_userspace_consumers(void); +void ras_debugfs_init(void); +int ras_add_daemon_trace(void); +#else +static inline int ras_userspace_consumers(void) { return 0; } +static inline void ras_debugfs_init(void) { return; } +static inline int ras_add_daemon_trace(void) { return 0; } +#endif + +#endif diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 21cdb0b7b0fb..47da53c27ffa 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -8,6 +8,71 @@ #include <linux/tracepoint.h> #include <linux/edac.h> #include <linux/ktime.h> +#include <linux/aer.h> +#include <linux/cper.h> + +/* + * MCE Extended Error Log trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event. + */ + +/* memory trace event */ + +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +TRACE_EVENT(extlog_mem_event, + TP_PROTO(struct cper_sec_mem_err *mem, + u32 err_seq, + const uuid_le *fru_id, + const char *fru_text, + u8 sev), + + TP_ARGS(mem, err_seq, fru_id, fru_text, sev), + + TP_STRUCT__entry( + __field(u32, err_seq) + __field(u8, etype) + __field(u8, sev) + __field(u64, pa) + __field(u8, pa_mask_lsb) + __field_struct(uuid_le, fru_id) + __string(fru_text, fru_text) + __field_struct(struct cper_mem_err_compact, data) + ), + + TP_fast_assign( + __entry->err_seq = err_seq; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) + __entry->etype = mem->error_type; + else + __entry->etype = ~0; + __entry->sev = sev; + if (mem->validation_bits & CPER_MEM_VALID_PA) + __entry->pa = mem->physical_addr; + else + __entry->pa = ~0ull; + + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); + else + __entry->pa_mask_lsb = ~0; + __entry->fru_id = *fru_id; + __assign_str(fru_text, fru_text); + cper_mem_err_pack(mem, &__entry->data); + ), + + TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", + __entry->err_seq, + cper_severity_str(__entry->sev), + cper_mem_err_type_str(__entry->etype), + __entry->pa, + __entry->pa_mask_lsb, + cper_mem_err_unpack(p, &__entry->data), + &__entry->fru_id, + __get_str(fru_text)) +); +#endif /* * Hardware Events Report @@ -94,6 +159,69 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). + * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {BIT(0), "Receiver Error"}, \ + {BIT(6), "Bad TLP"}, \ + {BIT(7), "Bad DLLP"}, \ + {BIT(8), "RELAY_NUM Rollover"}, \ + {BIT(12), "Replay Timer Timeout"}, \ + {BIT(13), "Advisory Non-Fatal"} + +#define aer_uncorrectable_errors \ + {BIT(4), "Data Link Protocol"}, \ + {BIT(12), "Poisoned TLP"}, \ + {BIT(13), "Flow Control Protocol"}, \ + {BIT(14), "Completion Timeout"}, \ + {BIT(15), "Completer Abort"}, \ + {BIT(16), "Unexpected Completion"}, \ + {BIT(17), "Receiver Overflow"}, \ + {BIT(18), "Malformed TLP"}, \ + {BIT(19), "ECRC"}, \ + {BIT(20), "Unsupported Request"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) + __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == AER_CORRECTABLE ? "Corrected" : + __entry->severity == AER_FATAL ? + "Fatal" : "Uncorrected, non-fatal", + __entry->severity == AER_CORRECTABLE ? + __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + #endif /* _TRACE_HW_EVENT_MC_H */ /* This part must be outside protection */ diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h deleted file mode 100644 index 1c875ad1ee5f..000000000000 --- a/include/trace/events/ras.h +++ /dev/null @@ -1,77 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ras - -#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_AER_H - -#include <linux/tracepoint.h> -#include <linux/aer.h> - - -/* - * PCIe AER Trace event - * - * These events are generated when hardware detects a corrected or - * uncorrected event on a PCIe device. The event report has - * the following structure: - * - * char * dev_name - The name of the slot where the device resides - * ([domain:]bus:device.function). - * u32 status - Either the correctable or uncorrectable register - * indicating what error or errors have been seen - * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED - */ - -#define aer_correctable_errors \ - {BIT(0), "Receiver Error"}, \ - {BIT(6), "Bad TLP"}, \ - {BIT(7), "Bad DLLP"}, \ - {BIT(8), "RELAY_NUM Rollover"}, \ - {BIT(12), "Replay Timer Timeout"}, \ - {BIT(13), "Advisory Non-Fatal"} - -#define aer_uncorrectable_errors \ - {BIT(4), "Data Link Protocol"}, \ - {BIT(12), "Poisoned TLP"}, \ - {BIT(13), "Flow Control Protocol"}, \ - {BIT(14), "Completion Timeout"}, \ - {BIT(15), "Completer Abort"}, \ - {BIT(16), "Unexpected Completion"}, \ - {BIT(17), "Receiver Overflow"}, \ - {BIT(18), "Malformed TLP"}, \ - {BIT(19), "ECRC"}, \ - {BIT(20), "Unsupported Request"} - -TRACE_EVENT(aer_event, - TP_PROTO(const char *dev_name, - const u32 status, - const u8 severity), - - TP_ARGS(dev_name, status, severity), - - TP_STRUCT__entry( - __string( dev_name, dev_name ) - __field( u32, status ) - __field( u8, severity ) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name); - __entry->status = status; - __entry->severity = severity; - ), - - TP_printk("%s PCIe Bus Error: severity=%s, %s\n", - __get_str(dev_name), - __entry->severity == AER_CORRECTABLE ? "Corrected" : - __entry->severity == AER_FATAL ? - "Fatal" : "Uncorrected, non-fatal", - __entry->severity == AER_CORRECTABLE ? - __print_flags(__entry->status, "|", aer_correctable_errors) : - __print_flags(__entry->status, "|", aer_uncorrectable_errors)) -); - -#endif /* _TRACE_AER_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0fd06fef9fac..26b4f2e13275 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -44,6 +44,12 @@ #undef __field_ext #define __field_ext(type, item, filter_type) type item; +#undef __field_struct +#define __field_struct(type, item) type item; + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) type item; + #undef __array #define __array(type, item, len) type item[len]; @@ -122,6 +128,12 @@ #undef __field_ext #define __field_ext(type, item, filter_type) +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) @@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ if (ret) \ return ret; +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), \ + 0, filter_type); \ + if (ret) \ + return ret; + #undef __field #define __field(type, item) __field_ext(type, item, FILTER_OTHER) +#undef __field_struct +#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) + #undef __array #define __array(type, item, len) \ do { \ @@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __field_ext #define __field_ext(type, item, filter_type) +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) @@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call( \ #undef __field #define __field(type, item) +#undef __field_struct +#define __field_struct(type, item) + #undef __array #define __array(type, item, len) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index fed853f3d7aa..9674145e2f6a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -4,6 +4,7 @@ #include <linux/tracepoint.h> #include <linux/unistd.h> #include <linux/ftrace_event.h> +#include <linux/thread_info.h> #include <asm/ptrace.h> @@ -32,4 +33,18 @@ struct syscall_metadata { struct ftrace_event_call *exit_event; }; +#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS) +static inline void syscall_tracepoint_update(struct task_struct *p) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); + else + clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); +} +#else +static inline void syscall_tracepoint_update(struct task_struct *p) +{ +} +#endif + #endif /* _TRACE_SYSCALL_H */ |