From 70498253186586e5dca7bc3ebd3415203b059fbc Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 16 Jul 2012 18:35:29 -0700 Subject: kmsg - properly print over-long continuation lines Reserve PREFIX_MAX bytes in the LOG_LINE_MAX line when buffering a continuation line, to be able to properly prefix the LOG_LINE_MAX line with the syslog prefix and timestamp when printing it. Reported-By: Dave Jones Signed-off-by: Kay Sievers Cc: stable Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 177fa49357a5..d87ca5c6a989 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -235,7 +235,8 @@ static u32 log_next_idx; static u64 clear_seq; static u32 clear_idx; -#define LOG_LINE_MAX 1024 +#define PREFIX_MAX 32 +#define LOG_LINE_MAX 1024 - PREFIX_MAX /* record buffer */ #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) @@ -876,7 +877,7 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev, if (buf) { if (print_prefix(msg, syslog, NULL) + - text_len + 1>= size - len) + text_len + 1 >= size - len) break; if (prefix) @@ -907,7 +908,7 @@ static int syslog_print(char __user *buf, int size) struct log *msg; int len = 0; - text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); + text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) return -ENOMEM; @@ -930,7 +931,8 @@ static int syslog_print(char __user *buf, int size) skip = syslog_partial; msg = log_from_idx(syslog_idx); - n = msg_print_text(msg, syslog_prev, true, text, LOG_LINE_MAX); + n = msg_print_text(msg, syslog_prev, true, text, + LOG_LINE_MAX + PREFIX_MAX); if (n - syslog_partial <= size) { /* message fits into buffer, move forward */ syslog_idx = log_next(syslog_idx); @@ -969,7 +971,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) char *text; int len = 0; - text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); + text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) return -ENOMEM; @@ -1022,7 +1024,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear) struct log *msg = log_from_idx(idx); int textlen; - textlen = msg_print_text(msg, prev, true, text, LOG_LINE_MAX); + textlen = msg_print_text(msg, prev, true, text, + LOG_LINE_MAX + PREFIX_MAX); if (textlen < 0) { len = textlen; break; @@ -1367,15 +1370,15 @@ static struct cont { bool flushed:1; /* buffer sealed and committed */ } cont; -static void cont_flush(void) +static void cont_flush(enum log_flags flags) { if (cont.flushed) return; if (cont.len == 0) return; - log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec, - NULL, 0, cont.buf, cont.len); + log_store(cont.facility, cont.level, LOG_NOCONS | flags, + cont.ts_nsec, NULL, 0, cont.buf, cont.len); cont.flushed = true; } @@ -1386,7 +1389,8 @@ static bool cont_add(int facility, int level, const char *text, size_t len) return false; if (cont.len + len > sizeof(cont.buf)) { - cont_flush(); + /* the line gets too long, split it up in separate records */ + cont_flush(LOG_CONT); return false; } @@ -1522,7 +1526,7 @@ asmlinkage int vprintk_emit(int facility, int level, * or another task also prints continuation lines. */ if (cont.len && (lflags & LOG_PREFIX || cont.owner != current)) - cont_flush(); + cont_flush(0); /* buffer line if possible, otherwise store it right away */ if (!cont_add(facility, level, text, text_len)) @@ -1540,7 +1544,7 @@ asmlinkage int vprintk_emit(int facility, int level, if (cont.len && cont.owner == current) { if (!(lflags & LOG_PREFIX)) stored = cont_add(facility, level, text, text_len); - cont_flush(); + cont_flush(0); } if (!stored) @@ -1633,7 +1637,8 @@ EXPORT_SYMBOL(printk); #else -#define LOG_LINE_MAX 0 +#define LOG_LINE_MAX 0 +#define PREFIX_MAX 0 static struct cont { size_t len; size_t cons; @@ -1938,7 +1943,7 @@ static enum log_flags console_prev; */ void console_unlock(void) { - static char text[LOG_LINE_MAX]; + static char text[LOG_LINE_MAX + PREFIX_MAX]; static u64 seen_seq; unsigned long flags; bool wake_klogd = false; -- cgit v1.2.3 From 96efedf1491cdf0616e5e4fff0711cebf20f69c7 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 16 Jul 2012 18:35:29 -0700 Subject: kmsg - avoid warning for CONFIG_PRINTK=n compilations Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index d87ca5c6a989..6c3d5bf14da2 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -216,6 +216,7 @@ struct log { */ static DEFINE_RAW_SPINLOCK(logbuf_lock); +#ifdef CONFIG_PRINTK /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static u32 syslog_idx; @@ -228,7 +229,6 @@ static u32 log_first_idx; /* index and sequence number of the next record to store in the buffer */ static u64 log_next_seq; -#ifdef CONFIG_PRINTK static u32 log_next_idx; /* the next printk record to read after the last 'clear' command */ @@ -1635,10 +1635,17 @@ asmlinkage int printk(const char *fmt, ...) } EXPORT_SYMBOL(printk); -#else +#else /* CONFIG_PRINTK */ #define LOG_LINE_MAX 0 #define PREFIX_MAX 0 +#define LOG_LINE_MAX 0 +static u64 syslog_seq; +static u32 syslog_idx; +static enum log_flags syslog_prev; +static u64 log_first_seq; +static u32 log_first_idx; +static u64 log_next_seq; static struct cont { size_t len; size_t cons; -- cgit v1.2.3 From d39f3d77c9b1fe7cc33a14beb4a4849af0a4ac22 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 16 Jul 2012 18:35:30 -0700 Subject: kmsg - export "continuation record" flag to /dev/kmsg In some cases we are forced to store individual records for a continuation line print. Export a flag to allow the external re-construction of the line. The flag allows us to apply a similar logic externally which is used internally when the console, /proc/kmsg or the syslog() output is printed. $ cat /dev/kmsg 4,165,0,-;Free swap = 0kB 4,166,0,-;Total swap = 0kB 6,167,0,c;[ 4,168,0,+;0 4,169,0,+;1 4,170,0,+;2 4,171,0,+;3 4,172,0,+;] 6,173,0,-;[0 1 2 3 ] 6,174,0,-;Console: colour VGA+ 80x25 6,175,0,-;console [tty0] enabled Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/dev-kmsg | 29 ++++++++++++++++++++--------- kernel/printk.c | 23 +++++++++++++++++++++-- 2 files changed, 41 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg index 281ecc5f9709..7e7e07a82e0e 100644 --- a/Documentation/ABI/testing/dev-kmsg +++ b/Documentation/ABI/testing/dev-kmsg @@ -58,16 +58,18 @@ Description: The /dev/kmsg character device node provides userspace access The output format consists of a prefix carrying the syslog prefix including priority and facility, the 64 bit message - sequence number and the monotonic timestamp in microseconds. - The values are separated by a ','. Future extensions might - add more comma separated values before the terminating ';'. - Unknown values should be gracefully ignored. + sequence number and the monotonic timestamp in microseconds, + and a flag field. All fields are separated by a ','. + + Future extensions might add more comma separated values before + the terminating ';'. Unknown fields and values should be + gracefully ignored. The human readable text string starts directly after the ';' and is terminated by a '\n'. Untrusted values derived from hardware or other facilities are printed, therefore - all non-printable characters in the log message are escaped - by "\x00" C-style hex encoding. + all non-printable characters and '\' itself in the log message + are escaped by "\x00" C-style hex encoding. A line starting with ' ', is a continuation line, adding key/value pairs to the log message, which provide the machine @@ -75,11 +77,11 @@ Description: The /dev/kmsg character device node provides userspace access userspace. Example: - 7,160,424069;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored) + 7,160,424069,-;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored) SUBSYSTEM=acpi DEVICE=+acpi:PNP0A03:00 - 6,339,5140900;NET: Registered protocol family 10 - 30,340,5690716;udevd[80]: starting version 181 + 6,339,5140900,-;NET: Registered protocol family 10 + 30,340,5690716,-;udevd[80]: starting version 181 The DEVICE= key uniquely identifies devices the following way: b12:8 - block dev_t @@ -87,4 +89,13 @@ Description: The /dev/kmsg character device node provides userspace access n8 - netdev ifindex +sound:card0 - subsystem:devname + The flags field carries '-' by default. A 'c' indicates a + fragment of a line. All following fragments are flagged with + '+'. Note, that these hints about continuation lines are not + neccessarily correct, and the stream could be interleaved with + unrelated messages, but merging the lines in the output + usually produces better human readable results. A similar + logic is used internally when messages are printed to the + console, /proc/kmsg or the syslog() syscall. + Users: dmesg(1), userspace kernel log consumers diff --git a/kernel/printk.c b/kernel/printk.c index 6c3d5bf14da2..a41106e19077 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -361,6 +361,7 @@ static void log_store(int facility, int level, struct devkmsg_user { u64 seq; u32 idx; + enum log_flags prev; struct mutex lock; char buf[8192]; }; @@ -426,6 +427,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, struct log *msg; u64 ts_usec; size_t i; + char cont = '-'; size_t len; ssize_t ret; @@ -463,8 +465,25 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, msg = log_from_idx(user->idx); ts_usec = msg->ts_nsec; do_div(ts_usec, 1000); - len = sprintf(user->buf, "%u,%llu,%llu;", - (msg->facility << 3) | msg->level, user->seq, ts_usec); + + /* + * If we couldn't merge continuation line fragments during the print, + * export the stored flags to allow an optional external merge of the + * records. Merging the records isn't always neccessarily correct, like + * when we hit a race during printing. In most cases though, it produces + * better readable output. 'c' in the record flags mark the first + * fragment of a line, '+' the following. + */ + if (msg->flags & LOG_CONT && !(user->prev & LOG_CONT)) + cont = 'c'; + else if ((msg->flags & LOG_CONT) || + ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))) + cont = '+'; + + len = sprintf(user->buf, "%u,%llu,%llu,%c;", + (msg->facility << 3) | msg->level, + user->seq, ts_usec, cont); + user->prev = msg->flags; /* escape non-printable characters */ for (i = 0; i < msg->text_len; i++) { -- cgit v1.2.3 From eab072609e11a357181806ab5a5c309ef6eb76f5 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 16 Jul 2012 18:35:30 -0700 Subject: kmsg - do not flush partial lines when the console is busy Fragments of continuation lines are flushed to the console immediately. In case the console is locked, the fragment must be queued up in the cont buffer. If the the console is busy and the continuation line is complete, but no part of it was written to the console up to this point, we can just store the entire line as a regular record and free the buffer earlier. If the console is busy and earlier messages are already queued up, we should not flush the fragments of continuation lines, but store them after the queued up messages, to ensure the proper ordering. This keeps the console output better readable in case printk()s race against each other, or we receive over-long continuation lines we need to flush. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 93 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index a41106e19077..4da2377131b0 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -231,6 +231,11 @@ static u32 log_first_idx; static u64 log_next_seq; static u32 log_next_idx; +/* the next printk record to write to the console */ +static u64 console_seq; +static u32 console_idx; +static enum log_flags console_prev; + /* the next printk record to read after the last 'clear' command */ static u64 clear_seq; static u32 clear_idx; @@ -1386,6 +1391,7 @@ static struct cont { u64 ts_nsec; /* time of first print */ u8 level; /* log level of first message */ u8 facility; /* log level of first message */ + enum log_flags flags; /* prefix, newline flags */ bool flushed:1; /* buffer sealed and committed */ } cont; @@ -1396,10 +1402,25 @@ static void cont_flush(enum log_flags flags) if (cont.len == 0) return; - log_store(cont.facility, cont.level, LOG_NOCONS | flags, - cont.ts_nsec, NULL, 0, cont.buf, cont.len); - - cont.flushed = true; + if (cont.cons) { + /* + * If a fragment of this line was directly flushed to the + * console; wait for the console to pick up the rest of the + * line. LOG_NOCONS suppresses a duplicated output. + */ + log_store(cont.facility, cont.level, flags | LOG_NOCONS, + cont.ts_nsec, NULL, 0, cont.buf, cont.len); + cont.flags = flags; + cont.flushed = true; + } else { + /* + * If no fragment of this line ever reached the console, + * just submit it to the store and free the buffer. + */ + log_store(cont.facility, cont.level, flags, 0, + NULL, 0, cont.buf, cont.len); + cont.len = 0; + } } static bool cont_add(int facility, int level, const char *text, size_t len) @@ -1418,12 +1439,17 @@ static bool cont_add(int facility, int level, const char *text, size_t len) cont.level = level; cont.owner = current; cont.ts_nsec = local_clock(); + cont.flags = 0; cont.cons = 0; cont.flushed = false; } memcpy(cont.buf + cont.len, text, len); cont.len += len; + + if (cont.len > (sizeof(cont.buf) * 80) / 100) + cont_flush(LOG_CONT); + return true; } @@ -1432,7 +1458,7 @@ static size_t cont_print_text(char *text, size_t size) size_t textlen = 0; size_t len; - if (cont.cons == 0) { + if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) { textlen += print_time(cont.ts_nsec, text); size -= textlen; } @@ -1447,7 +1473,8 @@ static size_t cont_print_text(char *text, size_t size) } if (cont.flushed) { - text[textlen++] = '\n'; + if (cont.flags & LOG_NEWLINE) + text[textlen++] = '\n'; /* got everything, release buffer */ cont.len = 0; } @@ -1545,7 +1572,7 @@ asmlinkage int vprintk_emit(int facility, int level, * or another task also prints continuation lines. */ if (cont.len && (lflags & LOG_PREFIX || cont.owner != current)) - cont_flush(0); + cont_flush(LOG_NEWLINE); /* buffer line if possible, otherwise store it right away */ if (!cont_add(facility, level, text, text_len)) @@ -1563,7 +1590,7 @@ asmlinkage int vprintk_emit(int facility, int level, if (cont.len && cont.owner == current) { if (!(lflags & LOG_PREFIX)) stored = cont_add(facility, level, text, text_len); - cont_flush(0); + cont_flush(LOG_NEWLINE); } if (!stored) @@ -1661,10 +1688,13 @@ EXPORT_SYMBOL(printk); #define LOG_LINE_MAX 0 static u64 syslog_seq; static u32 syslog_idx; +static u64 console_seq; +static u32 console_idx; static enum log_flags syslog_prev; static u64 log_first_seq; static u32 log_first_idx; static u64 log_next_seq; +static enum log_flags console_prev; static struct cont { size_t len; size_t cons; @@ -1948,10 +1978,34 @@ void wake_up_klogd(void) this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); } -/* the next printk record to write to the console */ -static u64 console_seq; -static u32 console_idx; -static enum log_flags console_prev; +static void console_cont_flush(char *text, size_t size) +{ + unsigned long flags; + size_t len; + + raw_spin_lock_irqsave(&logbuf_lock, flags); + + if (!cont.len) + goto out; + + /* + * We still queue earlier records, likely because the console was + * busy. The earlier ones need to be printed before this one, we + * did not flush any fragment so far, so just let it queue up. + */ + if (console_seq < log_next_seq && !cont.cons) + goto out; + + len = cont_print_text(text, size); + raw_spin_unlock(&logbuf_lock); + stop_critical_timings(); + call_console_drivers(cont.level, text, len); + start_critical_timings(); + local_irq_restore(flags); + return; +out: + raw_spin_unlock_irqrestore(&logbuf_lock, flags); +} /** * console_unlock - unlock the console system @@ -1983,19 +2037,7 @@ void console_unlock(void) console_may_schedule = 0; /* flush buffered message fragment immediately to console */ - raw_spin_lock_irqsave(&logbuf_lock, flags); - if (cont.len && (cont.cons < cont.len || cont.flushed)) { - size_t len; - - len = cont_print_text(text, sizeof(text)); - raw_spin_unlock(&logbuf_lock); - stop_critical_timings(); - call_console_drivers(cont.level, text, len); - start_critical_timings(); - local_irq_restore(flags); - } else - raw_spin_unlock_irqrestore(&logbuf_lock, flags); - + console_cont_flush(text, sizeof(text)); again: for (;;) { struct log *msg; @@ -2032,6 +2074,7 @@ skip: * will properly dump everything later. */ msg->flags &= ~LOG_NOCONS; + console_prev = msg->flags; goto skip; } -- cgit v1.2.3 From 6791457a090d9a234a40b501c2536f0aefaeae4b Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 18 Jul 2012 13:18:12 -0400 Subject: printk: Export struct log size and member offsets through vmcoreinfo There are tools like makedumpfile and vmcore-dmesg which can extract kernel log buffer from vmcore. Since we introduced structured logging, that functionality is broken. Now user space tools need to know about "struct log" and offsets of various fields to be able to parse struct log data and extract text message or dictonary. This patch exports some of the fields. Currently I am not exporting log "level" info as that is a bitfield and offsetof() bitfields can't be calculated. But if people start asking for log level info in the output then we probably either need to seprate out "level" or use bit shift operations for flags and level. Signed-off-by: Vivek Goyal Acked-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 4da2377131b0..449364f07a1e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -671,6 +671,15 @@ void log_buf_kexec_setup(void) VMCOREINFO_SYMBOL(log_buf_len); VMCOREINFO_SYMBOL(log_first_idx); VMCOREINFO_SYMBOL(log_next_idx); + /* + * Export struct log size and field offsets. User space tools can + * parse it and detect any changes to structure down the line. + */ + VMCOREINFO_STRUCT_SIZE(log); + VMCOREINFO_OFFSET(log, ts_nsec); + VMCOREINFO_OFFSET(log, len); + VMCOREINFO_OFFSET(log, text_len); + VMCOREINFO_OFFSET(log, dict_len); } #endif -- cgit v1.2.3