summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author: Grant Likely <grant.likely@secretlab.ca> 2012-07-11 17:08:35 +0200
committer: Grant Likely <grant.likely@secretlab.ca> 2012-07-11 17:08:35 +0200
commit: 80c1834fc86c2bbacb54a8fc3c04a8b0066b0996 (patch)
tree: 8200248706960af8b779e9144f5b51c670602228 /kernel
parent: irq_domain: correct a minor wrong comment for linear revmap (diff)
parent: Linux 3.5-rc6 (diff)
download: linux-80c1834fc86c2bbacb54a8fc3c04a8b0066b0996.tar.xz
download: linux-80c1834fc86c2bbacb54a8fc3c04a8b0066b0996.zip
Merge tag 'v3.5-rc6' into irqdomain/next
Linux 3.5-rc6
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup.c 13
-rw-r--r-- kernel/events/core.c 10
-rw-r--r-- kernel/exit.c 19
-rw-r--r-- kernel/panic.c 6
-rw-r--r-- kernel/pid_namespace.c 20
-rw-r--r-- kernel/printk.c 532
-rw-r--r-- kernel/rcutree.c 16
-rw-r--r-- kernel/rcutree.h 14
-rw-r--r-- kernel/rcutree_plugin.h 165
-rw-r--r-- kernel/relay.c 5
-rw-r--r-- kernel/sys.c 6
-rw-r--r-- kernel/time/tick-sched.c 7
-rw-r--r-- kernel/trace/trace.c 8
-rw-r--r-- kernel/watchdog.c 19
14 files changed, 617 insertions, 223 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 72fcd3069a90..2097684cf194 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -255,12 +255,17 @@ int cgroup_lock_is_held(void)
EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
+static int css_unbias_refcnt(int refcnt)
+{
+ return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
+}
+
/* the current nr of refs, always >= 0 whether @css is deactivated or not */
static int css_refcnt(struct cgroup_subsys_state *css)
{
int v = atomic_read(&css->refcnt);
- return v >= 0 ? v : v - CSS_DEACT_BIAS;
+ return css_unbias_refcnt(v);
}
/* convenient tests for these bits */
@@ -4982,10 +4987,12 @@ EXPORT_SYMBOL_GPL(__css_tryget);
void __css_put(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
+ int v;
rcu_read_lock();
- atomic_dec(&css->refcnt);
- switch (css_refcnt(css)) {
+ v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
+
+ switch (v) {
case 1:
if (notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f85c0154b333..d7d71d6ec972 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -253,9 +253,9 @@ perf_cgroup_match(struct perf_event *event)
return !event->cgrp || event->cgrp == cpuctx->cgrp;
}
-static inline void perf_get_cgroup(struct perf_event *event)
+static inline bool perf_tryget_cgroup(struct perf_event *event)
{
- css_get(&event->cgrp->css);
+ return css_tryget(&event->cgrp->css);
}
static inline void perf_put_cgroup(struct perf_event *event)
@@ -484,7 +484,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
event->cgrp = cgrp;
/* must be done before we fput() the file */
- perf_get_cgroup(event);
+ if (!perf_tryget_cgroup(event)) {
+ event->cgrp = NULL;
+ ret = -ENOENT;
+ goto out;
+ }
/*
* all events in a group must monitor
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..2f59cc334516 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,18 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
list_del_rcu(&p->tasks);
list_del_init(&p->sibling);
__this_cpu_dec(process_counts);
+ /*
+ * If we are the last child process in a pid namespace to be
+ * reaped, notify the reaper sleeping in zap_pid_ns_processes().
+ */
+ if (IS_ENABLED(CONFIG_PID_NS)) {
+ struct task_struct *parent = p->real_parent;
+
+ if ((task_active_pid_ns(parent)->child_reaper == parent) &&
+ list_empty(&parent->children) &&
+ (parent->flags & PF_EXITING))
+ wake_up_process(parent);
+ }
}
list_del_rcu(&p->thread_group);
}
@@ -643,6 +655,7 @@ static void exit_mm(struct task_struct * tsk)
mm_release(tsk, mm);
if (!mm)
return;
+ sync_mm_rss(mm);
/*
* Serialize with any possible pending coredump.
* We must hold mmap_sem around checking core_state
@@ -719,12 +732,6 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
zap_pid_ns_processes(pid_ns);
write_lock_irq(&tasklist_lock);
- /*
- * We can not clear ->child_reaper or leave it alone.
- * There may by stealth EXIT_DEAD tasks on ->children,
- * forget_original_parent() must move them somewhere.
- */
- pid_ns->child_reaper = init_pid_ns.child_reaper;
} else if (father->signal->has_child_subreaper) {
struct task_struct *reaper;
diff --git a/kernel/panic.c b/kernel/panic.c
index 8ed89a175d79..d2a5f4ecc6dd 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -27,7 +27,7 @@
#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18
-int panic_on_oops;
+int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
@@ -108,8 +108,6 @@ void panic(const char *fmt, ...)
*/
crash_kexec(NULL);
- kmsg_dump(KMSG_DUMP_PANIC);
-
/*
* Note smp_send_stop is the usual smp shutdown function, which
* unfortunately means it may not be hardened to work in a panic
@@ -117,6 +115,8 @@ void panic(const char *fmt, ...)
*/
smp_send_stop();
+ kmsg_dump(KMSG_DUMP_PANIC);
+
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
bust_spinlocks(0);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 16b20e38c4a1..b3c7fd554250 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -184,11 +184,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
}
read_unlock(&tasklist_lock);
+ /* Firstly reap the EXIT_ZOMBIE children we may have. */
do {
clear_thread_flag(TIF_SIGPENDING);
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
+ /*
+ * sys_wait4() above can't reap the TASK_DEAD children.
+ * Make sure they all go away, see __unhash_process().
+ */
+ for (;;) {
+ bool need_wait = false;
+
+ read_lock(&tasklist_lock);
+ if (!list_empty(&current->children)) {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ need_wait = true;
+ }
+ read_unlock(&tasklist_lock);
+
+ if (!need_wait)
+ break;
+ schedule();
+ }
+
if (pid_ns->reboot)
current->signal->group_exit_code = pid_ns->reboot;
diff --git a/kernel/printk.c b/kernel/printk.c
index 32462d2b364a..dba18211685e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -193,12 +193,19 @@ static int console_may_schedule;
* separated by ',', and find the message after the ';' character.
*/
+enum log_flags {
+ LOG_DEFAULT = 0,
+ LOG_NOCONS = 1, /* already flushed, do not print to console */
+};
+
struct log {
u64 ts_nsec; /* timestamp in nanoseconds */
u16 len; /* length of entire record */
u16 text_len; /* length of text buffer */
u16 dict_len; /* length of dictionary buffer */
- u16 level; /* syslog level + facility */
+ u8 facility; /* syslog facility */
+ u8 flags:5; /* internal record flags */
+ u8 level:3; /* syslog level */
};
/*
@@ -227,10 +234,10 @@ static u32 clear_idx;
#define LOG_LINE_MAX 1024
/* record buffer */
-#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
#define LOG_ALIGN 4
#else
-#define LOG_ALIGN 8
+#define LOG_ALIGN __alignof__(struct log)
#endif
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -286,6 +293,7 @@ static u32 log_next(u32 idx)
/* insert record into the buffer, discard old ones, update heads */
static void log_store(int facility, int level,
+ enum log_flags flags, u64 ts_nsec,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
{
@@ -329,8 +337,13 @@ static void log_store(int facility, int level,
msg->text_len = text_len;
memcpy(log_dict(msg), dict, dict_len);
msg->dict_len = dict_len;
- msg->level = (facility << 3) | (level & 7);
- msg->ts_nsec = local_clock();
+ msg->facility = facility;
+ msg->level = level & 7;
+ msg->flags = flags & 0x1f;
+ if (ts_nsec > 0)
+ msg->ts_nsec = ts_nsec;
+ else
+ msg->ts_nsec = local_clock();
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
@@ -414,7 +427,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
if (!user)
return -EBADF;
- mutex_lock(&user->lock);
+ ret = mutex_lock_interruptible(&user->lock);
+ if (ret)
+ return ret;
raw_spin_lock(&logbuf_lock);
while (user->seq == log_next_seq) {
if (file->f_flags & O_NONBLOCK) {
@@ -444,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
ts_usec = msg->ts_nsec;
do_div(ts_usec, 1000);
len = sprintf(user->buf, "%u,%llu,%llu;",
- msg->level, user->seq, ts_usec);
+ (msg->facility << 3) | msg->level, user->seq, ts_usec);
/* escape non-printable characters */
for (i = 0; i < msg->text_len; i++) {
@@ -785,6 +800,21 @@ static bool printk_time;
#endif
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
+static size_t print_time(u64 ts, char *buf)
+{
+ unsigned long rem_nsec;
+
+ if (!printk_time)
+ return 0;
+
+ if (!buf)
+ return 15;
+
+ rem_nsec = do_div(ts, 1000000000);
+ return sprintf(buf, "[%5lu.%06lu] ",
+ (unsigned long)ts, rem_nsec / 1000);
+}
+
static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
{
size_t len = 0;
@@ -801,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
}
}
- if (printk_time) {
- if (buf) {
- unsigned long long ts = msg->ts_nsec;
- unsigned long rem_nsec = do_div(ts, 1000000000);
-
- len += sprintf(buf + len, "[%5lu.%06lu] ",
- (unsigned long) ts, rem_nsec / 1000);
- } else {
- len += 15;
- }
- }
-
+ len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
return len;
}
@@ -860,26 +879,49 @@ static int syslog_print(char __user *buf, int size)
{
char *text;
struct log *msg;
- int len;
+ int len = 0;
text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
- raw_spin_lock_irq(&logbuf_lock);
- if (syslog_seq < log_first_seq) {
- /* messages are gone, move to first one */
- syslog_seq = log_first_seq;
- syslog_idx = log_first_idx;
- }
- msg = log_from_idx(syslog_idx);
- len = msg_print_text(msg, true, text, LOG_LINE_MAX);
- syslog_idx = log_next(syslog_idx);
- syslog_seq++;
- raw_spin_unlock_irq(&logbuf_lock);
+ while (size > 0) {
+ size_t n;
+
+ raw_spin_lock_irq(&logbuf_lock);
+ if (syslog_seq < log_first_seq) {
+ /* messages are gone, move to first one */
+ syslog_seq = log_first_seq;
+ syslog_idx = log_first_idx;
+ }
+ if (syslog_seq == log_next_seq) {
+ raw_spin_unlock_irq(&logbuf_lock);
+ break;
+ }
+ msg = log_from_idx(syslog_idx);
+ n = msg_print_text(msg, true, text, LOG_LINE_MAX);
+ if (n <= size) {
+ syslog_idx = log_next(syslog_idx);
+ syslog_seq++;
+ } else
+ n = 0;
+ raw_spin_unlock_irq(&logbuf_lock);
+
+ if (!n)
+ break;
+
+ len += n;
+ size -= n;
+ buf += n;
+ n = copy_to_user(buf - n, text, n);
- if (len > 0 && copy_to_user(buf, text, len))
- len = -EFAULT;
+ if (n) {
+ len -= n;
+ if (!len)
+ len = -EFAULT;
+ break;
+ }
+ }
kfree(text);
return len;
@@ -909,7 +951,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
- */
+ */
seq = clear_seq;
idx = clear_idx;
while (seq < log_next_seq) {
@@ -919,6 +961,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
idx = log_next(idx);
seq++;
}
+
+ /* move first record forward until length fits into the buffer */
seq = clear_seq;
idx = clear_idx;
while (len > size && seq < log_next_seq) {
@@ -929,7 +973,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
seq++;
}
- /* last message in this dump */
+ /* last message fitting into this dump */
next_seq = log_next_seq;
len = 0;
@@ -974,6 +1018,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
{
bool clear = false;
static int saved_console_loglevel = -1;
+ static DEFINE_MUTEX(syslog_mutex);
int error;
error = check_syslog_permissions(type, from_file);
@@ -1000,11 +1045,17 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
error = -EFAULT;
goto out;
}
+ error = mutex_lock_interruptible(&syslog_mutex);
+ if (error)
+ goto out;
error = wait_event_interruptible(log_wait,
syslog_seq != log_next_seq);
- if (error)
+ if (error) {
+ mutex_unlock(&syslog_mutex);
goto out;
+ }
error = syslog_print(buf, len);
+ mutex_unlock(&syslog_mutex);
break;
/* Read/clear last kernel messages */
case SYSLOG_ACTION_READ_CLEAR:
@@ -1027,6 +1078,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
/* Clear ring buffer */
case SYSLOG_ACTION_CLEAR:
syslog_print_all(NULL, 0, true);
+ break;
/* Disable logging to console */
case SYSLOG_ACTION_CONSOLE_OFF:
if (saved_console_loglevel == -1)
@@ -1259,15 +1311,92 @@ static inline void printk_delay(void)
}
}
+/*
+ * Continuation lines are buffered, and not committed to the record buffer
+ * until the line is complete, or a race forces it. The line fragments
+ * though, are printed immediately to the consoles to ensure everything has
+ * reached the console in case of a kernel crash.
+ */
+static struct cont {
+ char buf[LOG_LINE_MAX];
+ size_t len; /* length == 0 means unused buffer */
+ size_t cons; /* bytes written to console */
+ struct task_struct *owner; /* task of first print*/
+ u64 ts_nsec; /* time of first print */
+ u8 level; /* log level of first message */
+ u8 facility; /* log facility of first message */
+ bool flushed:1; /* buffer sealed and committed */
+} cont;
+
+static void cont_flush(void)
+{
+ if (cont.flushed)
+ return;
+ if (cont.len == 0)
+ return;
+
+ log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
+ NULL, 0, cont.buf, cont.len);
+
+ cont.flushed = true;
+}
+
+static bool cont_add(int facility, int level, const char *text, size_t len)
+{
+ if (cont.len && cont.flushed)
+ return false;
+
+ if (cont.len + len > sizeof(cont.buf)) {
+ cont_flush();
+ return false;
+ }
+
+ if (!cont.len) {
+ cont.facility = facility;
+ cont.level = level;
+ cont.owner = current;
+ cont.ts_nsec = local_clock();
+ cont.cons = 0;
+ cont.flushed = false;
+ }
+
+ memcpy(cont.buf + cont.len, text, len);
+ cont.len += len;
+ return true;
+}
+
+static size_t cont_print_text(char *text, size_t size)
+{
+ size_t textlen = 0;
+ size_t len;
+
+ if (cont.cons == 0) {
+ textlen += print_time(cont.ts_nsec, text);
+ size -= textlen;
+ }
+
+ len = cont.len - cont.cons;
+ if (len > 0) {
+ if (len+1 > size)
+ len = size-1;
+ memcpy(text + textlen, cont.buf + cont.cons, len);
+ textlen += len;
+ cont.cons = cont.len;
+ }
+
+ if (cont.flushed) {
+ text[textlen++] = '\n';
+ /* got everything, release buffer */
+ cont.len = 0;
+ }
+ return textlen;
+}
+
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
static int recursion_bug;
- static char cont_buf[LOG_LINE_MAX];
- static size_t cont_len;
- static int cont_level;
- static struct task_struct *cont_task;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t text_len;
@@ -1313,7 +1442,8 @@ asmlinkage int vprintk_emit(int facility, int level,
recursion_bug = 0;
printed_len += strlen(recursion_msg);
/* emit KERN_CRIT message */
- log_store(0, 2, NULL, 0, recursion_msg, printed_len);
+ log_store(0, 2, LOG_DEFAULT, 0,
+ NULL, 0, recursion_msg, printed_len);
}
/*
@@ -1351,55 +1481,37 @@ asmlinkage int vprintk_emit(int facility, int level,
}
if (!newline) {
- if (cont_len && (prefix || cont_task != current)) {
- /*
- * Flush earlier buffer, which is either from a
- * different thread, or when we got a new prefix.
- */
- log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
- cont_len = 0;
- }
-
- if (!cont_len) {
- cont_level = level;
- cont_task = current;
- }
+ /*
+ * Flush the conflicting buffer. An earlier newline was missing,
+ * or another task also prints continuation lines.
+ */
+ if (cont.len && (prefix || cont.owner != current))
+ cont_flush();
- /* buffer or append to earlier buffer from the same thread */
- if (cont_len + text_len > sizeof(cont_buf))
- text_len = sizeof(cont_buf) - cont_len;
- memcpy(cont_buf + cont_len, text, text_len);
- cont_len += text_len;
+ /* buffer line if possible, otherwise store it right away */
+ if (!cont_add(facility, level, text, text_len))
+ log_store(facility, level, LOG_DEFAULT, 0,
+ dict, dictlen, text, text_len);
} else {
- if (cont_len && cont_task == current) {
- if (prefix) {
- /*
- * New prefix from the same thread; flush. We
- * either got no earlier newline, or we race
- * with an interrupt.
- */
- log_store(facility, cont_level,
- NULL, 0, cont_buf, cont_len);
- cont_len = 0;
- }
+ bool stored = false;
- /* append to the earlier buffer and flush */
- if (cont_len + text_len > sizeof(cont_buf))
- text_len = sizeof(cont_buf) - cont_len;
- memcpy(cont_buf + cont_len, text, text_len);
- cont_len += text_len;
- log_store(facility, cont_level,
- NULL, 0, cont_buf, cont_len);
- cont_len = 0;
- cont_task = NULL;
- printed_len = cont_len;
- } else {
- /* ordinary single and terminated line */
- log_store(facility, level,
- dict, dictlen, text, text_len);
- printed_len = text_len;
+ /*
+ * If an earlier newline was missing and it was the same task,
+ * either merge it with the current buffer and flush, or if
+ * there was a race with interrupts (prefix == true) then just
+ * flush it out and store this line separately.
+ */
+ if (cont.len && cont.owner == current) {
+ if (!prefix)
+ stored = cont_add(facility, level, text, text_len);
+ cont_flush();
}
+
+ if (!stored)
+ log_store(facility, level, LOG_DEFAULT, 0,
+ dict, dictlen, text, text_len);
}
+ printed_len += text_len;
/*
* Try to acquire and then immediately release the console semaphore.
@@ -1486,11 +1598,18 @@ EXPORT_SYMBOL(printk);
#else
#define LOG_LINE_MAX 0
+static struct cont {
+ size_t len;
+ size_t cons;
+ u8 level;
+ bool flushed:1;
+} cont;
static struct log *log_from_idx(u32 idx) { return NULL; }
static u32 log_next(u32 idx) { return 0; }
static void call_console_drivers(int level, const char *text, size_t len) {}
static size_t msg_print_text(const struct log *msg, bool syslog,
char *buf, size_t size) { return 0; }
+static size_t cont_print_text(char *text, size_t size) { return 0; }
#endif /* CONFIG_PRINTK */
@@ -1782,6 +1901,7 @@ static u32 console_idx;
*/
void console_unlock(void)
{
+ static char text[LOG_LINE_MAX];
static u64 seen_seq;
unsigned long flags;
bool wake_klogd = false;
@@ -1794,10 +1914,23 @@ void console_unlock(void)
console_may_schedule = 0;
+ /* flush buffered message fragment immediately to console */
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ if (cont.len && (cont.cons < cont.len || cont.flushed)) {
+ size_t len;
+
+ len = cont_print_text(text, sizeof(text));
+ raw_spin_unlock(&logbuf_lock);
+ stop_critical_timings();
+ call_console_drivers(cont.level, text, len);
+ start_critical_timings();
+ local_irq_restore(flags);
+ } else
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
again:
for (;;) {
struct log *msg;
- static char text[LOG_LINE_MAX];
size_t len;
int level;
@@ -1812,13 +1945,22 @@ again:
console_seq = log_first_seq;
console_idx = log_first_idx;
}
-
+skip:
if (console_seq == log_next_seq)
break;
msg = log_from_idx(console_idx);
- level = msg->level & 7;
+ if (msg->flags & LOG_NOCONS) {
+ /*
+ * Skip record we have buffered and already printed
+ * directly to the console when we received it.
+ */
+ console_idx = log_next(console_idx);
+ console_seq++;
+ goto skip;
+ }
+ level = msg->level;
len = msg_print_text(msg, false, text, sizeof(text));
console_idx = log_next(console_idx);
@@ -2300,48 +2442,210 @@ module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
* kmsg_dump - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping
*
- * Iterate through each of the dump devices and call the oops/panic
- * callbacks with the log buffer.
+ * Call each of the registered dumper's dump() callback, which can
+ * retrieve the kmsg records with kmsg_dump_get_line() or
+ * kmsg_dump_get_buffer().
*/
void kmsg_dump(enum kmsg_dump_reason reason)
{
- u64 idx;
struct kmsg_dumper *dumper;
- const char *s1, *s2;
- unsigned long l1, l2;
unsigned long flags;
if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
return;
- /* Theoretically, the log could move on after we do this, but
- there's not a lot we can do about that. The new messages
- will overwrite the start of what we dump. */
+ rcu_read_lock();
+ list_for_each_entry_rcu(dumper, &dump_list, list) {
+ if (dumper->max_reason && reason > dumper->max_reason)
+ continue;
+
+ /* initialize iterator with data about the stored records */
+ dumper->active = true;
+
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ dumper->cur_seq = clear_seq;
+ dumper->cur_idx = clear_idx;
+ dumper->next_seq = log_next_seq;
+ dumper->next_idx = log_next_idx;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
+ /* invoke dumper which will iterate over records */
+ dumper->dump(dumper, reason);
+
+ /* reset iterator */
+ dumper->active = false;
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * kmsg_dump_get_line - retrieve one kmsg log line
+ * @dumper: registered kmsg dumper
+ * @syslog: include the "<4>" prefixes
+ * @line: buffer to copy the line to
+ * @size: maximum size of the buffer
+ * @len: length of line placed into buffer
+ *
+ * Start at the beginning of the kmsg buffer, with the oldest kmsg
+ * record, and copy one record into the provided buffer.
+ *
+ * Consecutive calls will return the next available record moving
+ * towards the end of the buffer with the youngest messages.
+ *
+ * A return value of FALSE indicates that there are no more records to
+ * read.
+ */
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+ char *line, size_t size, size_t *len)
+{
+ unsigned long flags;
+ struct log *msg;
+ size_t l = 0;
+ bool ret = false;
+
+ if (!dumper->active)
+ goto out;
raw_spin_lock_irqsave(&logbuf_lock, flags);
- if (syslog_seq < log_first_seq)
- idx = syslog_idx;
- else
- idx = log_first_idx;
+ if (dumper->cur_seq < log_first_seq) {
+ /* messages are gone, move to first available one */
+ dumper->cur_seq = log_first_seq;
+ dumper->cur_idx = log_first_idx;
+ }
- if (idx > log_next_idx) {
- s1 = log_buf;
- l1 = log_next_idx;
+ /* last entry */
+ if (dumper->cur_seq >= log_next_seq) {
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ goto out;
+ }
- s2 = log_buf + idx;
- l2 = log_buf_len - idx;
- } else {
- s1 = "";
- l1 = 0;
+ msg = log_from_idx(dumper->cur_idx);
+ l = msg_print_text(msg, syslog,
+ line, size);
- s2 = log_buf + idx;
- l2 = log_next_idx - idx;
+ dumper->cur_idx = log_next(dumper->cur_idx);
+ dumper->cur_seq++;
+ ret = true;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+out:
+ if (len)
+ *len = l;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
+
+/**
+ * kmsg_dump_get_buffer - copy kmsg log lines
+ * @dumper: registered kmsg dumper
+ * @syslog: include the "<4>" prefixes
+ * @buf: buffer to copy the line to
+ * @size: maximum size of the buffer
+ * @len: length of line placed into buffer
+ *
+ * Start at the end of the kmsg buffer and fill the provided buffer
+ * with as many of the *youngest* kmsg records that fit into it.
+ * If the buffer is large enough, all available kmsg records will be
+ * copied with a single call.
+ *
+ * Consecutive calls will fill the buffer with the next block of
+ * available older records, not including the earlier retrieved ones.
+ *
+ * A return value of FALSE indicates that there are no more records to
+ * read.
+ */
+bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+ char *buf, size_t size, size_t *len)
+{
+ unsigned long flags;
+ u64 seq;
+ u32 idx;
+ u64 next_seq;
+ u32 next_idx;
+ size_t l = 0;
+ bool ret = false;
+
+ if (!dumper->active)
+ goto out;
+
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ if (dumper->cur_seq < log_first_seq) {
+ /* messages are gone, move to first available one */
+ dumper->cur_seq = log_first_seq;
+ dumper->cur_idx = log_first_idx;
}
+
+ /* last entry */
+ if (dumper->cur_seq >= dumper->next_seq) {
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ goto out;
+ }
+
+ /* calculate length of entire buffer */
+ seq = dumper->cur_seq;
+ idx = dumper->cur_idx;
+ while (seq < dumper->next_seq) {
+ struct log *msg = log_from_idx(idx);
+
+ l += msg_print_text(msg, true, NULL, 0);
+ idx = log_next(idx);
+ seq++;
+ }
+
+ /* move first record forward until length fits into the buffer */
+ seq = dumper->cur_seq;
+ idx = dumper->cur_idx;
+ while (l > size && seq < dumper->next_seq) {
+ struct log *msg = log_from_idx(idx);
+
+ l -= msg_print_text(msg, true, NULL, 0);
+ idx = log_next(idx);
+ seq++;
+ }
+
+ /* last message in next iteration */
+ next_seq = seq;
+ next_idx = idx;
+
+ l = 0;
+ while (seq < dumper->next_seq) {
+ struct log *msg = log_from_idx(idx);
+
+ l += msg_print_text(msg, syslog,
+ buf + l, size - l);
+
+ idx = log_next(idx);
+ seq++;
+ }
+
+ dumper->next_seq = next_seq;
+ dumper->next_idx = next_idx;
+ ret = true;
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+out:
+ if (len)
+ *len = l;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
- rcu_read_lock();
- list_for_each_entry_rcu(dumper, &dump_list, list)
- dumper->dump(dumper, reason, s1, l1, s2, l2);
- rcu_read_unlock();
+/**
+ * kmsg_dump_rewind - reset the iterator
+ * @dumper: registered kmsg dumper
+ *
+ * Reset the dumper's iterator so that kmsg_dump_get_line() and
+ * kmsg_dump_get_buffer() can be called again and used multiple
+ * times within the same dumper.dump() callback.
+ */
+void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ dumper->cur_seq = clear_seq;
+ dumper->cur_idx = clear_idx;
+ dumper->next_seq = log_next_seq;
+ dumper->next_idx = log_next_idx;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
}
+EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
#endif
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0da7b88d92d0..38ecdda3f55f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1397,6 +1397,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
rdp->qlen_lazy += rsp->qlen_lazy;
rdp->qlen += rsp->qlen;
rdp->n_cbs_adopted += rsp->qlen;
+ if (rsp->qlen_lazy != rsp->qlen)
+ rcu_idle_count_callbacks_posted();
rsp->qlen_lazy = 0;
rsp->qlen = 0;
@@ -1528,7 +1530,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_head *next, *list, **tail;
- int bl, count, count_lazy;
+ int bl, count, count_lazy, i;
/* If no callbacks are ready, just return.*/
if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -1551,9 +1553,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
*rdp->nxttail[RCU_DONE_TAIL] = NULL;
tail = rdp->nxttail[RCU_DONE_TAIL];
- for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
- if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
- rdp->nxttail[count] = &rdp->nxtlist;
+ for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
+ if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
+ rdp->nxttail[i] = &rdp->nxtlist;
local_irq_restore(flags);
/* Invoke callbacks. */
@@ -1581,9 +1583,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
if (list != NULL) {
*tail = rdp->nxtlist;
rdp->nxtlist = list;
- for (count = 0; count < RCU_NEXT_SIZE; count++)
- if (&rdp->nxtlist == rdp->nxttail[count])
- rdp->nxttail[count] = tail;
+ for (i = 0; i < RCU_NEXT_SIZE; i++)
+ if (&rdp->nxtlist == rdp->nxttail[i])
+ rdp->nxttail[i] = tail;
else
break;
}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7f5d138dedf5..ea056495783e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,6 +84,20 @@ struct rcu_dynticks {
/* Process level is worth LLONG_MAX/2. */
int dynticks_nmi_nesting; /* Track NMI nesting level. */
atomic_t dynticks; /* Even value for idle, else odd. */
+#ifdef CONFIG_RCU_FAST_NO_HZ
+ int dyntick_drain; /* Prepare-for-idle state variable. */
+ unsigned long dyntick_holdoff;
+ /* No retries for the jiffy of failure. */
+ struct timer_list idle_gp_timer;
+ /* Wake up CPU sleeping with callbacks. */
+ unsigned long idle_gp_timer_expires;
+ /* When to wake up CPU (for repost). */
+ bool idle_first_pass; /* First pass of attempt to go idle? */
+ unsigned long nonlazy_posted;
+ /* # times non-lazy CBs posted to CPU. */
+ unsigned long nonlazy_posted_snap;
+ /* idle-period nonlazy_posted snapshot. */
+#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};
/* RCU's kthread states for tracing. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 2411000d9869..5271a020887e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
* any flavor of RCU.
*/
-int rcu_needs_cpu(int cpu)
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
{
+ *delta_jiffies = ULONG_MAX;
return rcu_cpu_has_callbacks(cpu);
}
@@ -1962,41 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void)
#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
-/* Loop counter for rcu_prepare_for_idle(). */
-static DEFINE_PER_CPU(int, rcu_dyntick_drain);
-/* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */
-static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
-/* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */
-static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer);
-/* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */
-static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires);
-/* Enable special processing on first attempt to enter dyntick-idle mode. */
-static DEFINE_PER_CPU(bool, rcu_idle_first_pass);
-/* Running count of non-lazy callbacks posted, never decremented. */
-static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted);
-/* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */
-static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap);
-
-/*
- * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
- * callbacks on this CPU, (2) this CPU has not yet attempted to enter
- * dyntick-idle mode, or (3) this CPU is in the process of attempting to
- * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
- * to enter dyntick-idle mode, we refuse to try to enter it. After all,
- * it is better to incur scheduling-clock interrupts than to spin
- * continuously for the same time duration!
- */
-int rcu_needs_cpu(int cpu)
-{
- /* Flag a new idle sojourn to the idle-entry state machine. */
- per_cpu(rcu_idle_first_pass, cpu) = 1;
- /* If no callbacks, RCU doesn't need the CPU. */
- if (!rcu_cpu_has_callbacks(cpu))
- return 0;
- /* Otherwise, RCU needs the CPU only if it recently tried and failed. */
- return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
-}
-
/*
* Does the specified flavor of RCU have non-lazy callbacks pending on
* the specified CPU? Both RCU flavor and CPU are specified by the
@@ -2040,6 +2006,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
}
/*
+ * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
+ * callbacks on this CPU, (2) this CPU has not yet attempted to enter
+ * dyntick-idle mode, or (3) this CPU is in the process of attempting to
+ * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
+ * to enter dyntick-idle mode, we refuse to try to enter it. After all,
+ * it is better to incur scheduling-clock interrupts than to spin
+ * continuously for the same time duration!
+ *
+ * The delta_jiffies argument is used to store the time when RCU is
+ * going to need the CPU again if it still has callbacks. The reason
+ * for this is that rcu_prepare_for_idle() might need to post a timer,
+ * but if so, it will do so after tick_nohz_stop_sched_tick() has set
+ * the wakeup time for this CPU. This means that RCU's timer can be
+ * delayed until the wakeup time, which defeats the purpose of posting
+ * a timer.
+ */
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
+{
+ struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+ /* Flag a new idle sojourn to the idle-entry state machine. */
+ rdtp->idle_first_pass = 1;
+ /* If no callbacks, RCU doesn't need the CPU. */
+ if (!rcu_cpu_has_callbacks(cpu)) {
+ *delta_jiffies = ULONG_MAX;
+ return 0;
+ }
+ if (rdtp->dyntick_holdoff == jiffies) {
+ /* RCU recently tried and failed, so don't try again. */
+ *delta_jiffies = 1;
+ return 1;
+ }
+ /* Set up for the possibility that RCU will post a timer. */
+ if (rcu_cpu_has_nonlazy_callbacks(cpu))
+ *delta_jiffies = RCU_IDLE_GP_DELAY;
+ else
+ *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
+ return 0;
+}
+
+/*
* Handler for smp_call_function_single(). The only point of this
* handler is to wake the CPU up, so the handler does only tracing.
*/
@@ -2075,21 +2082,24 @@ static void rcu_idle_gp_timer_func(unsigned long cpu_in)
*/
static void rcu_prepare_for_idle_init(int cpu)
{
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
- setup_timer(&per_cpu(rcu_idle_gp_timer, cpu),
- rcu_idle_gp_timer_func, cpu);
- per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1;
- per_cpu(rcu_idle_first_pass, cpu) = 1;
+ struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+ rdtp->dyntick_holdoff = jiffies - 1;
+ setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
+ rdtp->idle_gp_timer_expires = jiffies - 1;
+ rdtp->idle_first_pass = 1;
}
/*
* Clean up for exit from idle. Because we are exiting from idle, there
- * is no longer any point to rcu_idle_gp_timer, so cancel it. This will
+ * is no longer any point to ->idle_gp_timer, so cancel it. This will
* do nothing if this timer is not active, so just cancel it unconditionally.
*/
static void rcu_cleanup_after_idle(int cpu)
{
- del_timer(&per_cpu(rcu_idle_gp_timer, cpu));
+ struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+ del_timer(&rdtp->idle_gp_timer);
trace_rcu_prep_idle("Cleanup after idle");
}
@@ -2108,42 +2118,41 @@ static void rcu_cleanup_after_idle(int cpu)
* Because it is not legal to invoke rcu_process_callbacks() with irqs
* disabled, we do one pass of force_quiescent_state(), then do a
* invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
- * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ * later. The ->dyntick_drain field controls the sequencing.
*
* The caller must have disabled interrupts.
*/
static void rcu_prepare_for_idle(int cpu)
{
struct timer_list *tp;
+ struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
/*
* If this is an idle re-entry, for example, due to use of
* RCU_NONIDLE() or the new idle-loop tracing API within the idle
* loop, then don't take any state-machine actions, unless the
* momentary exit from idle queued additional non-lazy callbacks.
- * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks
+ * Instead, repost the ->idle_gp_timer if this CPU has callbacks
* pending.
*/
- if (!per_cpu(rcu_idle_first_pass, cpu) &&
- (per_cpu(rcu_nonlazy_posted, cpu) ==
- per_cpu(rcu_nonlazy_posted_snap, cpu))) {
+ if (!rdtp->idle_first_pass &&
+ (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
if (rcu_cpu_has_callbacks(cpu)) {
- tp = &per_cpu(rcu_idle_gp_timer, cpu);
- mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu));
+ tp = &rdtp->idle_gp_timer;
+ mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
}
return;
}
- per_cpu(rcu_idle_first_pass, cpu) = 0;
- per_cpu(rcu_nonlazy_posted_snap, cpu) =
- per_cpu(rcu_nonlazy_posted, cpu) - 1;
+ rdtp->idle_first_pass = 0;
+ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
/*
* If there are no callbacks on this CPU, enter dyntick-idle mode.
* Also reset state to avoid prejudicing later attempts.
*/
if (!rcu_cpu_has_callbacks(cpu)) {
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
- per_cpu(rcu_dyntick_drain, cpu) = 0;
+ rdtp->dyntick_holdoff = jiffies - 1;
+ rdtp->dyntick_drain = 0;
trace_rcu_prep_idle("No callbacks");
return;
}
@@ -2152,36 +2161,37 @@ static void rcu_prepare_for_idle(int cpu)
* If in holdoff mode, just return. We will presumably have
* refrained from disabling the scheduling-clock tick.
*/
- if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
+ if (rdtp->dyntick_holdoff == jiffies) {
trace_rcu_prep_idle("In holdoff");
return;
}
- /* Check and update the rcu_dyntick_drain sequencing. */
- if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+ /* Check and update the ->dyntick_drain sequencing. */
+ if (rdtp->dyntick_drain <= 0) {
/* First time through, initialize the counter. */
- per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
- } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
+ rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
+ } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
!rcu_pending(cpu) &&
!local_softirq_pending()) {
/* Can we go dyntick-idle despite still having callbacks? */
- trace_rcu_prep_idle("Dyntick with callbacks");
- per_cpu(rcu_dyntick_drain, cpu) = 0;
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
- if (rcu_cpu_has_nonlazy_callbacks(cpu))
- per_cpu(rcu_idle_gp_timer_expires, cpu) =
+ rdtp->dyntick_drain = 0;
+ rdtp->dyntick_holdoff = jiffies;
+ if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+ trace_rcu_prep_idle("Dyntick with callbacks");
+ rdtp->idle_gp_timer_expires =
jiffies + RCU_IDLE_GP_DELAY;
- else
- per_cpu(rcu_idle_gp_timer_expires, cpu) =
+ } else {
+ rdtp->idle_gp_timer_expires =
jiffies + RCU_IDLE_LAZY_GP_DELAY;
- tp = &per_cpu(rcu_idle_gp_timer, cpu);
- mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu));
- per_cpu(rcu_nonlazy_posted_snap, cpu) =
- per_cpu(rcu_nonlazy_posted, cpu);
+ trace_rcu_prep_idle("Dyntick with lazy callbacks");
+ }
+ tp = &rdtp->idle_gp_timer;
+ mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
return; /* Nothing more to do immediately. */
- } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+ } else if (--(rdtp->dyntick_drain) <= 0) {
/* We have hit the limit, so time to give up. */
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+ rdtp->dyntick_holdoff = jiffies;
trace_rcu_prep_idle("Begin holdoff");
invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
return;
@@ -2227,7 +2237,7 @@ static void rcu_prepare_for_idle(int cpu)
*/
static void rcu_idle_count_callbacks_posted(void)
{
- __this_cpu_add(rcu_nonlazy_posted, 1);
+ __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
}
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
@@ -2238,11 +2248,12 @@ static void rcu_idle_count_callbacks_posted(void)
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
- struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu);
+ struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+ struct timer_list *tltp = &rdtp->idle_gp_timer;
sprintf(cp, "drain=%d %c timer=%lu",
- per_cpu(rcu_dyntick_drain, cpu),
- per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.',
+ rdtp->dyntick_drain,
+ rdtp->dyntick_holdoff == jiffies ? 'H' : '.',
timer_pending(tltp) ? tltp->expires - jiffies : -1);
}
diff --git a/kernel/relay.c b/kernel/relay.c
index ab56a1764d4d..e8cd2027abbd 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
struct splice_pipe_desc spd = {
.pages = pages,
.nr_pages = 0,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.partial = partial,
.flags = flags,
.ops = &relay_pipe_buf_ops,
@@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
ret += padding;
out:
- splice_shrink_spd(pipe, &spd);
- return ret;
+ splice_shrink_spd(&spd);
+ return ret;
}
static ssize_t relay_file_splice_read(struct file *in,
diff --git a/kernel/sys.c b/kernel/sys.c
index f0ec44dcd415..e0c8ffc50d7f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2127,9 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
return -EINVAL;
break;
- case PR_GET_TID_ADDRESS:
- error = prctl_get_tid_address(me, (int __user **)arg2);
- break;
default:
return -EINVAL;
}
@@ -2147,6 +2144,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_MM:
error = prctl_set_mm(arg2, arg3, arg4, arg5);
break;
+ case PR_GET_TID_ADDRESS:
+ error = prctl_get_tid_address(me, (int __user **)arg2);
+ break;
case PR_SET_CHILD_SUBREAPER:
me->signal->is_child_subreaper = !!arg2;
error = 0;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index da70c6db496c..869997833928 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
+ unsigned long rcu_delta_jiffies;
ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
u64 time_delta;
@@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
time_delta = timekeeping_max_deferment();
} while (read_seqretry(&xtime_lock, seq));
- if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+ if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
arch_needs_cpu(cpu)) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
@@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
/* Get the next timer wheel timer */
next_jiffies = get_next_timer_interrupt(last_jiffies);
delta_jiffies = next_jiffies - last_jiffies;
+ if (rcu_delta_jiffies < delta_jiffies) {
+ next_jiffies = last_jiffies + rcu_delta_jiffies;
+ delta_jiffies = rcu_delta_jiffies;
+ }
}
/*
* Do not stop the tick, if we are only one off
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 68032c6177db..a7fa0702be1c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -371,7 +371,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
void tracing_off(void)
{
if (global_trace.buffer)
- ring_buffer_record_on(global_trace.buffer);
+ ring_buffer_record_off(global_trace.buffer);
/*
* This flag is only looked at when buffers haven't been
* allocated yet. We don't really care about the race
@@ -3609,6 +3609,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
.pages = pages_def,
.partial = partial_def,
.nr_pages = 0, /* This gets updated below. */
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &tracing_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
@@ -3680,7 +3681,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
ret = splice_to_pipe(pipe, &spd);
out:
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return ret;
out_err:
@@ -4231,6 +4232,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
struct splice_pipe_desc spd = {
.pages = pages_def,
.partial = partial_def,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &buffer_pipe_buf_ops,
.spd_release = buffer_spd_release,
@@ -4318,7 +4320,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
}
ret = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
out:
return ret;
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index e5e1d85b8c7c..4b1dfba70f7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -372,6 +372,13 @@ static int watchdog(void *unused)
#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/*
+ * People like the simple clean cpu node info on boot.
+ * Reduce the watchdog noise by only printing messages
+ * that are different from what cpu0 displayed.
+ */
+static unsigned long cpu0_err;
+
static int watchdog_nmi_enable(int cpu)
{
struct perf_event_attr *wd_attr;
@@ -390,11 +397,21 @@ static int watchdog_nmi_enable(int cpu)
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+
+ /* save cpu0 error for future comparison */
+ if (cpu == 0 && IS_ERR(event))
+ cpu0_err = PTR_ERR(event);
+
if (!IS_ERR(event)) {
- pr_info("enabled, takes one hw-pmu counter.\n");
+ /* only print for cpu0 or different than cpu0 */
+ if (cpu == 0 || cpu0_err)
+ pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
goto out_save;
}
+ /* skip displaying the same error again */
+ if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
+ return PTR_ERR(event);
/* vary the KERN level based on the returned errno */
if (PTR_ERR(event) == -EOPNOTSUPP)