diff options
author | David S. Miller <davem@davemloft.net> | 2009-07-10 05:18:24 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-07-10 05:18:24 +0200 |
commit | e5a8a896f5180f2950695d2d0b79db348d200ca4 (patch) | |
tree | 04adc57ae51a6d30a89ffae970770b81ee81fc23 /include | |
parent | netpoll: Introduce netpoll_carrier_timeout kernel option (diff) | |
parent | cxgb3: Fix crash caused by stashing wrong netdev_queue (diff) | |
download | linux-e5a8a896f5180f2950695d2d0b79db348d200ca4.tar.xz linux-e5a8a896f5180f2950695d2d0b79db348d200ca4.zip |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/rfkill.h | 1 | ||||
-rw-r--r-- | include/linux/spinlock.h | 5 | ||||
-rw-r--r-- | include/net/sock.h | 69 |
3 files changed, 74 insertions, 1 deletions
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e73e2429a1b1..2ce29831feb6 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -99,7 +99,6 @@ enum rfkill_user_states { #undef RFKILL_STATE_UNBLOCKED #undef RFKILL_STATE_HARD_BLOCKED -#include <linux/types.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mutex.h> diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 252b245cfcf4..4be57ab03478 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -132,6 +132,11 @@ do { \ #endif /*__raw_spin_is_contended*/ #endif +/* The lock does not imply full memory barrier. */ +#ifndef ARCH_HAS_SMP_MB_AFTER_LOCK +static inline void smp_mb__after_lock(void) { smp_mb(); } +#endif + /** * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/include/net/sock.h b/include/net/sock.h index 352f06bbd7a9..2c0da9239b95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -54,6 +54,7 @@ #include <linux/filter.h> #include <linux/rculist_nulls.h> +#include <linux/poll.h> #include <asm/atomic.h> #include <net/dst.h> @@ -1241,6 +1242,74 @@ static inline int sk_has_allocations(const struct sock *sk) return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); } +/** + * sk_has_sleeper - check if there are any waiting processes + * @sk: socket + * + * Returns true if socket has waiting processes + * + * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory + * barrier call. They were added due to the race found within the tcp code. + * + * Consider following tcp code paths: + * + * CPU1 CPU2 + * + * sys_select receive packet + * ... ... + * __add_wait_queue update tp->rcv_nxt + * ... ... + * tp->rcv_nxt check sock_def_readable + * ... { + * schedule ... + * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + * wake_up_interruptible(sk->sk_sleep) + * ... + * } + * + * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay + * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 + * could then endup calling schedule and sleep forever if there are no more + * data on the socket. + * + * The sk_has_sleeper is always called right after a call to read_lock, so we + * can use smp_mb__after_lock barrier. + */ +static inline int sk_has_sleeper(struct sock *sk) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier is paired in the sock_poll_wait. + */ + smp_mb__after_lock(); + return sk->sk_sleep && waitqueue_active(sk->sk_sleep); +} + +/** + * sock_poll_wait - place memory barrier behind the poll_wait call. + * @filp: file + * @wait_address: socket wait queue + * @p: poll_table + * + * See the comments in the sk_has_sleeper function. + */ +static inline void sock_poll_wait(struct file *filp, + wait_queue_head_t *wait_address, poll_table *p) +{ + if (p && wait_address) { + poll_wait(filp, wait_address, p); + /* + * We need to be sure we are in sync with the + * socket flags modification. + * + * This memory barrier is paired in the sk_has_sleeper. + */ + smp_mb(); + } +} + /* * Queue a received datagram if it will fit. Stream and sequenced * protocols can't normally use this as they need to fit buffers in |