diff options
author | David S. Miller <davem@davemloft.net> | 2014-12-12 03:15:37 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-12-12 03:15:37 +0100 |
commit | 697766df6b952f09b17eefda8b5ef746acb9c1eb (patch) | |
tree | a4962667802529c26231f4768c0233a15f9e9e4c | |
parent | Merge branch 'dsa' (diff) | |
parent | fm10k/igb/ixgbe: Use dma_rmb on Rx descriptor reads (diff) | |
download | linux-697766df6b952f09b17eefda8b5ef746acb9c1eb.tar.xz linux-697766df6b952f09b17eefda8b5ef746acb9c1eb.zip |
Merge branch 'dma_mb'
Alexander Duyck says:
====================
arch: Add lightweight memory barriers for coherent memory access
These patches introduce two new primitives for synchronizing cache coherent
memory writes and reads. These two new primitives are:
dma_rmb()
dma_wmb()
The first patch cleans up some unnecessary overhead related to the
definition of read_barrier_depends, smp_read_barrier_depends, and comments
related to the barrier.
The second patch adds the primitives for the applicable architectures and
asm-generic.
The third patch adds the barriers to r8169 which turns out to be a good
example of where the new barriers might be useful as they have full
rmb()/wmb() barriers ordering accesses to the descriptors and the DescOwn
bit.
The fourth patch adds support for coherent_rmb() to the Intel fm10k, igb,
and ixgbe drivers. Testing with the ixgbe driver has shown a processing
time reduction of at least 7ns per 64B frame on a Core i7-4930K.
This patch series is essentially the v7 for:
v4-7: Add lightweight memory barriers for coherent memory access
v3: Add lightweight memory barriers fast_rmb() and fast_wmb()
v2: Introduce load_acquire() and store_release()
v1: Introduce read_acquire()
The key changes in this patch series versus the earlier patches are:
v7 resubmit:
- Added Acked-by: Ben Herrenschmidt from v5 to dma_rmb/wmb patch
- No code changes from previous set, still applies cleanly and builds.
v7:
- Dropped test/debug patch that was accidentally slipped in
v6:
- Replaced "memory based device I/O" with "consistent memory" in
docs
- Added reference to DMA-API.txt to explain consistent memory
v5:
- Renamed barriers dma_rmb and dma_wmb
- Undid smp_wmb changes in x86 and PowerPC
- Defined smp_rmb as __lwsync for SMP case on PowerPC
v4:
- Renamed barriers coherent_rmb and coherent_wmb
- Added smp_lwsync for use in smp_load_acquire/smp_store_release
v3:
- Moved away from acquire()/store() and instead focused on barriers
- Added cleanup of read_barrier_depends
- Added change in r8169 to fix cur_tx/DescOwn ordering
- Simplified changes to just replacing/moving barriers in r8169
- Added update to documentation with code example
v2:
- Renamed read_acquire() to be consistent with smp_load_acquire()
- Changed barrier used to be consistent with smp_load_acquire()
- Updated PowerPC code to use __lwsync based on IBM article
- Added store_release() as this is a viable use case for drivers
- Added r8169 patch which is able to fully use primitives
- Added fm10k/igb/ixgbe patch which is able to test performance
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/memory-barriers.txt | 42 | ||||
-rw-r--r-- | arch/alpha/include/asm/barrier.h | 51 | ||||
-rw-r--r-- | arch/arm/include/asm/barrier.h | 4 | ||||
-rw-r--r-- | arch/arm64/include/asm/barrier.h | 3 | ||||
-rw-r--r-- | arch/blackfin/include/asm/barrier.h | 51 | ||||
-rw-r--r-- | arch/ia64/include/asm/barrier.h | 25 | ||||
-rw-r--r-- | arch/metag/include/asm/barrier.h | 19 | ||||
-rw-r--r-- | arch/mips/include/asm/barrier.h | 61 | ||||
-rw-r--r-- | arch/powerpc/include/asm/barrier.h | 19 | ||||
-rw-r--r-- | arch/s390/include/asm/barrier.h | 7 | ||||
-rw-r--r-- | arch/sparc/include/asm/barrier_64.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/barrier.h | 70 | ||||
-rw-r--r-- | arch/x86/um/asm/barrier.h | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/fm10k/fm10k_main.c | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/igb/igb_main.c | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/realtek/r8169.c | 29 | ||||
-rw-r--r-- | include/asm-generic/barrier.h | 8 |
18 files changed, 258 insertions, 179 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 7ee2ae6d5451..70a09f8a0383 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1633,6 +1633,48 @@ There are some more advanced barrier functions: operations" subsection for information on where to use these. + (*) dma_wmb(); + (*) dma_rmb(); + + These are for use with consistent memory to guarantee the ordering + of writes or reads of shared memory accessible to both the CPU and a + DMA capable device. + + For example, consider a device driver that shares memory with a device + and uses a descriptor status value to indicate if the descriptor belongs + to the device or the CPU, and a doorbell to notify it when new + descriptors are available: + + if (desc->status != DEVICE_OWN) { + /* do not read data until we own descriptor */ + dma_rmb(); + + /* read/modify data */ + read_data = desc->data; + desc->data = write_data; + + /* flush modifications before status update */ + dma_wmb(); + + /* assign ownership */ + desc->status = DEVICE_OWN; + + /* force memory to sync before notifying device via MMIO */ + wmb(); + + /* notify device of new descriptors */ + writel(DESC_NOTIFY, doorbell); + } + + The dma_rmb() allows us guarantee the device has released ownership + before we read the data from the descriptor, and he dma_wmb() allows + us to guarantee the data is written to the descriptor before the device + can see it now has ownership. The wmb() is needed to guarantee that the + cache coherent memory writes have completed before attempting a write to + the cache incoherent MMIO region. + + See Documentation/DMA-API.txt for more information on consistent memory. + MMIO WRITE BARRIER ------------------ diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h index 3832bdb794fe..77516c87255d 100644 --- a/arch/alpha/include/asm/barrier.h +++ b/arch/alpha/include/asm/barrier.h @@ -7,6 +7,57 @@ #define rmb() __asm__ __volatile__("mb": : :"memory") #define wmb() __asm__ __volatile__("wmb": : :"memory") +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + */ #define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") #ifdef CONFIG_SMP diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index c6a3e73a6e24..d2f81e6b8c1c 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -43,10 +43,14 @@ #define mb() do { dsb(); outer_sync(); } while (0) #define rmb() dsb() #define wmb() do { dsb(st); outer_sync(); } while (0) +#define dma_rmb() dmb(osh) +#define dma_wmb() dmb(oshst) #else #define mb() barrier() #define rmb() barrier() #define wmb() barrier() +#define dma_rmb() barrier() +#define dma_wmb() barrier() #endif #ifndef CONFIG_SMP diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 6389d60574d9..a5abb0062d6e 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -32,6 +32,9 @@ #define rmb() dsb(ld) #define wmb() dsb(st) +#define dma_rmb() dmb(oshld) +#define dma_wmb() dmb(oshst) + #ifndef CONFIG_SMP #define smp_mb() barrier() #define smp_rmb() barrier() diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h index 420006877998..dfb66fe88b34 100644 --- a/arch/blackfin/include/asm/barrier.h +++ b/arch/blackfin/include/asm/barrier.h @@ -22,6 +22,57 @@ # define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0) # define rmb() do { barrier(); smp_check_barrier(); } while (0) # define wmb() do { barrier(); smp_mark_barrier(); } while (0) +/* + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + */ # define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) #endif diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index a48957c7b445..f6769eb2bbf9 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -35,26 +35,25 @@ * it's (presumably) much slower than mf and (b) mf.a is supported for * sequential memory pages only. */ -#define mb() ia64_mf() -#define rmb() mb() -#define wmb() mb() -#define read_barrier_depends() do { } while(0) +#define mb() ia64_mf() +#define rmb() mb() +#define wmb() mb() + +#define dma_rmb() mb() +#define dma_wmb() mb() #ifdef CONFIG_SMP # define smp_mb() mb() -# define smp_rmb() rmb() -# define smp_wmb() wmb() -# define smp_read_barrier_depends() read_barrier_depends() - #else - # define smp_mb() barrier() -# define smp_rmb() barrier() -# define smp_wmb() barrier() -# define smp_read_barrier_depends() do { } while(0) - #endif +#define smp_rmb() smp_mb() +#define smp_wmb() smp_mb() + +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #define smp_mb__before_atomic() barrier() #define smp_mb__after_atomic() barrier() diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index c7591e80067c..d703d8e26a65 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -4,8 +4,6 @@ #include <asm/metag_mem.h> #define nop() asm volatile ("NOP") -#define mb() wmb() -#define rmb() barrier() #ifdef CONFIG_METAG_META21 @@ -41,13 +39,13 @@ static inline void wr_fence(void) #endif /* !CONFIG_METAG_META21 */ -static inline void wmb(void) -{ - /* flush writes through the write combiner */ - wr_fence(); -} +/* flush writes through the write combiner */ +#define mb() wr_fence() +#define rmb() barrier() +#define wmb() mb() -#define read_barrier_depends() do { } while (0) +#define dma_rmb() rmb() +#define dma_wmb() wmb() #ifndef CONFIG_SMP #define fence() do { } while (0) @@ -82,7 +80,10 @@ static inline void fence(void) #define smp_wmb() barrier() #endif #endif -#define smp_read_barrier_depends() do { } while (0) + +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #define set_mb(var, value) do { var = value; smp_mb(); } while (0) #define smp_store_release(p, v) \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index d0101dd0575e..2b8bbbcb9be0 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -10,58 +10,6 @@ #include <asm/addrspace.h> -/* - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - */ - #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) @@ -127,20 +75,21 @@ #include <asm/wbflush.h> -#define wmb() fast_wmb() -#define rmb() fast_rmb() #define mb() wbflush() #define iob() wbflush() #else /* !CONFIG_CPU_HAS_WB */ -#define wmb() fast_wmb() -#define rmb() fast_rmb() #define mb() fast_mb() #define iob() fast_iob() #endif /* !CONFIG_CPU_HAS_WB */ +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define dma_wmb() fast_wmb() +#define dma_rmb() fast_rmb() + #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) # ifdef CONFIG_CPU_CAVIUM_OCTEON # define smp_mb() __sync() diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index bab79a110c7b..a3bf5be111ff 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -33,12 +33,9 @@ #define mb() __asm__ __volatile__ ("sync" : : : "memory") #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define read_barrier_depends() do { } while(0) #define set_mb(var, value) do { var = value; mb(); } while (0) -#ifdef CONFIG_SMP - #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC #else @@ -46,20 +43,26 @@ #endif #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") +#define dma_rmb() __lwsync() +#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") + +#ifdef CONFIG_SMP +#define smp_lwsync() __lwsync() #define smp_mb() mb() #define smp_rmb() __lwsync() #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") -#define smp_read_barrier_depends() read_barrier_depends() #else -#define __lwsync() barrier() +#define smp_lwsync() barrier() #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) #endif /* CONFIG_SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + /* * This is a barrier which prevents following instructions from being * started until the value of the argument x is known. For example, if @@ -72,7 +75,7 @@ #define smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ACCESS_ONCE(*p) = (v); \ } while (0) @@ -80,7 +83,7 @@ do { \ ({ \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ___p1; \ }) diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index b5dce6544d76..8d724718ec21 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -24,11 +24,14 @@ #define rmb() mb() #define wmb() mb() -#define read_barrier_depends() do { } while(0) +#define dma_rmb() rmb() +#define dma_wmb() wmb() #define smp_mb() mb() #define smp_rmb() rmb() #define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() + +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 305dcc3dc721..76648941fea7 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -37,7 +37,9 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define rmb() __asm__ __volatile__("":::"memory") #define wmb() __asm__ __volatile__("":::"memory") -#define read_barrier_depends() do { } while(0) +#define dma_rmb() rmb() +#define dma_wmb() wmb() + #define set_mb(__var, __value) \ do { __var = __value; membar_safe("#StoreLoad"); } while(0) @@ -51,7 +53,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define smp_wmb() __asm__ __volatile__("":::"memory") #endif -#define smp_read_barrier_depends() do { } while(0) +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 0f4460b5636d..2ab1eb33106e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,78 +24,28 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() +#define dma_rmb() rmb() #else -# define smp_rmb() barrier() +#define dma_rmb() barrier() #endif +#define dma_wmb() barrier() + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() dma_rmb() #define smp_wmb() barrier() -#define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #if defined(CONFIG_X86_PPRO_FENCE) /* diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index cc04e67bfd05..2d7d9a1f5b53 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -29,20 +29,18 @@ #endif /* CONFIG_X86_32 */ -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP - -#define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE -#define smp_rmb() rmb() +#define dma_rmb() rmb() #else /* CONFIG_X86_PPRO_FENCE */ -#define smp_rmb() barrier() +#define dma_rmb() barrier() #endif /* CONFIG_X86_PPRO_FENCE */ +#define dma_wmb() barrier() -#define smp_wmb() barrier() +#ifdef CONFIG_SMP -#define smp_read_barrier_depends() read_barrier_depends() +#define smp_mb() mb() +#define smp_rmb() dma_rmb() +#define smp_wmb() barrier() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* CONFIG_SMP */ @@ -50,11 +48,13 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* CONFIG_SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + /* * Stop RDTSC speculation. This is needed when you need to use RDTSC * (or get_cycles or vread that possibly accesses the TSC) in a defined diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index ee1ecb146df7..eb088b129bc7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -615,14 +615,14 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean); - if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD)) + if (!rx_desc->d.staterr) break; /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we know the - * RXD_STATUS_DD bit is set + * descriptor has been written back */ - rmb(); + dma_rmb(); /* retrieve a buffer from the ring */ skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 2e526d4904a6..ff59897a9463 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6910,14 +6910,14 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); - if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + if (!rx_desc->wb.upper.status_error) break; /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we know the - * RXD_STAT_DD bit is set + * descriptor has been written back */ - rmb(); + dma_rmb(); /* retrieve a buffer from the ring */ skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 798b05556e1b..2ed2c7de2304 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2009,15 +2009,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); - if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) + if (!rx_desc->wb.upper.status_error) break; - /* - * This memory barrier is needed to keep us from reading + /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we know the - * RXD_STAT_DD bit is set + * descriptor has been written back */ - rmb(); + dma_rmb(); /* retrieve a buffer from the ring */ skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3dad7e884952..088136b37ebe 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6605,6 +6605,9 @@ static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz) { u32 eor = le32_to_cpu(desc->opts1) & RingEnd; + /* Force memory writes to complete before releasing descriptor */ + dma_wmb(); + desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz); } @@ -6612,7 +6615,6 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping, u32 rx_buf_sz) { desc->addr = cpu_to_le64(mapping); - wmb(); rtl8169_mark_to_asic(desc, rx_buf_sz); } @@ -7073,16 +7075,18 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); - wmb(); + /* Force memory writes to complete before releasing descriptor */ + dma_wmb(); /* Anti gcc 2.95.3 bugware (sic) */ status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); txd->opts1 = cpu_to_le32(status); - tp->cur_tx += frags + 1; - + /* Force all memory writes to complete before notifying device */ wmb(); + tp->cur_tx += frags + 1; + RTL_W8(TxPoll, NPQ); mmiowb(); @@ -7181,11 +7185,16 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) struct ring_info *tx_skb = tp->tx_skb + entry; u32 status; - rmb(); status = le32_to_cpu(tp->TxDescArray[entry].opts1); if (status & DescOwn) break; + /* This barrier is needed to keep us from reading + * any other fields out of the Tx descriptor until + * we know the status of DescOwn + */ + dma_rmb(); + rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb, tp->TxDescArray + entry); if (status & LastFrag) { @@ -7280,11 +7289,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget struct RxDesc *desc = tp->RxDescArray + entry; u32 status; - rmb(); status = le32_to_cpu(desc->opts1) & tp->opts1_mask; - if (status & DescOwn) break; + + /* This barrier is needed to keep us from reading + * any other fields out of the Rx descriptor until + * we know the status of DescOwn + */ + dma_rmb(); + if (unlikely(status & RxRES)) { netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n", status); @@ -7346,7 +7360,6 @@ process_pkt: } release_descriptor: desc->opts2 = 0; - wmb(); rtl8169_mark_to_asic(desc, rx_buf_sz); } diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 1402fa855388..f5c40b0fadc2 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -42,6 +42,14 @@ #define wmb() mb() #endif +#ifndef dma_rmb +#define dma_rmb() rmb() +#endif + +#ifndef dma_wmb +#define dma_wmb() wmb() +#endif + #ifndef read_barrier_depends #define read_barrier_depends() do { } while (0) #endif |