summaryrefslogtreecommitdiffstats
path: root/arch/parisc/kernel/entry.S
diff options
context:
space:
mode:
authorJohn David Anglin <dave.anglin@bell.net>2015-07-01 23:18:37 +0200
committerHelge Deller <deller@gmx.de>2015-07-10 21:47:47 +0200
commit01ab60570427caa24b9debc369e452e86cd9beb4 (patch)
tree473a38189494252e70a98a17bc342015ee0c681f /arch/parisc/kernel/entry.S
parentstifb: Implement hardware accelerated copyarea (diff)
downloadlinux-01ab60570427caa24b9debc369e452e86cd9beb4.tar.xz
linux-01ab60570427caa24b9debc369e452e86cd9beb4.zip
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results
The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
Diffstat (limited to 'arch/parisc/kernel/entry.S')
-rw-r--r--arch/parisc/kernel/entry.S163
1 files changed, 79 insertions, 84 deletions
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 75819617f93b..c5ef4081b01d 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -45,7 +45,7 @@
.level 2.0
#endif
- .import pa_dbit_lock,data
+ .import pa_tlb_lock,data
/* space_to_prot macro creates a prot id from a space id */
@@ -420,8 +420,8 @@
SHLREG %r9,PxD_VALUE_SHIFT,\pmd
extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */
- shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd
- LDREG %r0(\pmd),\pte /* pmd is now pte */
+ shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
+ LDREG %r0(\pmd),\pte
bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault
.endm
@@ -453,57 +453,53 @@
L2_ptep \pgd,\pte,\index,\va,\fault
.endm
- /* Acquire pa_dbit_lock lock. */
- .macro dbit_lock spc,tmp,tmp1
+ /* Acquire pa_tlb_lock lock and recheck page is still present. */
+ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
#ifdef CONFIG_SMP
cmpib,COND(=),n 0,\spc,2f
- load32 PA(pa_dbit_lock),\tmp
+ load32 PA(pa_tlb_lock),\tmp
1: LDCW 0(\tmp),\tmp1
cmpib,COND(=) 0,\tmp1,1b
nop
+ LDREG 0(\ptp),\pte
+ bb,<,n \pte,_PAGE_PRESENT_BIT,2f
+ b \fault
+ stw \spc,0(\tmp)
2:
#endif
.endm
- /* Release pa_dbit_lock lock without reloading lock address. */
- .macro dbit_unlock0 spc,tmp
+ /* Release pa_tlb_lock lock without reloading lock address. */
+ .macro tlb_unlock0 spc,tmp
#ifdef CONFIG_SMP
or,COND(=) %r0,\spc,%r0
stw \spc,0(\tmp)
#endif
.endm
- /* Release pa_dbit_lock lock. */
- .macro dbit_unlock1 spc,tmp
+ /* Release pa_tlb_lock lock. */
+ .macro tlb_unlock1 spc,tmp
#ifdef CONFIG_SMP
- load32 PA(pa_dbit_lock),\tmp
- dbit_unlock0 \spc,\tmp
+ load32 PA(pa_tlb_lock),\tmp
+ tlb_unlock0 \spc,\tmp
#endif
.endm
/* Set the _PAGE_ACCESSED bit of the PTE. Be clever and
* don't needlessly dirty the cache line if it was already set */
- .macro update_ptep spc,ptep,pte,tmp,tmp1
-#ifdef CONFIG_SMP
- or,COND(=) %r0,\spc,%r0
- LDREG 0(\ptep),\pte
-#endif
+ .macro update_accessed ptp,pte,tmp,tmp1
ldi _PAGE_ACCESSED,\tmp1
or \tmp1,\pte,\tmp
and,COND(<>) \tmp1,\pte,%r0
- STREG \tmp,0(\ptep)
+ STREG \tmp,0(\ptp)
.endm
/* Set the dirty bit (and accessed bit). No need to be
* clever, this is only used from the dirty fault */
- .macro update_dirty spc,ptep,pte,tmp
-#ifdef CONFIG_SMP
- or,COND(=) %r0,\spc,%r0
- LDREG 0(\ptep),\pte
-#endif
+ .macro update_dirty ptp,pte,tmp
ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp
or \tmp,\pte,\pte
- STREG \pte,0(\ptep)
+ STREG \pte,0(\ptp)
.endm
/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
@@ -1148,14 +1144,14 @@ dtlb_miss_20w:
L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
idtlbt pte,prot
- dbit_unlock1 spc,t0
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1174,14 +1170,14 @@ nadtlb_miss_20w:
L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
idtlbt pte,prot
- dbit_unlock1 spc,t0
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1202,20 +1198,20 @@ dtlb_miss_11:
L2_ptep ptp,pte,t0,va,dtlb_check_alias_11
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11
+ update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
- mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */
+ mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
idtlba pte,(%sr1,va)
idtlbp prot,(%sr1,va)
- mtsp t0, %sr1 /* Restore sr1 */
- dbit_unlock1 spc,t0
+ mtsp t1, %sr1 /* Restore sr1 */
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1235,21 +1231,20 @@ nadtlb_miss_11:
L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11
+ update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
-
- mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */
+ mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
idtlba pte,(%sr1,va)
idtlbp prot,(%sr1,va)
- mtsp t0, %sr1 /* Restore sr1 */
- dbit_unlock1 spc,t0
+ mtsp t1, %sr1 /* Restore sr1 */
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1269,16 +1264,16 @@ dtlb_miss_20:
L2_ptep ptp,pte,t0,va,dtlb_check_alias_20
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
- f_extend pte,t0
+ f_extend pte,t1
idtlbt pte,prot
- dbit_unlock1 spc,t0
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1297,16 +1292,16 @@ nadtlb_miss_20:
L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
- f_extend pte,t0
+ f_extend pte,t1
- idtlbt pte,prot
- dbit_unlock1 spc,t0
+ idtlbt pte,prot
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1406,14 +1401,14 @@ itlb_miss_20w:
L3_ptep ptp,pte,t0,va,itlb_fault
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,itlb_fault
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
iitlbt pte,prot
- dbit_unlock1 spc,t0
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1430,14 +1425,14 @@ naitlb_miss_20w:
L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
iitlbt pte,prot
- dbit_unlock1 spc,t0
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1458,20 +1453,20 @@ itlb_miss_11:
L2_ptep ptp,pte,t0,va,itlb_fault
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,itlb_fault
+ update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
- mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */
+ mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
iitlba pte,(%sr1,va)
iitlbp prot,(%sr1,va)
- mtsp t0, %sr1 /* Restore sr1 */
- dbit_unlock1 spc,t0
+ mtsp t1, %sr1 /* Restore sr1 */
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1482,20 +1477,20 @@ naitlb_miss_11:
L2_ptep ptp,pte,t0,va,naitlb_check_alias_11
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11
+ update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
- mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */
+ mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
iitlba pte,(%sr1,va)
iitlbp prot,(%sr1,va)
- mtsp t0, %sr1 /* Restore sr1 */
- dbit_unlock1 spc,t0
+ mtsp t1, %sr1 /* Restore sr1 */
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1516,16 +1511,16 @@ itlb_miss_20:
L2_ptep ptp,pte,t0,va,itlb_fault
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,itlb_fault
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
- f_extend pte,t0
+ f_extend pte,t1
iitlbt pte,prot
- dbit_unlock1 spc,t0
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1536,16 +1531,16 @@ naitlb_miss_20:
L2_ptep ptp,pte,t0,va,naitlb_check_alias_20
- dbit_lock spc,t0,t1
- update_ptep spc,ptp,pte,t0,t1
+ tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20
+ update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot
- f_extend pte,t0
+ f_extend pte,t1
iitlbt pte,prot
- dbit_unlock1 spc,t0
+ tlb_unlock1 spc,t0
rfir
nop
@@ -1568,14 +1563,14 @@ dbit_trap_20w:
L3_ptep ptp,pte,t0,va,dbit_fault
- dbit_lock spc,t0,t1
- update_dirty spc,ptp,pte,t1
+ tlb_lock spc,ptp,pte,t0,t1,dbit_fault
+ update_dirty ptp,pte,t1
make_insert_tlb spc,pte,prot
idtlbt pte,prot
- dbit_unlock0 spc,t0
+ tlb_unlock0 spc,t0
rfir
nop
#else
@@ -1588,8 +1583,8 @@ dbit_trap_11:
L2_ptep ptp,pte,t0,va,dbit_fault
- dbit_lock spc,t0,t1
- update_dirty spc,ptp,pte,t1
+ tlb_lock spc,ptp,pte,t0,t1,dbit_fault
+ update_dirty ptp,pte,t1
make_insert_tlb_11 spc,pte,prot
@@ -1600,8 +1595,8 @@ dbit_trap_11:
idtlbp prot,(%sr1,va)
mtsp t1, %sr1 /* Restore sr1 */
- dbit_unlock0 spc,t0
+ tlb_unlock0 spc,t0
rfir
nop
@@ -1612,16 +1607,16 @@ dbit_trap_20:
L2_ptep ptp,pte,t0,va,dbit_fault
- dbit_lock spc,t0,t1
- update_dirty spc,ptp,pte,t1
+ tlb_lock spc,ptp,pte,t0,t1,dbit_fault
+ update_dirty ptp,pte,t1
make_insert_tlb spc,pte,prot
f_extend pte,t1
- idtlbt pte,prot
- dbit_unlock0 spc,t0
+ idtlbt pte,prot
+ tlb_unlock0 spc,t0
rfir
nop
#endif