From 18810d1ebac89232d8f218a318ed9ff7ef198e96 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 23 Feb 2006 13:16:44 -0600 Subject: [IA64-SGI] Make number of TIO nodes configurable Make the limit for the number of TIO nodes a function of the number of C/M nodes in the system instead of a hardcoded constant. The number of TIO nodes should be the same as the number of C/M nodes. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- include/asm-ia64/sn/arch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index 91c31be87b13..16adc93d7a72 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -31,7 +31,8 @@ * to ACPI3.0, this limit will be removed. The notion of "compact nodes" * should be deleted and TIOs should be included in MAX_NUMNODES. */ -#define MAX_COMPACT_NODES 512 +#define MAX_TIO_NODES MAX_NUMNODES +#define MAX_COMPACT_NODES (MAX_NUMNODES + MAX_TIO_NODES) /* * Maximum number of nodes in all partitions and in all coherency domains. -- cgit v1.2.3 From 778e2ac5970e445f8c6b7d8aa597ac162afe270a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 28 Feb 2006 17:04:20 +0000 Subject: [MIPS] Fix build error on processors that don's support copy-on-write. Signed-off-by: Ralf Baechle --- arch/mips/lib/iomap.c | 2 +- include/asm-mips/io.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/mips/lib/iomap.c b/arch/mips/lib/iomap.c index 7e2ced715cfb..f4ac5bbcd81f 100644 --- a/arch/mips/lib/iomap.c +++ b/arch/mips/lib/iomap.c @@ -63,7 +63,7 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) return ioport_map(start, len); if (flags & IORESOURCE_MEM) { if (flags & IORESOURCE_CACHEABLE) - return ioremap_cacheable_cow(start, len); + return ioremap_cachable(start, len); return ioremap_nocache(start, len); } diff --git a/include/asm-mips/io.h b/include/asm-mips/io.h index 5a4c8a54b8f4..8c011aa61afa 100644 --- a/include/asm-mips/io.h +++ b/include/asm-mips/io.h @@ -282,6 +282,24 @@ static inline void __iomem * __ioremap_mode(phys_t offset, unsigned long size, #define ioremap_nocache(offset, size) \ __ioremap_mode((offset), (size), _CACHE_UNCACHED) +/* + * ioremap_cachable - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_nocache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked cachable by + * the CPU. Also enables full write-combining. Useful for some + * memory-like regions on I/O busses. + */ +#define ioremap_cachable(offset, size) \ + __ioremap_mode((offset), (size), PAGE_CACHABLE_DEFAULT) + /* * These two are MIPS specific ioremap variant. ioremap_cacheable_cow * requests a cachable mapping, ioremap_uncached_accelerated requests a -- cgit v1.2.3 From d2b176ed878d4d5fcc0bd35656dfd373f3702af9 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 28 Feb 2006 09:42:23 -0800 Subject: [IA64] sysctl option to silence unaligned trap warnings Allow sysadmin to disable all warnings about userland apps making unaligned accesses by using: # echo 1 > /proc/sys/kernel/ignore-unaligned-usertrap Rather than having to use prctl on a process by process basis. Default behaivour leaves the warnings enabled. Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/kernel/unaligned.c | 31 ++++++++++++++++++++++++++++--- include/linux/sysctl.h | 1 + kernel/sysctl.c | 14 ++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 112913896844..1e357550c776 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -52,6 +52,15 @@ dump (const char *str, void *vp, size_t len) #define IA64_FIRST_ROTATING_FR 32 #define SIGN_EXT9 0xffffffffffffff00ul +/* + * sysctl settable hook which tells the kernel whether to honor the + * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want + * to allow the super user to enable/disable this for security reasons + * (i.e. don't allow attacker to fill up logs with unaligned accesses). + */ +int no_unaligned_warning; +static int noprint_warning; + /* * For M-unit: * @@ -1324,8 +1333,9 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0) goto force_sigbus; - if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) - && within_logging_rate_limit()) + if (!no_unaligned_warning && + !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) && + within_logging_rate_limit()) { char buf[200]; /* comm[] is at most 16 bytes... */ size_t len; @@ -1340,7 +1350,22 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if (user_mode(regs)) tty_write_message(current->signal->tty, buf); buf[len-1] = '\0'; /* drop '\r' */ - printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */ + /* watch for command names containing %s */ + printk(KERN_WARNING "%s", buf); + } else { + if (no_unaligned_warning && !noprint_warning) { + noprint_warning = 1; + printk(KERN_WARNING "%s(%d) encountered an " + "unaligned exception which required\n" + "kernel assistance, which degrades " + "the performance of the application.\n" + "Unaligned exception warnings have " + "been disabled by the system " + "administrator\n" + "echo 0 > /proc/sys/kernel/ignore-" + "unaligned-usertrap to re-enable\n", + current->comm, current->pid); + } } } else { if (within_logging_rate_limit()) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 0e92bf7ec28e..bac61db26456 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -147,6 +147,7 @@ enum KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ + KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c05a2b7125e1..acf6c1550f27 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -124,6 +124,10 @@ extern int sysctl_hz_timer; extern int acct_parm[]; #endif +#ifdef CONFIG_IA64 +extern int no_unaligned_warning; +#endif + static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, ctl_table *, void **); static int proc_doutsstring(ctl_table *table, int write, struct file *filp, @@ -665,6 +669,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_IA64 + { + .ctl_name = KERN_IA64_UNALIGNED, + .procname = "ignore-unaligned-usertrap", + .data = &no_unaligned_warning, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = 0 } }; -- cgit v1.2.3 From 0551fbd29e16fccd46e41b7d01bf0f8f39b14212 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 28 Feb 2006 16:59:19 -0800 Subject: [PATCH] Add mm->task_size and fix powerpc vdso This patch adds mm->task_size to keep track of the task size of a given mm and uses that to fix the powerpc vdso so that it uses the mm task size to decide what pages to fault in instead of the current thread flags (which broke when ptracing). (akpm: I expect that mm_struct.task_size will become the way in which we finally sort out the confusion between 32-bit processes and 32-bit mm's. It may need tweaks, but at this stage this patch is powerpc-only.) Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/vdso.c | 4 ++-- fs/exec.c | 6 ++++++ include/linux/sched.h | 5 +++-- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f0c47dab0903..04f7df39ffbb 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -182,8 +182,8 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma, unsigned long offset = address - vma->vm_start; struct page *pg; #ifdef CONFIG_PPC64 - void *vbase = test_thread_flag(TIF_32BIT) ? - vdso32_kbase : vdso64_kbase; + void *vbase = (vma->vm_mm->task_size > TASK_SIZE_USER32) ? + vdso64_kbase : vdso32_kbase; #else void *vbase = vdso32_kbase; #endif diff --git a/fs/exec.c b/fs/exec.c index 0e1c95074d42..0b515ac53134 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -885,6 +885,12 @@ int flush_old_exec(struct linux_binprm * bprm) current->flags &= ~PF_RANDOMIZE; flush_thread(); + /* Set the new mm task size. We have to do that late because it may + * depend on TIF_32BIT which is only updated in flush_thread() on + * some architectures like powerpc + */ + current->mm->task_size = TASK_SIZE; + if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { diff --git a/include/linux/sched.h b/include/linux/sched.h index b6f51e3a38ec..ff2e09c953b9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -298,8 +298,9 @@ struct mm_struct { unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void (*unmap_area) (struct mm_struct *mm, unsigned long addr); - unsigned long mmap_base; /* base of mmap area */ - unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ + unsigned long mmap_base; /* base of mmap area */ + unsigned long task_size; /* size of task vm space */ + unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ -- cgit v1.2.3 From f0892b89e3c19c7d805825ca12511d26dcdf6415 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Tue, 28 Feb 2006 01:18:29 -0500 Subject: [PATCH] pcmcia: Add macro to match PCMCIA cards by numeric ID and first vendor string This is needed to distinguish Intersil and non-Intersil cards with numeric ID 0x0156, 0x0002. Signed-off-by: Pavel Roskin Signed-off-by: Dominik Brodowski --- include/pcmcia/device_id.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/pcmcia/device_id.h b/include/pcmcia/device_id.h index 346d81ece287..e04e0b0d9a25 100644 --- a/include/pcmcia/device_id.h +++ b/include/pcmcia/device_id.h @@ -72,6 +72,15 @@ .prod_id = { (v1), (v2), (v3), (v4) }, \ .prod_id_hash = { (vh1), (vh2), (vh3), (vh4) }, } +#define PCMCIA_DEVICE_MANF_CARD_PROD_ID1(manf, card, v1, vh1) { \ + .match_flags = PCMCIA_DEV_ID_MATCH_MANF_ID| \ + PCMCIA_DEV_ID_MATCH_CARD_ID| \ + PCMCIA_DEV_ID_MATCH_PROD_ID1, \ + .manf_id = (manf), \ + .card_id = (card), \ + .prod_id = { (v1), NULL, NULL, NULL }, \ + .prod_id_hash = { (vh1), 0, 0, 0 }, } + /* multi-function devices */ -- cgit v1.2.3 From e5cef95d58d1e711b0bd6b00018278a06defb274 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 1 Mar 2006 13:46:00 -0800 Subject: [PATCH] fix build breakage in eeh.c in 2.6.16-rc5-git5 This patch should fixe a problem with eeh_add_device_late() not being defined in the ppc64 build process, causing the build to break. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/asm-powerpc/eeh.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index 7dfb408fe2ca..eb392032e19b 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -61,6 +61,7 @@ void __init pci_addr_cache_build(void); * to finish the eeh setup for this device. */ void eeh_add_device_early(struct device_node *); +void eeh_add_device_late(struct pci_dev *dev); void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); -- cgit v1.2.3 From 3af1efe8a301f5b1c813f5f761cb1e10d6175605 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 2 Mar 2006 13:25:26 -0500 Subject: [PATCH] reiserfs: fix unaligned bitmap usage The bitmaps associated with generation numbers for directory entries are declared as an array of ints. On some platforms, this causes alignment exceptions. The following patch uses the standard bitmap declaration macros to declare the bitmaps, fixing the problem. Originally from Takashi Iwai. Signed-off-by: Takashi Iwai Acked-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/namei.c | 8 ++++---- include/linux/reiserfs_fs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index c8123308e060..284f7852de8b 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -247,7 +247,7 @@ static int linear_search_in_dir_item(struct cpu_key *key, /* mark, that this generation number is used */ if (de->de_gen_number_bit_string) set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), - (unsigned long *)de->de_gen_number_bit_string); + de->de_gen_number_bit_string); // calculate pointer to name and namelen de->de_entry_num = i; @@ -431,7 +431,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, struct reiserfs_de_head *deh; INITIALIZE_PATH(path); struct reiserfs_dir_entry de; - int bit_string[MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1]; + DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); int gen_number; char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc if we create file with short name */ @@ -486,7 +486,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, /* find the proper place for the new entry */ memset(bit_string, 0, sizeof(bit_string)); - de.de_gen_number_bit_string = (char *)bit_string; + de.de_gen_number_bit_string = bit_string; retval = reiserfs_find_entry(dir, name, namelen, &path, &de); if (retval != NAME_NOT_FOUND) { if (buffer != small_buf) @@ -508,7 +508,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, } gen_number = - find_first_zero_bit((unsigned long *)bit_string, + find_first_zero_bit(bit_string, MAX_GENERATION_NUMBER + 1); if (gen_number > MAX_GENERATION_NUMBER) { /* there is no free generation number */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 7d51149bd793..dad78cecfd20 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1052,7 +1052,7 @@ struct reiserfs_dir_entry { int de_entrylen; int de_namelen; char *de_name; - char *de_gen_number_bit_string; + unsigned long *de_gen_number_bit_string; __u32 de_dir_id; __u32 de_objectid; -- cgit v1.2.3 From aa5cb02143123289bd37c30c0ad60339f8da0bad Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 1 Mar 2006 15:07:07 +1100 Subject: [PATCH] powerpc: Expose SMT and L1 icache snoop userland features This patch makes userland aware of the icache snoop capability of the POWER5 (and possibly others in the future) and of SMT capabilities. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/cputable.c | 9 ++++++--- include/asm-powerpc/cputable.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 10696456a4c6..e4e81374cb9a 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -53,8 +53,10 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); PPC_FEATURE_HAS_MMU) #define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64) #define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) -#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5) -#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS) +#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) @@ -267,7 +269,8 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "Cell Broadband Engine", .cpu_features = CPU_FTRS_CELL, .cpu_user_features = COMMON_USER_PPC64 | - PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP, + PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_SMT, .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_be, diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 90d005bb4d1c..5638518968c3 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -20,6 +20,8 @@ #define PPC_FEATURE_POWER5_PLUS 0x00020000 #define PPC_FEATURE_CELL 0x00010000 #define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 141aa59b5347a4a021e37cfbc2258df9af9392f8 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 3 Mar 2006 16:24:06 +1100 Subject: [PATCH] powerpc: Fix incorrect pud_ERROR() message The powerpc pud_ERROR() function misleadingly prints a message indicating a pmd error. This patch fixes it. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-powerpc/pgtable-4k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h index e9590c06ad92..80a7832d2721 100644 --- a/include/asm-powerpc/pgtable-4k.h +++ b/include/asm-powerpc/pgtable-4k.h @@ -88,4 +88,4 @@ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) + printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) -- cgit v1.2.3 From 4d000d5b9689734006d89fe9b7597c758b74a9fb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 4 Mar 2006 23:23:56 -0800 Subject: [SPARC64]: Mark __ex_table section correctly. We must use the "a" (allocate) attribute every time we emit an entry into the __ex_table section. For consistency, use "a" instead of #alloc which is some Solaris compat cruft GNU as provides on Sparc. Signed-off-by: David S. Miller --- arch/sparc64/kernel/sys32.S | 2 +- arch/sparc64/kernel/una_asm.S | 4 ++-- arch/sparc64/lib/U1copy_from_user.S | 2 +- arch/sparc64/lib/U1copy_to_user.S | 2 +- arch/sparc64/lib/U3copy_from_user.S | 2 +- arch/sparc64/lib/U3copy_to_user.S | 2 +- arch/sparc64/lib/bzero.S | 2 +- arch/sparc64/lib/copy_in_user.S | 2 +- arch/sparc64/lib/csum_copy_from_user.S | 2 +- arch/sparc64/lib/csum_copy_to_user.S | 2 +- arch/sparc64/lib/strlen_user.S | 2 +- arch/sparc64/lib/strncpy_from_user.S | 2 +- arch/sparc64/solaris/entry64.S | 2 +- include/asm-sparc64/futex.h | 2 +- include/asm-sparc64/uaccess.h | 12 ++++++------ 15 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S index 60b59375aa78..c4a1cef4b1e5 100644 --- a/arch/sparc64/kernel/sys32.S +++ b/arch/sparc64/kernel/sys32.S @@ -318,7 +318,7 @@ do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) nop nop - .section __ex_table + .section __ex_table,"a" .align 4 .word 1b, __retl_efault, 2b, __retl_efault .word 3b, __retl_efault, 4b, __retl_efault diff --git a/arch/sparc64/kernel/una_asm.S b/arch/sparc64/kernel/una_asm.S index 1f5b5b708ce7..be183fe41443 100644 --- a/arch/sparc64/kernel/una_asm.S +++ b/arch/sparc64/kernel/una_asm.S @@ -47,7 +47,7 @@ __do_int_store: mov 0, %o0 .size __do_int_store, .-__do_int_store - .section __ex_table + .section __ex_table,"a" .word 4b, __retl_efault .word 5b, __retl_efault .word 6b, __retl_efault @@ -129,7 +129,7 @@ do_int_load: mov 0, %o0 .size __do_int_load, .-__do_int_load - .section __ex_table + .section __ex_table,"a" .word 4b, __retl_efault .word 5b, __retl_efault .word 6b, __retl_efault diff --git a/arch/sparc64/lib/U1copy_from_user.S b/arch/sparc64/lib/U1copy_from_user.S index 93146a81e2d3..3192b0bf4fab 100644 --- a/arch/sparc64/lib/U1copy_from_user.S +++ b/arch/sparc64/lib/U1copy_from_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/U1copy_to_user.S b/arch/sparc64/lib/U1copy_to_user.S index 1fccc521e2bd..d1210ffb0b82 100644 --- a/arch/sparc64/lib/U1copy_to_user.S +++ b/arch/sparc64/lib/U1copy_to_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/U3copy_from_user.S b/arch/sparc64/lib/U3copy_from_user.S index df600b667e48..f5bfc8d9d216 100644 --- a/arch/sparc64/lib/U3copy_from_user.S +++ b/arch/sparc64/lib/U3copy_from_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/U3copy_to_user.S b/arch/sparc64/lib/U3copy_to_user.S index f337f22ed82e..2334f111bb0c 100644 --- a/arch/sparc64/lib/U3copy_to_user.S +++ b/arch/sparc64/lib/U3copy_to_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S index 21a933ffb7c2..1d2abcfa4e52 100644 --- a/arch/sparc64/lib/bzero.S +++ b/arch/sparc64/lib/bzero.S @@ -92,7 +92,7 @@ __bzero_done: .align 4; \ 99: retl; \ mov %o1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/copy_in_user.S b/arch/sparc64/lib/copy_in_user.S index 816076c0bc06..650af3f21f78 100644 --- a/arch/sparc64/lib/copy_in_user.S +++ b/arch/sparc64/lib/copy_in_user.S @@ -13,7 +13,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/csum_copy_from_user.S b/arch/sparc64/lib/csum_copy_from_user.S index 817ebdae39f8..a22eddbe5dba 100644 --- a/arch/sparc64/lib/csum_copy_from_user.S +++ b/arch/sparc64/lib/csum_copy_from_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov -1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/csum_copy_to_user.S b/arch/sparc64/lib/csum_copy_to_user.S index c2f9463ea1e2..d5b12f441f02 100644 --- a/arch/sparc64/lib/csum_copy_to_user.S +++ b/arch/sparc64/lib/csum_copy_to_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov -1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/strlen_user.S b/arch/sparc64/lib/strlen_user.S index 9ed54ba14fc6..114ed111e251 100644 --- a/arch/sparc64/lib/strlen_user.S +++ b/arch/sparc64/lib/strlen_user.S @@ -85,7 +85,7 @@ __strnlen_user: retl clr %o0 - .section __ex_table,#alloc + .section __ex_table,"a" .align 4 .word 10b, 30b diff --git a/arch/sparc64/lib/strncpy_from_user.S b/arch/sparc64/lib/strncpy_from_user.S index e1264650ca7a..b2f499f79427 100644 --- a/arch/sparc64/lib/strncpy_from_user.S +++ b/arch/sparc64/lib/strncpy_from_user.S @@ -125,7 +125,7 @@ __strncpy_from_user: add %o2, %o3, %o0 .size __strncpy_from_user, .-__strncpy_from_user - .section __ex_table,#alloc + .section __ex_table,"a" .align 4 .word 60b, __retl_efault .word 61b, __retl_efault diff --git a/arch/sparc64/solaris/entry64.S b/arch/sparc64/solaris/entry64.S index eb314ed23cdb..f170324e8bf2 100644 --- a/arch/sparc64/solaris/entry64.S +++ b/arch/sparc64/solaris/entry64.S @@ -217,7 +217,7 @@ solaris_unimplemented: ba,pt %xcc, ret_from_solaris nop - .section __ex_table,#alloc + .section __ex_table,"a" .align 4 .word exen, exenf diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 0caf60147e97..34c4b43d3f98 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -20,7 +20,7 @@ "4: ba 3b\n" \ " mov %5, %0\n" \ " .previous\n" \ - " .section __ex_table,#alloc\n" \ + " .section __ex_table,\"a\"\n" \ " .align 4\n" \ " .word 1b, 4b\n" \ " .word 2b, 4b\n" \ diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h index 203e8eee6351..c91d1e38eac6 100644 --- a/include/asm-sparc64/uaccess.h +++ b/include/asm-sparc64/uaccess.h @@ -136,7 +136,7 @@ __asm__ __volatile__( \ "b 2b\n\t" \ " mov %3, %0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\t" \ ".previous\n\n\t" \ @@ -148,7 +148,7 @@ if (__builtin_constant_p(ret) && ret == -EFAULT) \ __asm__ __volatile__( \ "/* Put user asm ret, inline. */\n" \ "1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, __ret_efault\n\n\t" \ ".previous\n\n\t" \ @@ -163,7 +163,7 @@ __asm__ __volatile__( \ "ret\n\t" \ " restore %%g0, %3, %%o0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\n\t" \ ".previous\n\n\t" \ @@ -206,7 +206,7 @@ __asm__ __volatile__( \ "b 2b\n\t" \ " mov %3, %0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\n\t" \ ".previous\n\t" \ @@ -218,7 +218,7 @@ if (__builtin_constant_p(retval) && retval == -EFAULT) \ __asm__ __volatile__( \ "/* Get user asm ret, inline. */\n" \ "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b,__ret_efault\n\n\t" \ ".previous\n\t" \ @@ -233,7 +233,7 @@ __asm__ __volatile__( \ "ret\n\t" \ " restore %%g0, %2, %%o0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\n\t" \ ".previous\n\t" \ -- cgit v1.2.3 From 1e4b27df55166ce3b276f55bab223fa4ae8c5525 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Mon, 6 Mar 2006 15:42:37 -0800 Subject: [PATCH] i4l: add new PCI IDs for HFC-S PCI Add new PCI IDs for HFC-S PCI based ISDN TA 'Primux II S0' and 'Primux II S0' from Gerdes AG Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/isdn/hisax/config.c | 2 ++ drivers/isdn/hisax/hfc_pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 3 files changed, 6 insertions(+) (limited to 'include') diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index 8159bcecd0c2..df9d65201819 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1929,6 +1929,8 @@ static struct pci_device_id hisax_pci_tbl[] __initdata = { {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00B, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00C, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B100, PCI_ANY_ID, PCI_ANY_ID}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B700, PCI_ANY_ID, PCI_ANY_ID}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B701, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_ABOCOM, PCI_DEVICE_ID_ABOCOM_2BD1, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_ASUSTEK, PCI_DEVICE_ID_ASUSTEK_0675, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_BERKOM, PCI_DEVICE_ID_BERKOM_T_CONCEPT, PCI_ANY_ID, PCI_ANY_ID}, diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 4866fc32d8d9..91d25acb5ede 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -51,6 +51,8 @@ static const PCI_ENTRY id_list[] = {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00B, "Billion", "B00B"}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00C, "Billion", "B00C"}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B100, "Seyeon", "B100"}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B700, "Primux II S0", "B700"}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B701, "Primux II S0 NT", "B701"}, {PCI_VENDOR_ID_ABOCOM, PCI_DEVICE_ID_ABOCOM_2BD1, "Abocom/Magitek", "2BD1"}, {PCI_VENDOR_ID_ASUSTEK, PCI_DEVICE_ID_ASUSTEK_0675, "Asuscom/Askey", "675"}, {PCI_VENDOR_ID_BERKOM, PCI_DEVICE_ID_BERKOM_T_CONCEPT, "German telekom", "T-Concept"}, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 82b83da25d77..1709b5009d2e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1752,6 +1752,8 @@ #define PCI_DEVICE_ID_CCD_B00B 0xb00b #define PCI_DEVICE_ID_CCD_B00C 0xb00c #define PCI_DEVICE_ID_CCD_B100 0xb100 +#define PCI_DEVICE_ID_CCD_B700 0xb700 +#define PCI_DEVICE_ID_CCD_B701 0xb701 #define PCI_VENDOR_ID_EXAR 0x13a8 #define PCI_DEVICE_ID_EXAR_XR17C152 0x0152 -- cgit v1.2.3 From 69239749e1ac4f3496906aa4267cb9f61ce52c9c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 6 Mar 2006 15:42:45 -0800 Subject: [PATCH] fix next_timer_interrupt() for hrtimer Also from Thomas Gleixner Function next_timer_interrupt() got broken with a recent patch 6ba1b91213e81aa92b5cf7539f7d2a94ff54947c as sys_nanosleep() was moved to hrtimer. This broke things as next_timer_interrupt() did not check hrtimer tree for next event. Function next_timer_interrupt() is needed with dyntick (CONFIG_NO_IDLE_HZ, VST) implementations, as the system can be in idle when next hrtimer event was supposed to happen. At least ARM and S390 currently use next_timer_interrupt(). Signed-off-by: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/time.c | 10 ++++++---- include/linux/hrtimer.h | 4 ++++ kernel/hrtimer.c | 35 +++++++++++++++++++++++++++++++++++ kernel/timer.c | 16 ++++++++++++++++ 4 files changed, 61 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index d7d932c02866..d6bd435a6857 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -422,12 +422,14 @@ static int timer_dyn_tick_disable(void) void timer_dyn_reprogram(void) { struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; + unsigned long next, seq; - if (dyn_tick) { - write_seqlock(&xtime_lock); - if (dyn_tick->state & DYN_TICK_ENABLED) + if (dyn_tick && (dyn_tick->state & DYN_TICK_ENABLED)) { + next = next_timer_interrupt(); + do { + seq = read_seqbegin(&xtime_lock); dyn_tick->reprogram(next_timer_interrupt() - jiffies); - write_sequnlock(&xtime_lock); + } while (read_seqretry(&xtime_lock, seq)); } } diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6361544bb6ae..6401c31d6add 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -116,6 +116,10 @@ extern int hrtimer_try_to_cancel(struct hrtimer *timer); extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); +#ifdef CONFIG_NO_IDLE_HZ +extern ktime_t hrtimer_get_next_event(void); +#endif + static inline int hrtimer_active(const struct hrtimer *timer) { return timer->state == HRTIMER_PENDING; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5ae51f1bc7c8..14bc9cfa6399 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -505,6 +505,41 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) return rem; } +#ifdef CONFIG_NO_IDLE_HZ +/** + * hrtimer_get_next_event - get the time until next expiry event + * + * Returns the delta to the next expiry event or KTIME_MAX if no timer + * is pending. + */ +ktime_t hrtimer_get_next_event(void) +{ + struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); + ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; + unsigned long flags; + int i; + + for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { + struct hrtimer *timer; + + spin_lock_irqsave(&base->lock, flags); + if (!base->first) { + spin_unlock_irqrestore(&base->lock, flags); + continue; + } + timer = rb_entry(base->first, struct hrtimer, node); + delta.tv64 = timer->expires.tv64; + spin_unlock_irqrestore(&base->lock, flags); + delta = ktime_sub(delta, base->get_time()); + if (delta.tv64 < mindelta.tv64) + mindelta.tv64 = delta.tv64; + } + if (mindelta.tv64 < 0) + mindelta.tv64 = 0; + return mindelta; +} +#endif + /** * hrtimer_init - initialize a timer to the given clock * diff --git a/kernel/timer.c b/kernel/timer.c index fc6646fd5aab..8256f3f5ec0d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -489,9 +489,21 @@ unsigned long next_timer_interrupt(void) struct list_head *list; struct timer_list *nte; unsigned long expires; + unsigned long hr_expires = MAX_JIFFY_OFFSET; + ktime_t hr_delta; tvec_t *varray[4]; int i, j; + hr_delta = hrtimer_get_next_event(); + if (hr_delta.tv64 != KTIME_MAX) { + struct timespec tsdelta; + tsdelta = ktime_to_timespec(hr_delta); + hr_expires = timespec_to_jiffies(&tsdelta); + if (hr_expires < 3) + return hr_expires + jiffies; + } + hr_expires += jiffies; + base = &__get_cpu_var(tvec_bases); spin_lock(&base->t_base.lock); expires = base->timer_jiffies + (LONG_MAX >> 1); @@ -542,6 +554,10 @@ found: } } spin_unlock(&base->t_base.lock); + + if (time_before(hr_expires, expires)) + return hr_expires; + return expires; } #endif -- cgit v1.2.3 From 5c8338904653365bfb92385b38915becb903d8bb Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Mon, 6 Mar 2006 15:42:46 -0800 Subject: [PATCH] s390: fix compile with VIRT_CPU_ACCOUNTING=n When CONFIG_VIRT_CPU_ACCOUNTING is not defined compiling fails with an undefined reference to account_vtime(). Signed-off-by: Jan Blunck Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-s390/system.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index b2e65e8bf812..6a89dbb03c1e 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -118,6 +118,8 @@ static inline void sched_cacheflush(void) extern void account_vtime(struct task_struct *); extern void account_tick_vtime(struct task_struct *); extern void account_system_vtime(struct task_struct *); +#else +#define account_vtime(x) do { /* empty */ } while (0) #endif #define finish_arch_switch(prev) do { \ -- cgit v1.2.3 From 78679302fe428f4f3dc853a51ee24f306010d874 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 6 Mar 2006 15:42:49 -0800 Subject: [PATCH] memory-hotplug compile fix include/linux/memory_hotplug.h:53: warning: 'struct page' declared inside parameter list (akpm: I tossed in a couple more possibly-needed-sometime struct decls too) Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 01f03bc06eff..968b1aa3732c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -6,6 +6,10 @@ #include #include +struct page; +struct zone; +struct pglist_data; + #ifdef CONFIG_MEMORY_HOTPLUG /* * pgdat resizing functions -- cgit v1.2.3 From a615fa83959896f8eac76c235953fb164cd1a9b9 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 6 Mar 2006 15:42:50 -0800 Subject: [PATCH] Increase max kmalloc size for very large systems Systems with extemely large numbers of nodes or cpus need to kmalloc structures larger than is currently supported. This patch increases the maximum supported size for very large systems. This patch should have no effect on current systems. (akpm: why not just use alloc_pages() for sysfs_cpus?) Signed-off-by: Jack Steiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmalloc_sizes.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h index d82d4c05c12d..bda23e00ed71 100644 --- a/include/linux/kmalloc_sizes.h +++ b/include/linux/kmalloc_sizes.h @@ -19,8 +19,10 @@ CACHE(32768) CACHE(65536) CACHE(131072) -#ifndef CONFIG_MMU +#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU) CACHE(262144) +#endif +#ifndef CONFIG_MMU CACHE(524288) CACHE(1048576) #ifdef CONFIG_LARGE_ALLOCS -- cgit v1.2.3 From 6a0e243069b09a323255f6e847c87d531961cd96 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Mar 2006 14:42:27 +0000 Subject: [ARM] 3352/1: DSB required for the completion of a TLB maintenance operation Patch from Catalin Marinas Chapter B2.7.3 in the latest ARM ARM (with v6 information) states that the completion of a TLB maintenance operation is only guaranteed by the execution of a DSB (Data Syncronization Barrier, formerly Data Write Barrier or Drain Write Buffer). Note that a DSB is only needed in the flush_tlb_kernel_* functions since the completion is guaranteed by a mode change (i.e. switching back to user mode) for the flush_tlb_user_* functions. Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- arch/arm/mm/tlb-v6.S | 1 + include/asm-arm/tlbflush.h | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 15dc1a0dffbb..9f80fa502f8f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -78,7 +78,7 @@ menu "System Type" choice prompt "ARM system type" - default ARCH_RPC + default ARCH_VERSATILE config ARCH_CLPS7500 bool "Cirrus-CL-PS7500FE" diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index 6f76b89ef46e..fd6adde39091 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -80,6 +80,7 @@ ENTRY(v6wbi_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b + mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier mov pc, lr .section ".text.init", #alloc, #execinstr diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h index 9387a5e1ffe0..0c2acc944a0a 100644 --- a/include/asm-arm/tlbflush.h +++ b/include/asm-arm/tlbflush.h @@ -340,6 +340,12 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) asm("mcr%? p15, 0, %0, c8, c6, 1" : : "r" (kaddr)); if (tlb_flag(TLB_V6_I_PAGE)) asm("mcr%? p15, 0, %0, c8, c5, 1" : : "r" (kaddr)); + + /* The ARM ARM states that the completion of a TLB maintenance + * operation is only guaranteed by a DSB instruction + */ + if (tlb_flag(TLB_V6_U_PAGE | TLB_V6_D_PAGE | TLB_V6_I_PAGE)) + asm("mcr%? p15, 0, %0, c7, c10, 4" : : "r" (zero)); } /* -- cgit v1.2.3 From 1bd79336a426c5e4f3bab142407059ceb12cadf9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Mar 2006 13:24:22 +1100 Subject: powerpc: Fix various syscall/signal/swapcontext bugs A careful reading of the recent changes to the system call entry/exit paths revealed several problems, plus some things that could be simplified and improved: * 32-bit wasn't testing the _TIF_NOERROR bit in the syscall fast exit path, so it was only doing anything with it once it saw some other bit being set. In other words, the noerror behaviour would apply to the next system call where we had to reschedule or deliver a signal, which is not necessarily the current system call. * 32-bit wasn't doing the call to ptrace_notify in the syscall exit path when the _TIF_SINGLESTEP bit was set. * _TIF_RESTOREALL was in both _TIF_USER_WORK_MASK and _TIF_PERSYSCALL_MASK, which is odd since _TIF_RESTOREALL is only set by system calls. I took it out of _TIF_USER_WORK_MASK. * On 64-bit, _TIF_RESTOREALL wasn't causing the non-volatile registers to be restored (unless perhaps a signal was delivered or the syscall was traced or single-stepped). Thus the non-volatile registers weren't restored on exit from a signal handler. We probably got away with it mostly because signal handlers written in C wouldn't alter the non-volatile registers. * On 32-bit I simplified the code and made it more like 64-bit by making the syscall exit path jump to ret_from_except to handle preemption and signal delivery. * 32-bit was calling do_signal unnecessarily when _TIF_RESTOREALL was set - but I think because of that 32-bit was actually restoring the non-volatile registers on exit from a signal handler. * I changed the order of enabling interrupts and saving the non-volatile registers before calling do_syscall_trace_leave; now we enable interrupts first. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/entry_32.S | 95 ++++++++++----------------------------- arch/powerpc/kernel/entry_64.S | 94 +++++++++----------------------------- arch/powerpc/kernel/ptrace.c | 5 +-- arch/powerpc/kernel/signal_32.c | 19 ++------ arch/powerpc/kernel/signal_64.c | 9 +--- arch/powerpc/kernel/systbl.S | 2 +- arch/ppc/kernel/asm-offsets.c | 1 - arch/ppc/kernel/entry.S | 95 ++++++++++----------------------------- include/asm-powerpc/thread_info.h | 8 +--- 10 files changed, 78 insertions(+), 251 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 840aad43a98b..c9a660e4c2db 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -92,7 +92,6 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_SIGFRAME, offsetof(struct thread_info, nvgprs_frame)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); #ifdef CONFIG_PPC32 DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f20a67261ec7..4827ca1ec89b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -227,7 +227,7 @@ ret_from_syscall: MTMSRD(r10) lwz r9,TI_FLAGS(r12) li r8,-_LAST_ERRNO - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_RESTORE_SIGMASK) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmplw 0,r3,r8 blt+ syscall_exit_cont @@ -287,8 +287,10 @@ syscall_dotrace: syscall_exit_work: andi. r0,r9,_TIF_RESTOREALL - bne- 2f - cmplw 0,r3,r8 + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmplw 0,r3,r8 blt+ 1f andi. r0,r9,_TIF_NOERROR bne- 1f @@ -302,9 +304,7 @@ syscall_exit_work: 2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) beq 4f - /* Clear per-syscall TIF flags if any are set, but _leave_ - _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that - yet. */ + /* Clear per-syscall TIF flags if any are set. */ li r11,_TIF_PERSYSCALL_MASK addi r12,r12,TI_FLAGS @@ -318,8 +318,13 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything which requires enabling interrupts? */ - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) - beq 7f + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq ret_from_except + + /* Re-enable interrupts */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* Save NVGPRS if they're not saved already */ lwz r4,_TRAP(r1) @@ -328,71 +333,11 @@ syscall_exit_work: SAVE_NVGPRS(r1) li r4,0xc00 stw r4,_TRAP(r1) - - /* Re-enable interrupts */ -5: ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) - - andi. r0,r9,_TIF_SAVE_NVGPRS - bne save_user_nvgprs - -save_user_nvgprs_cont: - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq 7f - +5: addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_leave - REST_NVGPRS(r1) - -6: lwz r3,GPR3(r1) - LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ - SYNC - MTMSRD(r10) /* disable interrupts again */ - rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) -7: - andi. r0,r9,_TIF_NEED_RESCHED - bne 8f - lwz r5,_MSR(r1) - andi. r5,r5,MSR_PR - beq ret_from_except - andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK - beq ret_from_except - b do_user_signal -8: - ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) /* re-enable interrupts */ - bl schedule - b 6b - -save_user_nvgprs: - lwz r8,TI_SIGFRAME(r12) - -.macro savewords start, end - 1: stw \start,4*(\start)(r8) - .section __ex_table,"a" - .align 2 - .long 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savewords "(\start+1)",\end - .endif -.endm - savewords 14,31 - b save_user_nvgprs_cont - - -save_user_nvgprs_fault: - li r3,11 /* SIGSEGV */ - lwz r4,TI_TASK(r12) - bl force_sigsegv + b ret_from_except_full - rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) - b save_user_nvgprs_cont - #ifdef SHOW_SYSCALLS do_show_syscall: #ifdef SHOW_SYSCALLS_TASK @@ -490,6 +435,14 @@ ppc_clone: stw r0,_TRAP(r1) /* register set saved */ b sys_clone + .globl ppc_swapcontext +ppc_swapcontext: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_swapcontext + /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that @@ -683,7 +636,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_RESTORE_SIGMASK) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) bne do_work restore_user: diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 388f861b8ed1..24be0cf86d7f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -160,7 +160,7 @@ syscall_exit: mtmsrd r10,1 ld r9,TI_FLAGS(r12) li r11,-_LAST_ERRNO - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_SAVE_NVGPRS|_TIF_NOERROR|_TIF_RESTORE_SIGMASK) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmpld r3,r11 ld r5,_CCR(r1) @@ -216,8 +216,10 @@ syscall_exit_work: If TIF_NOERROR is set, just save r3 as it is. */ andi. r0,r9,_TIF_RESTOREALL - bne- 2f - cmpld r3,r11 /* r10 is -LAST_ERRNO */ + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmpld r3,r11 /* r10 is -LAST_ERRNO */ blt+ 1f andi. r0,r9,_TIF_NOERROR bne- 1f @@ -229,9 +231,7 @@ syscall_exit_work: 2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) beq 4f - /* Clear per-syscall TIF flags if any are set, but _leave_ - _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that - yet. */ + /* Clear per-syscall TIF flags if any are set. */ li r11,_TIF_PERSYSCALL_MASK addi r12,r12,TI_FLAGS @@ -240,10 +240,9 @@ syscall_exit_work: stdcx. r10,0,r12 bne- 3b subi r12,r12,TI_FLAGS - -4: bl .save_nvgprs - /* Anything else left to do? */ - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) + +4: /* Anything else left to do? */ + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) beq .ret_from_except_lite /* Re-enable interrupts */ @@ -251,26 +250,10 @@ syscall_exit_work: ori r10,r10,MSR_EE mtmsrd r10,1 - andi. r0,r9,_TIF_SAVE_NVGPRS - bne save_user_nvgprs - - /* If tracing, re-enable interrupts and do it */ -save_user_nvgprs_cont: - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq 5f - + bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .do_syscall_trace_leave - REST_NVGPRS(r1) - clrrdi r12,r1,THREAD_SHIFT - - /* Disable interrupts again and handle other work if any */ -5: mfmsr r10 - rldicl r10,r10,48,1 - rotldi r10,r10,16 - mtmsrd r10,1 - - b .ret_from_except_lite + b .ret_from_except /* Save non-volatile GPRs, if not already saved. */ _GLOBAL(save_nvgprs) @@ -282,51 +265,6 @@ _GLOBAL(save_nvgprs) std r0,_TRAP(r1) blr - -save_user_nvgprs: - ld r10,TI_SIGFRAME(r12) - andi. r0,r9,_TIF_32BIT - beq- save_user_nvgprs_64 - - /* 32-bit save to userspace */ - -.macro savewords start, end - 1: stw \start,4*(\start)(r10) - .section __ex_table,"a" - .align 3 - .llong 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savewords "(\start+1)",\end - .endif -.endm - savewords 14,31 - b save_user_nvgprs_cont - -save_user_nvgprs_64: - /* 64-bit save to userspace */ - -.macro savelongs start, end - 1: std \start,8*(\start)(r10) - .section __ex_table,"a" - .align 3 - .llong 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savelongs "(\start+1)",\end - .endif -.endm - savelongs 14,31 - b save_user_nvgprs_cont - -save_user_nvgprs_fault: - li r3,11 /* SIGSEGV */ - ld r4,TI_TASK(r12) - bl .force_sigsegv - - clrrdi r12,r1,THREAD_SHIFT - ld r9,TI_FLAGS(r12) - b save_user_nvgprs_cont /* * The sigsuspend and rt_sigsuspend system calls can call do_signal @@ -352,6 +290,16 @@ _GLOBAL(ppc_clone) bl .sys_clone b syscall_exit +_GLOBAL(ppc32_swapcontext) + bl .save_nvgprs + bl .compat_sys_swapcontext + b syscall_exit + +_GLOBAL(ppc64_swapcontext) + bl .save_nvgprs + bl .sys_swapcontext + b syscall_exit + _GLOBAL(ret_from_fork) bl .schedule_tail REST_NVGPRS(r1) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 400793c71304..bcb83574335b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -561,10 +561,7 @@ void do_syscall_trace_leave(struct pt_regs *regs) regs->result); if ((test_thread_flag(TIF_SYSCALL_TRACE) -#ifdef CONFIG_PPC64 - || test_thread_flag(TIF_SINGLESTEP) -#endif - ) + || test_thread_flag(TIF_SINGLESTEP)) && (current->ptrace & PT_PTRACED)) do_syscall_trace(); } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index bd837b5dbf06..d7a4e814974d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -151,10 +151,7 @@ static inline int save_general_regs(struct pt_regs *regs, elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; - if (!FULL_REGS(regs)) { - set_thread_flag(TIF_SAVE_NVGPRS); - current_thread_info()->nvgprs_frame = frame->mc_gregs; - } + WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { if (i == 14 && !FULL_REGS(regs)) @@ -215,15 +212,7 @@ static inline int get_old_sigaction(struct k_sigaction *new_ka, static inline int save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { - if (!FULL_REGS(regs)) { - /* Zero out the unsaved GPRs to avoid information - leak, and set TIF_SAVE_NVGPRS to ensure that the - registers do actually get saved later. */ - memset(®s->gpr[14], 0, 18 * sizeof(unsigned long)); - current_thread_info()->nvgprs_frame = &frame->mc_gregs; - set_thread_flag(TIF_SAVE_NVGPRS); - } - + WARN_ON(!FULL_REGS(regs)); return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); } @@ -826,8 +815,8 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int } long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) + struct ucontext __user *new_ctx, + int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) { unsigned char tmp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 497a5d3df359..4324f8a8ba24 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -118,14 +118,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ err |= __put_user(&sc->gp_regs, &sc->regs); - if (!FULL_REGS(regs)) { - /* Zero out the unsaved GPRs to avoid information - leak, and set TIF_SAVE_NVGPRS to ensure that the - registers do actually get saved later. */ - memset(®s->gpr[14], 0, 18 * sizeof(unsigned long)); - set_thread_flag(TIF_SAVE_NVGPRS); - current_thread_info()->nvgprs_frame = &sc->gp_regs; - } + WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->fp_regs, ¤t->thread.fpr, FP_REGS_SIZE); err |= __put_user(signr, &sc->signal); diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 8a9f994ed917..1ad55f0466fd 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -288,7 +288,7 @@ COMPAT_SYS(clock_settime) COMPAT_SYS(clock_gettime) COMPAT_SYS(clock_getres) COMPAT_SYS(clock_nanosleep) -COMPAT_SYS(swapcontext) +SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext) COMPAT_SYS(tgkill) COMPAT_SYS(utimes) COMPAT_SYS(statfs64) diff --git a/arch/ppc/kernel/asm-offsets.c b/arch/ppc/kernel/asm-offsets.c index 7964bf660e92..77e4dc780f8c 100644 --- a/arch/ppc/kernel/asm-offsets.c +++ b/arch/ppc/kernel/asm-offsets.c @@ -131,7 +131,6 @@ main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); - DEFINE(TI_SIGFRAME, offsetof(struct thread_info, nvgprs_frame)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S index a48b950722a1..3a2815978488 100644 --- a/arch/ppc/kernel/entry.S +++ b/arch/ppc/kernel/entry.S @@ -227,7 +227,7 @@ ret_from_syscall: MTMSRD(r10) lwz r9,TI_FLAGS(r12) li r8,-_LAST_ERRNO - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmplw 0,r3,r8 blt+ syscall_exit_cont @@ -287,8 +287,10 @@ syscall_dotrace: syscall_exit_work: andi. r0,r9,_TIF_RESTOREALL - bne- 2f - cmplw 0,r3,r8 + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmplw 0,r3,r8 blt+ 1f andi. r0,r9,_TIF_NOERROR bne- 1f @@ -302,9 +304,7 @@ syscall_exit_work: 2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) beq 4f - /* Clear per-syscall TIF flags if any are set, but _leave_ - _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that - yet. */ + /* Clear per-syscall TIF flags if any are set. */ li r11,_TIF_PERSYSCALL_MASK addi r12,r12,TI_FLAGS @@ -318,8 +318,13 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything which requires enabling interrupts? */ - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) - beq 7f + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq ret_from_except + + /* Re-enable interrupts */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* Save NVGPRS if they're not saved already */ lwz r4,TRAP(r1) @@ -328,71 +333,11 @@ syscall_exit_work: SAVE_NVGPRS(r1) li r4,0xc00 stw r4,TRAP(r1) - - /* Re-enable interrupts */ -5: ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) - - andi. r0,r9,_TIF_SAVE_NVGPRS - bne save_user_nvgprs - -save_user_nvgprs_cont: - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq 7f - +5: addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_leave - REST_NVGPRS(r1) - -6: lwz r3,GPR3(r1) - LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ - SYNC - MTMSRD(r10) /* disable interrupts again */ - rlwinm r12,r1,0,0,18 /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) -7: - andi. r0,r9,_TIF_NEED_RESCHED - bne 8f - lwz r5,_MSR(r1) - andi. r5,r5,MSR_PR - beq ret_from_except - andi. r0,r9,_TIF_SIGPENDING - beq ret_from_except - b do_user_signal -8: - ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) /* re-enable interrupts */ - bl schedule - b 6b - -save_user_nvgprs: - lwz r8,TI_SIGFRAME(r12) - -.macro savewords start, end - 1: stw \start,4*(\start)(r8) - .section __ex_table,"a" - .align 2 - .long 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savewords "(\start+1)",\end - .endif -.endm - savewords 14,31 - b save_user_nvgprs_cont - - -save_user_nvgprs_fault: - li r3,11 /* SIGSEGV */ - lwz r4,TI_TASK(r12) - bl force_sigsegv + b ret_from_except_full - rlwinm r12,r1,0,0,18 /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) - b save_user_nvgprs_cont - #ifdef SHOW_SYSCALLS do_show_syscall: #ifdef SHOW_SYSCALLS_TASK @@ -490,6 +435,14 @@ ppc_clone: stw r0,TRAP(r1) /* register set saved */ b sys_clone + .globl ppc_swapcontext +ppc_swapcontext: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,TRAP(r1) /* register set saved */ + b sys_swapcontext + /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that @@ -683,7 +636,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) bne do_work restore_user: diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index 237fc2b72974..ffc7462d77ba 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -37,7 +37,6 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; - void __user *nvgprs_frame; /* low level flags - has atomic operations done on it */ unsigned long flags ____cacheline_aligned_in_smp; }; @@ -120,7 +119,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 10 #define TIF_SECCOMP 11 /* secure computing */ #define TIF_RESTOREALL 12 /* Restore all regs (implies NOERROR) */ -#define TIF_SAVE_NVGPRS 13 /* Save r14-r31 in signal frame */ #define TIF_NOERROR 14 /* Force successful syscall return */ #define TIF_RESTORE_SIGMASK 15 /* Restore signal mask in do_signal */ @@ -137,15 +135,13 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SINGLESTEP (1< Date: Wed, 8 Mar 2006 14:03:09 -0800 Subject: Mark the pipe file operations static They aren't used (nor even really usable) outside of pipe.c anyway Signed-off-by: Linus Torvalds --- fs/pipe.c | 6 +++--- include/linux/fs.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index d722579df79a..8aada8e426f4 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -605,7 +605,7 @@ struct file_operations rdwr_fifo_fops = { .fasync = pipe_rdwr_fasync, }; -struct file_operations read_pipe_fops = { +static struct file_operations read_pipe_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, @@ -617,7 +617,7 @@ struct file_operations read_pipe_fops = { .fasync = pipe_read_fasync, }; -struct file_operations write_pipe_fops = { +static struct file_operations write_pipe_fops = { .llseek = no_llseek, .read = bad_pipe_r, .write = pipe_write, @@ -629,7 +629,7 @@ struct file_operations write_pipe_fops = { .fasync = pipe_write_fasync, }; -struct file_operations rdwr_pipe_fops = { +static struct file_operations rdwr_pipe_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, diff --git a/include/linux/fs.h b/include/linux/fs.h index e059da947007..0cc34b1c42c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1418,9 +1418,6 @@ extern int is_bad_inode(struct inode *); extern struct file_operations read_fifo_fops; extern struct file_operations write_fifo_fops; extern struct file_operations rdwr_fifo_fops; -extern struct file_operations read_pipe_fops; -extern struct file_operations write_pipe_fops; -extern struct file_operations rdwr_pipe_fops; extern int fs_may_remount_ro(struct super_block *); -- cgit v1.2.3 From 1c6cc5fd32978ffdc4d0acf8592d3901adefbdad Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 7 Mar 2006 21:55:20 -0800 Subject: [PATCH] powerpc: restore eeh_add_device_late() prototype stub We fixed this: arch/powerpc/platforms/pseries/eeh.c: In function `eeh_add_device_tree_late': arch/powerpc/platforms/pseries/eeh.c:901: warning: implicit declaration of function `eeh_add_device_late' arch/powerpc/platforms/pseries/eeh.c: At top level: arch/powerpc/platforms/pseries/eeh.c:918: error: conflicting types for 'eeh_add_device_late' arch/powerpc/platforms/pseries/eeh.c:901: error: previous implicit declaration of 'eeh_add_device_late' was here make[2]: *** [arch/powerpc/platforms/pseries/eeh.o] Error 1 But we forgot the !CONFIG_EEH stub. Signed-off-by: Mark Fasheh Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-powerpc/eeh.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index eb392032e19b..5207758a6dd9 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -118,6 +118,8 @@ static inline void pci_addr_cache_build(void) { } static inline void eeh_add_device_early(struct device_node *dn) { } +static inline void eeh_add_device_late(struct pci_dev *dev) { } + static inline void eeh_remove_device(struct pci_dev *dev) { } static inline void eeh_add_device_tree_early(struct device_node *dn) { } -- cgit v1.2.3 From 707ced0d718e89b52b13aa55a64653083e792cca Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Tue, 7 Mar 2006 21:55:28 -0800 Subject: [PATCH] __get_unaligned() gcc-4 fix If the 'ptr' is a const, this code cause "assignment of read-only variable" error on gcc 4.x. Use __u64 instead of __typeof__(*(ptr)) for temporary variable to get rid of errors on gcc 4.x. Signed-off-by: Atsushi Nemoto Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/unaligned.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 4dc8ddb401c1..09ec447fe2af 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -78,7 +78,7 @@ static inline void __ustw(__u16 val, __u16 *addr) #define __get_unaligned(ptr, size) ({ \ const void *__gu_p = ptr; \ - __typeof__(*(ptr)) val; \ + __u64 val; \ switch (size) { \ case 1: \ val = *(const __u8 *)__gu_p; \ @@ -95,7 +95,7 @@ static inline void __ustw(__u16 val, __u16 *addr) default: \ bad_unaligned_access_length(); \ }; \ - val; \ + (__typeof__(*(ptr)))val; \ }) #define __put_unaligned(val, ptr, size) \ -- cgit v1.2.3 From e2bab3d92486fb781f4d06f56339264ed1492392 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 7 Mar 2006 21:55:31 -0800 Subject: [PATCH] percpu_counter_sum() Implement percpu_counter_sum(). This is a more accurate but slower version of percpu_counter_read_positive(). We need this for Alex's speedup-ext3_statfs patch and for the nr_file accounting fix. Otherwise these things would be too inaccurate on large CPU counts. Cc: Ravikiran G Thirumalai Cc: Alex Tomas Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 6 ++++++ mm/swap.c | 25 +++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index bd6708e2c027..682525511c9e 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -39,6 +39,7 @@ static inline void percpu_counter_destroy(struct percpu_counter *fbc) } void percpu_counter_mod(struct percpu_counter *fbc, long amount); +long percpu_counter_sum(struct percpu_counter *fbc); static inline long percpu_counter_read(struct percpu_counter *fbc) { @@ -92,6 +93,11 @@ static inline long percpu_counter_read_positive(struct percpu_counter *fbc) return fbc->count; } +static inline long percpu_counter_sum(struct percpu_counter *fbc) +{ + return percpu_counter_read_positive(fbc); +} + #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) diff --git a/mm/swap.c b/mm/swap.c index cce3dda59c59..e9ec06d845e8 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -489,13 +489,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount) if (count >= FBC_BATCH || count <= -FBC_BATCH) { spin_lock(&fbc->lock); fbc->count += count; + *pcount = 0; spin_unlock(&fbc->lock); - count = 0; + } else { + *pcount = count; } - *pcount = count; put_cpu(); } EXPORT_SYMBOL(percpu_counter_mod); + +/* + * Add up all the per-cpu counts, return the result. This is a more accurate + * but much slower version of percpu_counter_read_positive() + */ +long percpu_counter_sum(struct percpu_counter *fbc) +{ + long ret; + int cpu; + + spin_lock(&fbc->lock); + ret = fbc->count; + for_each_cpu(cpu) { + long *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } + spin_unlock(&fbc->lock); + return ret < 0 ? 0 : ret; +} +EXPORT_SYMBOL(percpu_counter_sum); #endif /* -- cgit v1.2.3 From 21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:33 -0800 Subject: [PATCH] rcu batch tuning This patch adds new tunables for RCU queue and finished batches. There are two types of controls - number of completed RCU updates invoked in a batch (blimit) and monitoring for high rate of incoming RCUs on a cpu (qhimark, qlowmark). By default, the per-cpu batch limit is set to a small value. If the input RCU rate exceeds the high watermark, we do two things - force quiescent state on all cpus and set the batch limit of the CPU to INTMAX. Setting batch limit to INTMAX forces all finished RCUs to be processed in one shot. If we have more than INTMAX RCUs queued up, then we have bigger problems anyway. Once the incoming queued RCUs fall below the low watermark, the batch limit is set to the default. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 13 +++++++ include/linux/rcupdate.h | 6 ++- kernel/rcupdate.c | 76 ++++++++++++++++++++++++++++--------- 3 files changed, 76 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 75205391b335..bad5987c4727 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1284,6 +1284,19 @@ running once the system is up. New name for the ramdisk parameter. See Documentation/ramdisk.txt. + rcu.blimit= [KNL,BOOT] Set maximum number of finished + RCU callbacks to process in one batch. + + rcu.qhimark= [KNL,BOOT] Set threshold of queued + RCU callbacks over which batch limiting is disabled. + + rcu.qlowmark= [KNL,BOOT] Set threshold of queued + RCU callbacks below which batch limiting is re-enabled. + + rcu.rsinterval= [KNL,BOOT,SMP] Set the number of additional + RCU callbacks to queued before forcing reschedule + on all cpus. + rdinit= [KNL] Format: Run specified binary instead of /init from the ramdisk, diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b87aefa082e2..c2ec6c77874e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -98,13 +98,17 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; - long count; /* # of queued items */ + long qlen; /* # of queued callbacks */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; + long blimit; /* Upper limit on a processed batch */ int cpu; struct rcu_head barrier; +#ifdef CONFIG_SMP + long last_rs_qlen; /* qlen during the last resched */ +#endif }; DECLARE_PER_CPU(struct rcu_data, rcu_data); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 0cf8146bd585..8cf15a569fcd 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; /* Fake initialization required by compiler */ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int maxbatch = 10000; +static int blimit = 10; +static int qhimark = 10000; +static int qlowmark = 100; +#ifdef CONFIG_SMP +static int rsinterval = 1000; +#endif + +static atomic_t rcu_barrier_cpu_count; +static struct semaphore rcu_barrier_sema; +static struct completion rcu_barrier_completion; + +#ifdef CONFIG_SMP +static void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + int cpu; + cpumask_t cpumask; + set_need_resched(); + if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { + rdp->last_rs_qlen = rdp->qlen; + /* + * Don't send IPI to itself. With irqs disabled, + * rdp->cpu is the current cpu. + */ + cpumask = rcp->cpumask; + cpu_clear(rdp->cpu, cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_send_reschedule(cpu); + } +} +#else +static inline void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + set_need_resched(); +} +#endif /** * call_rcu - Queue an RCU callback for invocation after a grace period. @@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - - if (unlikely(++rdp->count > 10000)) - set_need_resched(); - + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } local_irq_restore(flags); } -static atomic_t rcu_barrier_cpu_count; -static struct semaphore rcu_barrier_sema; -static struct completion rcu_barrier_completion; - /** * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - rdp->count++; -/* - * Should we directly call rcu_do_batch() here ? - * if (unlikely(rdp->count > 10000)) - * rcu_do_batch(rdp); - */ + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_bh_ctrlblk); + } + local_irq_restore(flags); } @@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; - rdp->count--; - if (++count >= maxbatch) + rdp->qlen--; + if (++count >= rdp->blimit) break; } + if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; if (!rdp->donelist) rdp->donetail = &rdp->donelist; else @@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->quiescbatch = rcp->completed; rdp->qs_pending = 0; rdp->cpu = cpu; + rdp->blimit = blimit; } static void __devinit rcu_online_cpu(int cpu) @@ -567,7 +602,12 @@ void synchronize_kernel(void) synchronize_rcu(); } -module_param(maxbatch, int, 0); +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); +#ifdef CONFIG_SMP +module_param(rsinterval, int, 0); +#endif EXPORT_SYMBOL_GPL(rcu_batches_completed); EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ -- cgit v1.2.3 From 529bf6be5c04f2e869d07bfdb122e9fd98ade714 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:35 -0800 Subject: [PATCH] fix file counting I have benchmarked this on an x86_64 NUMA system and see no significant performance difference on kernbench. Tested on both x86_64 and powerpc. The way we do file struct accounting is not very suitable for batched freeing. For scalability reasons, file accounting was constructor/destructor based. This meant that nr_files was decremented only when the object was removed from the slab cache. This is susceptible to slab fragmentation. With RCU based file structure, consequent batched freeing and a test program like Serge's, we just speed this up and end up with a very fragmented slab - llm22:~ # cat /proc/sys/fs/file-nr 587730 0 758844 At the same time, I see only a 2000+ objects in filp cache. The following patch I fixes this problem. This patch changes the file counting by removing the filp_count_lock. Instead we use a separate percpu counter, nr_files, for now and all accesses to it are through get_nr_files() api. In the sysctl handler for nr_files, we populate files_stat.nr_files before returning to user. Counting files as an when they are created and destroyed (as opposed to inside slab) allows us to correctly count open files with RCU. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- fs/file_table.c | 87 +++++++++++++++++++++++++++++++++------------------- include/linux/file.h | 2 -- include/linux/fs.h | 1 + kernel/sysctl.c | 5 ++- net/unix/af_unix.c | 2 +- 6 files changed, 62 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index a173bba32666..11dc83092d4a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1736,7 +1736,7 @@ void __init vfs_caches_init(unsigned long mempages) SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); dcache_init(mempages); inode_init(mempages); diff --git a/fs/file_table.c b/fs/file_table.c index 768b58167543..44fabeaa9415 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -5,6 +5,7 @@ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ +#include #include #include #include @@ -19,52 +20,67 @@ #include #include #include +#include +#include + +#include /* sysctl tunables... */ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -EXPORT_SYMBOL(files_stat); /* Needed by unix.o */ - /* public. Not pretty! */ - __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); -static DEFINE_SPINLOCK(filp_count_lock); +static struct percpu_counter nr_files __cacheline_aligned_in_smp; -/* slab constructors and destructors are called from arbitrary - * context and must be fully threaded - use a local spinlock - * to protect files_stat.nr_files - */ -void filp_ctor(void *objp, struct kmem_cache *cachep, unsigned long cflags) +static inline void file_free_rcu(struct rcu_head *head) { - if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - unsigned long flags; - spin_lock_irqsave(&filp_count_lock, flags); - files_stat.nr_files++; - spin_unlock_irqrestore(&filp_count_lock, flags); - } + struct file *f = container_of(head, struct file, f_u.fu_rcuhead); + kmem_cache_free(filp_cachep, f); } -void filp_dtor(void *objp, struct kmem_cache *cachep, unsigned long dflags) +static inline void file_free(struct file *f) { - unsigned long flags; - spin_lock_irqsave(&filp_count_lock, flags); - files_stat.nr_files--; - spin_unlock_irqrestore(&filp_count_lock, flags); + percpu_counter_dec(&nr_files); + call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } -static inline void file_free_rcu(struct rcu_head *head) +/* + * Return the total number of open files in the system + */ +static int get_nr_files(void) { - struct file *f = container_of(head, struct file, f_u.fu_rcuhead); - kmem_cache_free(filp_cachep, f); + return percpu_counter_read_positive(&nr_files); } -static inline void file_free(struct file *f) +/* + * Return the maximum number of open files in the system + */ +int get_max_files(void) { - call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); + return files_stat.max_files; } +EXPORT_SYMBOL_GPL(get_max_files); + +/* + * Handle nr_files sysctl + */ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + files_stat.nr_files = get_nr_files(); + return proc_dointvec(table, write, filp, buffer, lenp, ppos); +} +#else +int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} +#endif /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or @@ -78,14 +94,20 @@ struct file *get_empty_filp(void) /* * Privileged users can go above max_files */ - if (files_stat.nr_files >= files_stat.max_files && - !capable(CAP_SYS_ADMIN)) - goto over; + if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { + /* + * percpu_counters are inaccurate. Do an expensive check before + * we go and fail. + */ + if (percpu_counter_sum(&nr_files) >= files_stat.max_files) + goto over; + } f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); if (f == NULL) goto fail; + percpu_counter_inc(&nr_files); memset(f, 0, sizeof(*f)); if (security_file_alloc(f)) goto fail_sec; @@ -101,10 +123,10 @@ struct file *get_empty_filp(void) over: /* Ran out of filps - report that */ - if (files_stat.nr_files > old_max) { + if (get_nr_files() > old_max) { printk(KERN_INFO "VFS: file-max limit %d reached\n", - files_stat.max_files); - old_max = files_stat.nr_files; + get_max_files()); + old_max = get_nr_files(); } goto fail; @@ -276,4 +298,5 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + percpu_counter_init(&nr_files); } diff --git a/include/linux/file.h b/include/linux/file.h index 418b6101b59a..9901b850f2e4 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -60,8 +60,6 @@ extern void put_filp(struct file *); extern int get_unused_fd(void); extern void FASTCALL(put_unused_fd(unsigned int fd)); struct kmem_cache; -extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags); -extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags); extern struct file ** alloc_fd_array(int); extern void free_fd_array(struct file **, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0cc34b1c42c9..51c0c93bdf93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -35,6 +35,7 @@ struct files_stat_struct { int max_files; /* tunable */ }; extern struct files_stat_struct files_stat; +extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index de2d9109194e..32b48e8ee36e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -50,6 +50,9 @@ #include #include +extern int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -943,7 +946,7 @@ static ctl_table fs_table[] = { .data = &files_stat, .maxlen = 3*sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_nr_files, }, { .ctl_name = FS_MAXFILE, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1b5989b1b670..c323cc6a28b0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -547,7 +547,7 @@ static struct sock * unix_create1(struct socket *sock) struct sock *sk = NULL; struct unix_sock *u; - if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files) + if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); -- cgit v1.2.3 From 2ec5e3a867d63d04932e11c6097f63760d9be3fe Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Tue, 7 Mar 2006 21:55:48 -0800 Subject: [PATCH] fix kexec asm While testing kexec and kdump we hit problems where the new kernel would freeze or instantly reboot. The easiest way to trigger it was to kexec a kernel compiled for CONFIG_M586 on an athlon cpu. Compiling for CONFIG_MK7 instead would work fine. The patch fixes a few problems with the kexec inline asm. Signed-off-by: Chris Mason Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/machine_kexec.c | 14 +++++++------- arch/x86_64/kernel/machine_kexec.c | 2 +- include/asm-powerpc/kexec.h | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index a912fed48482..f73d7374a2ba 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -116,13 +116,13 @@ static void load_segments(void) __asm__ __volatile__ ( "\tljmp $"STR(__KERNEL_CS)",$1f\n" "\t1:\n" - "\tmovl $"STR(__KERNEL_DS)",%eax\n" - "\tmovl %eax,%ds\n" - "\tmovl %eax,%es\n" - "\tmovl %eax,%fs\n" - "\tmovl %eax,%gs\n" - "\tmovl %eax,%ss\n" - ); + "\tmovl $"STR(__KERNEL_DS)",%%eax\n" + "\tmovl %%eax,%%ds\n" + "\tmovl %%eax,%%es\n" + "\tmovl %%eax,%%fs\n" + "\tmovl %%eax,%%gs\n" + "\tmovl %%eax,%%ss\n" + ::: "eax", "memory"); #undef STR #undef __STR } diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 89fab51e20f4..25ac8a3faae6 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -140,7 +140,7 @@ static void load_segments(void) "\tmovl %0,%%ss\n" "\tmovl %0,%%fs\n" "\tmovl %0,%%gs\n" - : : "a" (__KERNEL_DS) + : : "a" (__KERNEL_DS) : "memory" ); } diff --git a/include/asm-powerpc/kexec.h b/include/asm-powerpc/kexec.h index bda2f217e6fe..6a2af2f6853b 100644 --- a/include/asm-powerpc/kexec.h +++ b/include/asm-powerpc/kexec.h @@ -93,7 +93,8 @@ static inline void crash_setup_regs(struct pt_regs *newregs, "mfxer %0\n" "std %0, 296(%2)\n" : "=&r" (tmp1), "=&r" (tmp2) - : "b" (newregs)); + : "b" (newregs) + : "memory"); } } #else -- cgit v1.2.3 From f9262c12c0084ddba445a9a42e98994018e51400 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 8 Mar 2006 17:57:25 -0800 Subject: [PATCH] i386: port ATI timer fix from x86_64 to i386 II ATI chipsets tend to generate double timer interrupts for the local APIC timer when both the 8254 and the IO-APIC timer pins are enabled. This is because they route it to both and the result is anded together and the CPU ends up processing it twice. This patch changes check_timer to disable the 8254 routing for interrupt 0. I think it would be safe on all chipsets actually (i tested it on a couple and it worked everywhere) and Windows seems to do it in a similar way, but to be conservative this patch only enables this mode on ATI (and adds options to enable/disable too) Ported over from a similar x86-64 change. I reused the ACPI earlyquirk infrastructure for the ATI bridge check, but tweaked it a bit to work even without ACPI. Inspired by a patch from Chuck Ebbert, but redone. Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 6 ++++++ arch/i386/kernel/Makefile | 2 +- arch/i386/kernel/acpi/Makefile | 2 +- arch/i386/kernel/acpi/boot.c | 3 --- arch/i386/kernel/acpi/earlyquirk.c | 8 ++++++++ arch/i386/kernel/io_apic.c | 19 ++++++++++++++++++- arch/i386/kernel/setup.c | 4 ++++ include/asm-i386/apic.h | 2 ++ 8 files changed, 40 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index bad5987c4727..fc99075e0af4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -335,6 +335,12 @@ running once the system is up. timesource is not avalible, it defaults to PIT. Format: { pit | tsc | cyclone | pmtmr } + disable_8254_timer + enable_8254_timer + [IA32/X86_64] Disable/Enable interrupt 0 timer routing + over the 8254 in addition to over the IO-APIC. The + kernel tries to set a sensible default. + hpet= [IA-32,HPET] option to disable HPET and use PIT. Format: disable diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 53bb9a79e274..65656c033d70 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -11,7 +11,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ obj-y += cpu/ obj-y += timers/ -obj-$(CONFIG_ACPI) += acpi/ +obj-y += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index d51c7313cae8..7e9ac99354f4 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,4 +1,4 @@ -obj-y := boot.o +obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 79577f0ace98..f1a21945963d 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -1111,9 +1111,6 @@ int __init acpi_boot_table_init(void) disable_acpi(); return error; } -#ifdef __i386__ - check_acpi_pci(); -#endif acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index f1b9d2a46dab..2e3b643a4dc4 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -7,14 +7,22 @@ #include #include #include +#include static int __init check_bridge(int vendor, int device) { +#ifdef CONFIG_ACPI /* According to Nvidia all timer overrides are bogus. Just ignore them all. */ if (vendor == PCI_VENDOR_ID_NVIDIA) { acpi_skip_timer_override = 1; } +#endif + if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { + timer_over_8254 = 0; + printk(KERN_INFO "ATI board detected. Disabling timer routing " + "over 8254.\n"); + } return 0; } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 235822b3f41b..39d9a5fa907e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -51,6 +51,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); +int timer_over_8254 __initdata = 1; + /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -2267,7 +2269,8 @@ static inline void check_timer(void) apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); timer_ack = 1; - enable_8259A_irq(0); + if (timer_over_8254 > 0) + enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2392,6 +2395,20 @@ void __init setup_IO_APIC(void) print_IO_APIC(); } +static int __init setup_disable_8254_timer(char *s) +{ + timer_over_8254 = -1; + return 1; +} +static int __init setup_enable_8254_timer(char *s) +{ + timer_over_8254 = 2; + return 1; +} + +__setup("disable_8254_timer", setup_disable_8254_timer); +__setup("enable_8254_timer", setup_enable_8254_timer); + /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 51e513b4f72d..ab62a9f4701e 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1599,6 +1599,10 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); +#ifdef CONFIG_X86_IO_APIC + check_acpi_pci(); /* Checks more than just ACPI actually */ +#endif + #ifdef CONFIG_ACPI /* * Parse the ACPI tables for possible boot-time SMP configuration. diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index d30b8571573f..ff9ac8d19eb2 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -137,6 +137,8 @@ void switch_APIC_timer_to_ipi(void *cpumask); void switch_ipi_to_APIC_timer(void *cpumask); #define ARCH_APICTIMER_STOPS_ON_C3 1 +extern int timer_over_8254; + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } -- cgit v1.2.3