diff options
Diffstat (limited to 'arch/s390')
39 files changed, 1088 insertions, 722 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 608193cfe43f..ff690564edbd 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -22,6 +22,14 @@ config RWSEM_XCHGADD_ALGORITHM bool default y +config ARCH_HAS_ILOG2_U32 + bool + default n + +config ARCH_HAS_ILOG2_U64 + bool + default n + config GENERIC_HWEIGHT bool default y @@ -33,9 +41,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_TIME def_bool y -config GENERIC_BUST_SPINLOCK - bool - mainmenu "Linux Kernel Configuration" config S390 @@ -181,7 +186,7 @@ config PACK_STACK config SMALL_STACK bool "Use 4kb/8kb for kernel stack instead of 8kb/16kb" - depends on PACK_STACK + depends on PACK_STACK && !LOCKDEP help If you say Y here and the compiler supports the -mkernel-backchain option the kernel will use a smaller kernel stack size. For 31 bit @@ -241,6 +246,9 @@ config ARCH_POPULATES_NODE_MAP source "mm/Kconfig" +config HOLES_IN_ZONE + def_bool y + comment "I/O subsystem configuration" config MACHCHK_WARNING @@ -264,14 +272,6 @@ config QDIO If unsure, say Y. -config QDIO_PERF_STATS - bool "Performance statistics in /proc" - depends on QDIO - help - Say Y here to get performance statistics in /proc/qdio_perf - - If unsure, say N. - config QDIO_DEBUG bool "Extended debugging information" depends on QDIO diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 5deb9f7544a1..6598e5268573 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -35,6 +35,9 @@ cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109) +#KBUILD_IMAGE is necessary for make rpm +KBUILD_IMAGE :=arch/s390/boot/image + # # Prevent tail-call optimizations, to get clearer backtraces: # diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 45c9fa7d7545..b8c237290263 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -92,8 +92,8 @@ static int appldata_timer_active; * Work queue */ static struct workqueue_struct *appldata_wq; -static void appldata_work_fn(void *data); -static DECLARE_WORK(appldata_work, appldata_work_fn, NULL); +static void appldata_work_fn(struct work_struct *work); +static DECLARE_WORK(appldata_work, appldata_work_fn); /* @@ -125,7 +125,7 @@ static void appldata_timer_function(unsigned long data) * * call data gathering function for each (active) module */ -static void appldata_work_fn(void *data) +static void appldata_work_fn(struct work_struct *work) { struct list_head *lh; struct appldata_ops *ops; @@ -310,6 +310,7 @@ appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) { return -EFAULT; } + interval = 0; sscanf(buf, "%i", &interval); if (interval <= 0) { P_ERROR("Timer CPU interval has to be > 0!\n"); @@ -560,7 +561,6 @@ appldata_offline_cpu(int cpu) spin_unlock(&appldata_timer_lock); } -#ifdef CONFIG_HOTPLUG_CPU static int __cpuinit appldata_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -581,7 +581,6 @@ appldata_cpu_notify(struct notifier_block *self, static struct notifier_block appldata_nb = { .notifier_call = appldata_cpu_notify, }; -#endif /* * appldata_init() diff --git a/arch/s390/defconfig b/arch/s390/defconfig index a3257398ea8d..a6ec919ba83f 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18 -# Wed Oct 4 19:45:46 2006 +# Linux kernel version: 2.6.19-rc2 +# Wed Oct 18 17:11:10 2006 # CONFIG_MMU=y CONFIG_LOCKDEP_SUPPORT=y @@ -119,7 +119,6 @@ CONFIG_PACK_STACK=y CONFIG_CHECK_STACK=y CONFIG_STACK_GUARD=256 # CONFIG_WARN_STACK is not set -CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -135,7 +134,6 @@ CONFIG_RESOURCES_64BIT=y # CONFIG_MACHCHK_WARNING=y CONFIG_QDIO=y -# CONFIG_QDIO_PERF_STATS is not set # CONFIG_QDIO_DEBUG is not set # @@ -211,6 +209,7 @@ CONFIG_INET6_XFRM_MODE_TRANSPORT=y CONFIG_INET6_XFRM_MODE_TUNNEL=y CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_SUBTREES is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set @@ -528,6 +527,7 @@ CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set +# CONFIG_EXT4DEV_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y @@ -646,10 +646,6 @@ CONFIG_MSDOS_PARTITION=y # CONFIG_NLS is not set # -# Distributed Lock Manager -# - -# # Instrumentation Support # @@ -669,7 +665,6 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=17 -# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set CONFIG_DEBUG_PREEMPT=y @@ -690,6 +685,7 @@ CONFIG_DEBUG_FS=y # CONFIG_FRAME_POINTER is not set # CONFIG_UNWIND_INFO is not set CONFIG_FORCED_INLINING=y +CONFIG_HEADERS_CHECK=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index cd702ae45d6d..b6716c4b9934 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -109,7 +109,7 @@ static void hypfs_drop_inode(struct inode *inode) static int hypfs_open(struct inode *inode, struct file *filp) { - char *data = filp->f_dentry->d_inode->i_private; + char *data = filp->f_path.dentry->d_inode->i_private; struct hypfs_sb_info *fs_info; if (filp->f_mode & FMODE_WRITE) { @@ -174,7 +174,7 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, struct hypfs_sb_info *fs_info; size_t count = iov_length(iov, nr_segs); - sb = iocb->ki_filp->f_dentry->d_inode->i_sb; + sb = iocb->ki_filp->f_path.dentry->d_inode->i_sb; fs_info = sb->s_fs_info; /* * Currently we only allow one update per second for two reasons: diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index aa978978d3d1..a81881c9b297 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -4,7 +4,7 @@ EXTRA_AFLAGS := -traditional -obj-y := bitmap.o traps.o time.o process.o \ +obj-y := bitmap.o traps.o time.o process.o reset.o \ setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ semaphore.o s390_ext.o debug.o profile.o irq.o ipl.o diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c index 9565a2dcfadc..5c46054195cb 100644 --- a/arch/s390/kernel/binfmt_elf32.c +++ b/arch/s390/kernel/binfmt_elf32.c @@ -176,7 +176,6 @@ struct elf_prpsinfo32 #include <linux/highuid.h> -#define elf_addr_t u32 /* #define init_elf_binfmt init_elf32_binfmt */ diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index e15e1489aef5..5b33f823863a 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -295,6 +295,7 @@ static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) * * This is really horribly ugly. */ +#ifdef CONFIG_SYSVIPC asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) { if (call >> 16) /* hack for backward compatibility */ @@ -338,6 +339,7 @@ asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) return -ENOSYS; } +#endif asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) { @@ -755,7 +757,9 @@ asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) put_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp))) error = -EFAULT; } - copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); + if (copy_to_user(args->__unused, tmp.__unused, + sizeof(tmp.__unused))) + error = -EFAULT; } return error; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index d49b876a83bf..861888ab8c13 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -169,12 +169,12 @@ sys32_sigaction(int sig, const struct old_sigaction32 __user *act, compat_old_sigset_t mask; if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(sa_handler, &act->sa_handler) || - __get_user(sa_restorer, &act->sa_restorer)) + __get_user(sa_restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) return -EFAULT; new_ka.sa.sa_handler = (__sighandler_t) sa_handler; new_ka.sa.sa_restorer = (void (*)(void)) sa_restorer; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); } @@ -185,10 +185,10 @@ sys32_sigaction(int sig, const struct old_sigaction32 __user *act, sa_restorer = (unsigned long) old_ka.sa.sa_restorer; if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || __put_user(sa_handler, &oact->sa_handler) || - __put_user(sa_restorer, &oact->sa_restorer)) + __put_user(sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } return ret; diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index cb0efae6802f..71e54ef0931e 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1664,4 +1664,4 @@ sys_getcpu_wrapper: llgtr %r2,%r2 # unsigned * llgtr %r3,%r3 # unsigned * llgtr %r4,%r4 # struct getcpu_cache * - jg sys_tee + jg sys_getcpu diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 1eae74e72f95..a5972f1541fe 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -21,14 +21,15 @@ static DEFINE_SPINLOCK(cpcmd_lock); static char cpcmd_buf[241]; /* - * the caller of __cpcmd has to ensure that the response buffer is below 2 GB + * __cpcmd has some restrictions over cpcmd + * - the response buffer must reside below 2GB (if any) + * - __cpcmd is unlocked and therefore not SMP-safe */ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) { - unsigned long flags, cmdlen; + unsigned cmdlen; int return_code, return_len; - spin_lock_irqsave(&cpcmd_lock, flags); cmdlen = strlen(cmd); BUG_ON(cmdlen > 240); memcpy(cpcmd_buf, cmd, cmdlen); @@ -74,7 +75,6 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) : "+d" (reg3) : "d" (reg2) : "cc"); return_code = (int) reg3; } - spin_unlock_irqrestore(&cpcmd_lock, flags); if (response_code != NULL) *response_code = return_code; return return_len; @@ -82,15 +82,18 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) EXPORT_SYMBOL(__cpcmd); -#ifdef CONFIG_64BIT int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { char *lowbuf; int len; + unsigned long flags; if ((rlen == 0) || (response == NULL) - || !((unsigned long)response >> 31)) + || !((unsigned long)response >> 31)) { + spin_lock_irqsave(&cpcmd_lock, flags); len = __cpcmd(cmd, response, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); + } else { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { @@ -98,7 +101,9 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) "cpcmd: could not allocate response buffer\n"); return -ENOMEM; } + spin_lock_irqsave(&cpcmd_lock, flags); len = __cpcmd(cmd, lowbuf, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); memcpy(response, lowbuf, rlen); kfree(lowbuf); } @@ -106,4 +111,3 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) } EXPORT_SYMBOL(cpcmd); -#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 43f3d0c7e132..ef5266fbce62 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -603,13 +603,13 @@ debug_open(struct inode *inode, struct file *file) debug_info_t *debug_info, *debug_info_snapshot; down(&debug_lock); - debug_info = file->f_dentry->d_inode->i_private; + debug_info = file->f_path.dentry->d_inode->i_private; /* find debug view */ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) continue; else if (debug_info->debugfs_entries[i] == - file->f_dentry) { + file->f_path.dentry) { goto found; /* found view ! */ } } diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 0cf59bb7a857..8f8c802f1bcf 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -418,24 +418,6 @@ start: .gotr: l %r10,.tbl # EBCDIC to ASCII table tr 0(240,%r8),0(%r10) - stidp __LC_CPUID # Are we running on VM maybe - cli __LC_CPUID,0xff - bnz .test - .long 0x83300060 # diag 3,0,x'0060' - storage size - b .done -.test: - mvc 0x68(8),.pgmnw # set up pgm check handler - l %r2,.fourmeg - lr %r3,%r2 - bctr %r3,%r0 # 4M-1 -.loop: iske %r0,%r3 - ar %r3,%r2 -.pgmx: - sr %r3,%r2 - la %r3,1(%r3) -.done: - l %r1,.memsize - st %r3,ARCH_OFFSET(%r1) slr %r0,%r0 st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) @@ -443,9 +425,6 @@ start: .tbl: .long _ebcasc # translate table .cmd: .long COMMAND_LINE # address of command line buffer .parm: .long PARMAREA -.memsize: .long memory_size -.fourmeg: .long 0x00400000 # 4M -.pgmnw: .long 0x00080000,.pgmx .lowcase: .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 0a2c929486ab..4388b3309e0c 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -131,10 +131,11 @@ startup_continue: .long init_thread_union .Lpmask: .byte 0 -.align 8 + .align 8 .Lpcext:.long 0x00080000,0x80000000 .Lcr: .long 0x00 # place holder for cr0 + .align 8 .Lwaitsclp: .long 0x010a0000,0x80000000 + .Lsclph .Lrcp: @@ -156,7 +157,7 @@ startup_continue: slr %r4,%r4 # set start of chunk to zero slr %r5,%r5 # set end of chunk to zero slr %r6,%r6 # set access code to zero - la %r10, MEMORY_CHUNKS # number of chunks + la %r10,MEMORY_CHUNKS # number of chunks .Lloop: tprot 0(%r5),0 # test protection of first byte ipm %r7 @@ -176,8 +177,6 @@ startup_continue: st %r0,4(%r3) # store size of chunk st %r6,8(%r3) # store type of chunk la %r3,12(%r3) - l %r4,.Lmemsize-.LPG1(%r13) # address of variable memory_size - st %r5,0(%r4) # store last end to memory size ahi %r10,-1 # update chunk number .Lchkloop: lr %r6,%r7 # set access code to last cc @@ -292,7 +291,6 @@ startup_continue: .Lpcmvpg:.long 0x00080000,0x80000000 + .Lchkmvpg .Lpcidte:.long 0x00080000,0x80000000 + .Lchkidte .Lpcdiag9c:.long 0x00080000,0x80000000 + .Lchkdiag9c -.Lmemsize:.long memory_size .Lmchunk:.long memory_chunk .Lmflags:.long machine_flags .Lbss_bgn: .long __bss_start diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 42f54d482441..c526279e1123 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -70,7 +70,20 @@ startup_continue: sgr %r5,%r5 # set src,length and pad to zero mvcle %r2,%r4,0 # clear mem jo .-4 # branch back, if not finish + # set program check new psw mask + mvc __LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) + larl %r1,.Lslowmemdetect # set program check address + stg %r1,__LC_PGM_NEW_PSW+8 + lghi %r1,0xc + diag %r0,%r1,0x260 # get memory size of virtual machine + cgr %r0,%r1 # different? -> old detection routine + jne .Lslowmemdetect + aghi %r1,1 # size is one more than end + larl %r2,memory_chunk + stg %r1,8(%r2) # store size of chunk + j .Ldonemem +.Lslowmemdetect: l %r2,.Lrcp-.LPG1(%r13) # Read SCP forced command word .Lservicecall: stosm .Lpmask-.LPG1(%r13),0x01 # authorize ext interrupts @@ -139,8 +152,6 @@ startup_continue: .int 0x100000 .Lfchunk: - # set program check new psw mask - mvc __LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) # # find memory chunks. @@ -175,8 +186,6 @@ startup_continue: stg %r0,8(%r3) # store size of chunk st %r6,20(%r3) # store type of chunk la %r3,24(%r3) - larl %r8,memory_size - stg %r5,0(%r8) # store memory size ahi %r10,-1 # update chunk number .Lchkloop: lr %r6,%r7 # set access code to last cc diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 1f5e782b3d05..a36bea1188d9 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -13,12 +13,21 @@ #include <linux/device.h> #include <linux/delay.h> #include <linux/reboot.h> +#include <linux/ctype.h> #include <asm/smp.h> #include <asm/setup.h> #include <asm/cpcmd.h> #include <asm/cio.h> +#include <asm/ebcdic.h> +#include <asm/reset.h> #define IPL_PARM_BLOCK_VERSION 0 +#define LOADPARM_LEN 8 + +extern char s390_readinfo_sccb[]; +#define SCCB_VALID (*((__u16*)&s390_readinfo_sccb[6]) == 0x0010) +#define SCCB_LOADPARM (&s390_readinfo_sccb[24]) +#define SCCB_FLAG (s390_readinfo_sccb[91]) enum ipl_type { IPL_TYPE_NONE = 1, @@ -289,9 +298,25 @@ static struct attribute_group ipl_fcp_attr_group = { /* CCW ipl device attributes */ +static ssize_t ipl_ccw_loadparm_show(struct subsystem *subsys, char *page) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + + if (!SCCB_VALID) + return sprintf(page, "#unknown#\n"); + memcpy(loadparm, SCCB_LOADPARM, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + strstrip(loadparm); + return sprintf(page, "%s\n", loadparm); +} + +static struct subsys_attribute sys_ipl_ccw_loadparm_attr = + __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); + static struct attribute *ipl_ccw_attrs[] = { &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, NULL, }; @@ -348,8 +373,57 @@ static struct attribute_group reipl_fcp_attr_group = { DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", reipl_block_ccw->ipl_info.ccw.devno); +static void reipl_get_ascii_loadparm(char *loadparm) +{ + memcpy(loadparm, &reipl_block_ccw->ipl_info.ccw.load_param, + LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + loadparm[LOADPARM_LEN] = 0; + strstrip(loadparm); +} + +static ssize_t reipl_ccw_loadparm_show(struct subsystem *subsys, char *page) +{ + char buf[LOADPARM_LEN + 1]; + + reipl_get_ascii_loadparm(buf); + return sprintf(page, "%s\n", buf); +} + +static ssize_t reipl_ccw_loadparm_store(struct subsystem *subsys, + const char *buf, size_t len) +{ + int i, lp_len; + + /* ignore trailing newline */ + lp_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + lp_len--; + /* loadparm can have max 8 characters and must not start with a blank */ + if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' '))) + return -EINVAL; + /* loadparm can only contain "a-z,A-Z,0-9,SP,." */ + for (i = 0; i < lp_len; i++) { + if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') || + (buf[i] == '.')) + continue; + return -EINVAL; + } + /* initialize loadparm with blanks */ + memset(&reipl_block_ccw->ipl_info.ccw.load_param, ' ', LOADPARM_LEN); + /* copy and convert to ebcdic */ + memcpy(&reipl_block_ccw->ipl_info.ccw.load_param, buf, lp_len); + ASCEBC(reipl_block_ccw->ipl_info.ccw.load_param, LOADPARM_LEN); + return len; +} + +static struct subsys_attribute sys_reipl_ccw_loadparm_attr = + __ATTR(loadparm, 0644, reipl_ccw_loadparm_show, + reipl_ccw_loadparm_store); + static struct attribute *reipl_ccw_attrs[] = { &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, NULL, }; @@ -502,23 +576,6 @@ static struct subsys_attribute dump_type_attr = static decl_subsys(dump, NULL, NULL); -#ifdef CONFIG_SMP -static void dump_smp_stop_all(void) -{ - int cpu; - preempt_disable(); - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while (signal_processor(cpu, sigp_stop) == sigp_busy) - udelay(10); - } - preempt_enable(); -} -#else -#define dump_smp_stop_all() do { } while (0) -#endif - /* * Shutdown actions section */ @@ -571,11 +628,14 @@ void do_reipl(void) { struct ccw_dev_id devid; static char buf[100]; + char loadparm[LOADPARM_LEN + 1]; switch (reipl_type) { case IPL_TYPE_CCW: + reipl_get_ascii_loadparm(loadparm); printk(KERN_EMERG "reboot on ccw device: 0.0.%04x\n", reipl_block_ccw->ipl_info.ccw.devno); + printk(KERN_EMERG "loadparm = '%s'\n", loadparm); break; case IPL_TYPE_FCP: printk(KERN_EMERG "reboot on fcp device:\n"); @@ -588,12 +648,19 @@ void do_reipl(void) switch (reipl_method) { case IPL_METHOD_CCW_CIO: devid.devno = reipl_block_ccw->ipl_info.ccw.devno; + if (ipl_get_type() == IPL_TYPE_CCW && devid.devno == ipl_devno) + diag308(DIAG308_IPL, NULL); devid.ssid = 0; reipl_ccw_dev(&devid); break; case IPL_METHOD_CCW_VM: - sprintf(buf, "IPL %X", reipl_block_ccw->ipl_info.ccw.devno); - cpcmd(buf, NULL, 0, NULL); + if (strlen(loadparm) == 0) + sprintf(buf, "IPL %X", + reipl_block_ccw->ipl_info.ccw.devno); + else + sprintf(buf, "IPL %X LOADPARM '%s'", + reipl_block_ccw->ipl_info.ccw.devno, loadparm); + __cpcmd(buf, NULL, 0, NULL); break; case IPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); @@ -607,16 +674,17 @@ void do_reipl(void) diag308(DIAG308_IPL, NULL); break; case IPL_METHOD_FCP_RO_VM: - cpcmd("IPL", NULL, 0, NULL); + __cpcmd("IPL", NULL, 0, NULL); break; case IPL_METHOD_NONE: default: if (MACHINE_IS_VM) - cpcmd("IPL", NULL, 0, NULL); + __cpcmd("IPL", NULL, 0, NULL); diag308(DIAG308_IPL, NULL); break; } - panic("reipl failed!\n"); + printk(KERN_EMERG "reboot failed!\n"); + signal_processor(smp_processor_id(), sigp_stop_and_store_status); } static void do_dump(void) @@ -639,17 +707,17 @@ static void do_dump(void) switch (dump_method) { case IPL_METHOD_CCW_CIO: - dump_smp_stop_all(); + smp_send_stop(); devid.devno = dump_block_ccw->ipl_info.ccw.devno; devid.ssid = 0; reipl_ccw_dev(&devid); break; case IPL_METHOD_CCW_VM: - dump_smp_stop_all(); + smp_send_stop(); sprintf(buf, "STORE STATUS"); - cpcmd(buf, NULL, 0, NULL); + __cpcmd(buf, NULL, 0, NULL); sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); - cpcmd(buf, NULL, 0, NULL); + __cpcmd(buf, NULL, 0, NULL); break; case IPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, dump_block_ccw); @@ -746,6 +814,17 @@ static int __init reipl_ccw_init(void) reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; reipl_block_ccw->hdr.blk0_len = sizeof(reipl_block_ccw->ipl_info.ccw); reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + /* check if read scp info worked and set loadparm */ + if (SCCB_VALID) + memcpy(reipl_block_ccw->ipl_info.ccw.load_param, + SCCB_LOADPARM, LOADPARM_LEN); + else + /* read scp info failed: set empty loadparm (EBCDIC blanks) */ + memset(reipl_block_ccw->ipl_info.ccw.load_param, 0x40, + LOADPARM_LEN); + /* FIXME: check for diag308_set_works when enabling diag ccw reipl */ + if (!MACHINE_IS_VM) + sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO; if (ipl_get_type() == IPL_TYPE_CCW) reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; reipl_capabilities |= IPL_TYPE_CCW; @@ -827,13 +906,11 @@ static int __init dump_ccw_init(void) return 0; } -extern char s390_readinfo_sccb[]; - static int __init dump_fcp_init(void) { int rc; - if(!(s390_readinfo_sccb[91] & 0x2)) + if(!(SCCB_FLAG & 0x2) || !SCCB_VALID) return 0; /* LDIPL DUMP is not installed */ if (!diag308_set_works) return 0; @@ -931,3 +1008,53 @@ static int __init s390_ipl_init(void) } __initcall(s390_ipl_init); + +static LIST_HEAD(rcall); +static DEFINE_MUTEX(rcall_mutex); + +void register_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_add(&reset->list, &rcall); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(register_reset_call); + +void unregister_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_del(&reset->list); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(unregister_reset_call); + +static void do_reset_calls(void) +{ + struct reset_call *reset; + + list_for_each_entry(reset, &rcall, list) + reset->fn(); +} + +extern void reset_mcck_handler(void); + +void s390_reset_system(void) +{ + struct _lowcore *lc; + + /* Stack for interrupt/machine check handler */ + lc = (struct _lowcore *)(unsigned long) store_prefix(); + lc->panic_stack = S390_lowcore.panic_stack; + + /* Disable prefixing */ + set_prefix(0); + + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + + /* Set new machine check handler */ + S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) &reset_mcck_handler; + do_reset_calls(); +} diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 67914fe7f317..576368c4f605 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -200,7 +200,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 60b1ea9f946b..f6d9bcc0f75b 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,15 +1,10 @@ /* * arch/s390/kernel/machine_kexec.c * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005,2006 * - * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> - * - */ - -/* - * s390_machine_kexec.c - handle the transition of Linux booting another kernel - * on the S390 architecture. + * Author(s): Rolf Adelsberger, + * Heiko Carstens <heiko.carstens@de.ibm.com> */ #include <linux/device.h> @@ -22,86 +17,49 @@ #include <asm/pgalloc.h> #include <asm/system.h> #include <asm/smp.h> +#include <asm/reset.h> -static void kexec_halt_all_cpus(void *); - -typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long); +typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; -int -machine_kexec_prepare(struct kimage *image) +int machine_kexec_prepare(struct kimage *image) { - unsigned long reboot_code_buffer; + void *reboot_code_buffer; /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; /* Get the destination where the assembler code should be copied to.*/ - reboot_code_buffer = page_to_pfn(image->control_code_page)<<PAGE_SHIFT; + reboot_code_buffer = (void *) page_to_phys(image->control_code_page); /* Then copy it */ - memcpy((void *) reboot_code_buffer, relocate_kernel, - relocate_kernel_len); + memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len); return 0; } -void -machine_kexec_cleanup(struct kimage *image) +void machine_kexec_cleanup(struct kimage *image) { } -void -machine_shutdown(void) +void machine_shutdown(void) { printk(KERN_INFO "kexec: machine_shutdown called\n"); } -NORET_TYPE void -machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { - clear_all_subchannels(); - cio_reset_channel_paths(); - - /* Disable lowcore protection */ - ctl_clear_bit(0,28); - - on_each_cpu(kexec_halt_all_cpus, image, 0, 0); - for (;;); -} - -extern void pfault_fini(void); - -static void -kexec_halt_all_cpus(void *kernel_image) -{ - static atomic_t cpuid = ATOMIC_INIT(-1); - int cpu; - struct kimage *image; relocate_kernel_t data_mover; -#ifdef CONFIG_PFAULT - if (MACHINE_IS_VM) - pfault_fini(); -#endif + smp_send_stop(); + pfault_fini(); + s390_reset_system(); - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) - signal_processor(smp_processor_id(), sigp_stop); - - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while (!smp_cpu_not_running(cpu)) - cpu_relax(); - } - - image = (struct kimage *) kernel_image; - data_mover = (relocate_kernel_t) - (page_to_pfn(image->control_code_page) << PAGE_SHIFT); + data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ - (*data_mover) (&image->head, image->start); + (*data_mover)(&image->head, image->start); + for (;;); } diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 0340477f3b08..f9434d42ce9f 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -11,19 +11,10 @@ .globl do_reipl_asm do_reipl_asm: basr %r13,0 .Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) - - # switch off lowcore protection - -.Lpg1: stctl %c0,%c0,.Lctlsave1-.Lpg0(%r13) - stctl %c0,%c0,.Lctlsave2-.Lpg0(%r13) - ni .Lctlsave1-.Lpg0(%r13),0xef - lctl %c0,%c0,.Lctlsave1-.Lpg0(%r13) - - # do store status of all registers +.Lpg1: # do store status of all registers stm %r0,%r15,__LC_GPREGS_SAVE_AREA stctl %c0,%c15,__LC_CREGS_SAVE_AREA - mvc __LC_CREGS_SAVE_AREA(4),.Lctlsave2-.Lpg0(%r13) stam %a0,%a15,__LC_AREGS_SAVE_AREA stpx __LC_PREFIX_SAVE_AREA stckc .Lclkcmp-.Lpg0(%r13) @@ -56,8 +47,7 @@ do_reipl_asm: basr %r13,0 .L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 jz .L003 bas %r14,.Ldisab-.Lpg0(%r13) -.L003: spx .Lnull-.Lpg0(%r13) - st %r1,__LC_SUBCHANNEL_ID +.L003: st %r1,__LC_SUBCHANNEL_ID lpsw 0 sigp 0,0,0(6) .Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) @@ -65,9 +55,6 @@ do_reipl_asm: basr %r13,0 .align 8 .Lclkcmp: .quad 0x0000000000000000 .Lall: .long 0xff000000 -.Lnull: .long 0x00000000 -.Lctlsave1: .long 0x00000000 -.Lctlsave2: .long 0x00000000 .align 8 .Lnewpsw: .long 0x00080000,0x80000000+.Lpg1 .Lpcnew: .long 0x00080000,0x80000000+.Lecs diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index de7435054f7c..f18ef260ca23 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -10,10 +10,10 @@ #include <asm/lowcore.h> .globl do_reipl_asm do_reipl_asm: basr %r13,0 +.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) +.Lpg1: # do store status of all registers - # do store status of all registers - -.Lpg0: stg %r1,.Lregsave-.Lpg0(%r13) + stg %r1,.Lregsave-.Lpg0(%r13) lghi %r1,0x1000 stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1) lg %r0,.Lregsave-.Lpg0(%r13) @@ -27,11 +27,7 @@ do_reipl_asm: basr %r13,0 stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) - lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: lctlg %c6,%c6,.Lall-.Lpg0(%r13) - stctg %c0,%c0,.Lregsave-.Lpg0(%r13) - ni .Lregsave+4-.Lpg0(%r13),0xef - lctlg %c0,%c0,.Lregsave-.Lpg0(%r13) + lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) stsch .Lschib-.Lpg0(%r13) @@ -56,8 +52,7 @@ do_reipl_asm: basr %r13,0 .L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 jz .L003 bas %r14,.Ldisab-.Lpg0(%r13) -.L003: spx .Lnull-.Lpg0(%r13) - st %r1,__LC_SUBCHANNEL_ID +.L003: st %r1,__LC_SUBCHANNEL_ID lhi %r1,0 # mode 0 = esa slr %r0,%r0 # set cpuid to zero sigp %r1,%r0,0x12 # switch to esa mode @@ -70,7 +65,6 @@ do_reipl_asm: basr %r13,0 .Lclkcmp: .quad 0x0000000000000000 .Lall: .quad 0x00000000ff000000 .Lregsave: .quad 0x0000000000000000 -.Lnull: .long 0x0000000000000000 .align 16 /* * These addresses have to be 31 bit otherwise diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index f9899ff2e5b0..3b456b80bcee 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -26,8 +26,7 @@ relocate_kernel: basr %r13,0 # base address .base: - stnsm sys_msk-.base(%r13),0xf8 # disable DAT and IRQ (external) - spx zero64-.base(%r13) # absolute addressing mode + stnsm sys_msk-.base(%r13),0xfb # disable DAT stctl %c0,%c15,ctlregs-.base(%r13) stm %r0,%r15,gprregs-.base(%r13) la %r1,load_psw-.base(%r13) @@ -97,8 +96,6 @@ lpsw 0 # hopefully start new kernel... .align 8 - zero64: - .quad 0 load_psw: .long 0x00080000,0x80000000 sys_msk: diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S index 4fb443042d9c..1f9ea2067b59 100644 --- a/arch/s390/kernel/relocate_kernel64.S +++ b/arch/s390/kernel/relocate_kernel64.S @@ -27,8 +27,7 @@ relocate_kernel: basr %r13,0 # base address .base: - stnsm sys_msk-.base(%r13),0xf8 # disable DAT and IRQs - spx zero64-.base(%r13) # absolute addressing mode + stnsm sys_msk-.base(%r13),0xfb # disable DAT stctg %c0,%c15,ctlregs-.base(%r13) stmg %r0,%r15,gprregs-.base(%r13) lghi %r0,3 @@ -100,8 +99,6 @@ lpsw 0 # hopefully start new kernel... .align 8 - zero64: - .quad 0 load_psw: .long 0x00080000,0x80000000 sys_msk: diff --git a/arch/s390/kernel/reset.S b/arch/s390/kernel/reset.S new file mode 100644 index 000000000000..be8688c0665c --- /dev/null +++ b/arch/s390/kernel/reset.S @@ -0,0 +1,48 @@ +/* + * arch/s390/kernel/reset.S + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <asm/ptrace.h> +#include <asm/lowcore.h> + +#ifdef CONFIG_64BIT + + .globl reset_mcck_handler +reset_mcck_handler: + basr %r13,0 +0: lg %r15,__LC_PANIC_STACK # load panic stack + aghi %r15,-STACK_FRAME_OVERHEAD + lg %r1,s390_reset_mcck_handler-0b(%r13) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 +1: la %r1,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + lpswe __LC_MCK_OLD_PSW + + .globl s390_reset_mcck_handler +s390_reset_mcck_handler: + .quad 0 + +#else /* CONFIG_64BIT */ + + .globl reset_mcck_handler +reset_mcck_handler: + basr %r13,0 +0: l %r15,__LC_PANIC_STACK # load panic stack + ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,s390_reset_mcck_handler-0b(%r13) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lm %r0,%r15,__LC_GPREGS_SAVE_AREA + lpsw __LC_MCK_OLD_PSW + + .globl s390_reset_mcck_handler +s390_reset_mcck_handler: + .long 0 + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 49f2b68e32b1..49ef206ec880 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -62,13 +62,9 @@ EXPORT_SYMBOL_GPL(uaccess); unsigned int console_mode = 0; unsigned int console_devno = -1; unsigned int console_irq = -1; -unsigned long memory_size = 0; unsigned long machine_flags = 0; -struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS] = { { 0 } }; -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 + +struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ static unsigned long __initdata memory_end; @@ -228,11 +224,11 @@ static void __init conmode_default(void) char *ptr; if (MACHINE_IS_VM) { - __cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); + cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); console_irq = simple_strtoul(ptr + 13, NULL, 16); - __cpcmd("QUERY TERM", query_buffer, 1024, NULL); + cpcmd("QUERY TERM", query_buffer, 1024, NULL); ptr = strstr(query_buffer, "CONMODE"); /* * Set the conmode to 3215 so that the device recognition @@ -241,7 +237,7 @@ static void __init conmode_default(void) * 3215 and the 3270 driver will try to access the console * device (3215 as console and 3270 as normal tty). */ - __cpcmd("TERM CONMODE 3215", NULL, 0, NULL); + cpcmd("TERM CONMODE 3215", NULL, 0, NULL); if (ptr == NULL) { #if defined(CONFIG_SCLP_CONSOLE) SET_CONSOLE_SCLP; @@ -298,14 +294,14 @@ static void do_machine_restart_nonsmp(char * __unused) static void do_machine_halt_nonsmp(void) { if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0, NULL); + __cpcmd(vmhalt_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } static void do_machine_power_off_nonsmp(void) { if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0, NULL); + __cpcmd(vmpoff_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } @@ -434,7 +430,7 @@ setup_lowcore(void) lc->extended_save_area_addr = (__u32) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); /* enable extended save area */ - ctl_set_bit(14, 29); + __ctl_set_bit(14, 29); } #endif set_prefix((u32)(unsigned long) lc); @@ -473,6 +469,37 @@ setup_resources(void) } } +static void __init setup_memory_end(void) +{ + unsigned long real_size, memory_size; + unsigned long max_mem, max_phys; + int i; + + memory_size = real_size = 0; + max_phys = VMALLOC_END - VMALLOC_MIN_SIZE; + memory_end &= PAGE_MASK; + + max_mem = memory_end ? min(max_phys, memory_end) : max_phys; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; + + real_size = max(real_size, chunk->addr + chunk->size); + if (chunk->addr >= max_mem) { + memset(chunk, 0, sizeof(*chunk)); + continue; + } + if (chunk->addr + chunk->size > max_mem) + chunk->size = max_mem - chunk->addr; + memory_size = max(memory_size, chunk->addr + chunk->size); + } + if (!memory_end) + memory_end = memory_size; + if (real_size > memory_end) + printk("More memory detected than supported. Unused: %luk\n", + (real_size - memory_end) >> 10); +} + static void __init setup_memory(void) { @@ -616,8 +643,6 @@ setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; - memory_end = memory_size; - if (MACHINE_HAS_MVCOS) memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess)); else @@ -625,20 +650,7 @@ setup_arch(char **cmdline_p) parse_early_param(); -#ifndef CONFIG_64BIT - memory_end &= ~0x400000UL; - - /* - * We need some free virtual space to be able to do vmalloc. - * On a machine with 2GB memory we make sure that we have at - * least 128 MB free space for vmalloc. - */ - if (memory_end > 1920*1024*1024) - memory_end = 1920*1024*1024; -#else /* CONFIG_64BIT */ - memory_end &= ~0x200000UL; -#endif /* CONFIG_64BIT */ - + setup_memory_end(); setup_memory(); setup_resources(); setup_lowcore(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 4392a77cbae8..4c8a7954ef48 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -80,10 +80,10 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, old_sigset_t mask; if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); } @@ -92,10 +92,10 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } return ret; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 62822245f9be..19090f7d4f51 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -230,18 +230,37 @@ static inline void do_store_status(void) } } +static inline void do_wait_for_stop(void) +{ + int cpu; + + /* Wait for all other cpus to enter stopped state */ + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + while(!smp_cpu_not_running(cpu)) + cpu_relax(); + } +} + /* * this function sends a 'stop' sigp to all other CPUs in the system. * it goes straight through. */ void smp_send_stop(void) { + /* Disable all interrupts/machine checks */ + __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK); + /* write magic number to zero page (absolute 0) */ lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; /* stop other processors. */ do_send_stop(); + /* wait until other processors are stopped */ + do_wait_for_stop(); + /* store status of other processors. */ do_store_status(); } @@ -250,88 +269,28 @@ void smp_send_stop(void) * Reboot, halt and power_off routines for SMP. */ -static void do_machine_restart(void * __unused) -{ - int cpu; - static atomic_t cpuid = ATOMIC_INIT(-1); - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) - signal_processor(smp_processor_id(), sigp_stop); - - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while(!smp_cpu_not_running(cpu)) - cpu_relax(); - } - - /* Store status of other cpus. */ - do_store_status(); - - /* - * Finally call reipl. Because we waited for all other - * cpus to enter this function we know that they do - * not hold any s390irq-locks (the cpus have been - * interrupted by an external interrupt and s390irq - * locks are always held disabled). - */ - do_reipl(); -} - void machine_restart_smp(char * __unused) { - on_each_cpu(do_machine_restart, NULL, 0, 0); -} - -static void do_wait_for_stop(void) -{ - unsigned long cr[16]; - - __ctl_store(cr, 0, 15); - cr[0] &= ~0xffff; - cr[6] = 0; - __ctl_load(cr, 0, 15); - for (;;) - enabled_wait(); -} - -static void do_machine_halt(void * __unused) -{ - static atomic_t cpuid = ATOMIC_INIT(-1); - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) == -1) { - smp_send_stop(); - if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0, NULL); - signal_processor(smp_processor_id(), - sigp_stop_and_store_status); - } - do_wait_for_stop(); + smp_send_stop(); + do_reipl(); } void machine_halt_smp(void) { - on_each_cpu(do_machine_halt, NULL, 0, 0); -} - -static void do_machine_power_off(void * __unused) -{ - static atomic_t cpuid = ATOMIC_INIT(-1); - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) == -1) { - smp_send_stop(); - if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0, NULL); - signal_processor(smp_processor_id(), - sigp_stop_and_store_status); - } - do_wait_for_stop(); + smp_send_stop(); + if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) + __cpcmd(vmhalt_cmd, NULL, 0, NULL); + signal_processor(smp_processor_id(), sigp_stop_and_store_status); + for (;;); } void machine_power_off_smp(void) { - on_each_cpu(do_machine_power_off, NULL, 0, 0); + smp_send_stop(); + if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) + __cpcmd(vmpoff_cmd, NULL, 0, NULL); + signal_processor(smp_processor_id(), sigp_stop_and_store_status); + for (;;); } /* @@ -501,8 +460,6 @@ __init smp_count_cpus(void) */ extern void init_cpu_timer(void); extern void init_cpu_vtimer(void); -extern int pfault_init(void); -extern void pfault_fini(void); int __devinit start_secondary(void *cpuvoid) { @@ -514,11 +471,9 @@ int __devinit start_secondary(void *cpuvoid) #ifdef CONFIG_VIRT_TIMER init_cpu_vtimer(); #endif -#ifdef CONFIG_PFAULT /* Enable pfault pseudo page faults on this cpu. */ - if (MACHINE_IS_VM) - pfault_init(); -#endif + pfault_init(); + /* Mark this cpu as online */ cpu_set(smp_processor_id(), cpu_online_map); /* Switch on interrupts */ @@ -708,11 +663,8 @@ __cpu_disable(void) } cpu_clear(cpu, cpu_online_map); -#ifdef CONFIG_PFAULT /* Disable pfault pseudo page faults on this cpu. */ - if (MACHINE_IS_VM) - pfault_fini(); -#endif + pfault_fini(); memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals)); memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals)); @@ -860,4 +812,3 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); EXPORT_SYMBOL(smp_call_function); EXPORT_SYMBOL(smp_get_cpu); EXPORT_SYMBOL(smp_put_cpu); - diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index e59baec56520..a4ceae3dbcf1 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -320,3 +320,4 @@ SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper) NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,sys_ni_syscall) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 66375a5e3d12..3cbb0dcf1f1d 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -58,12 +58,6 @@ int sysctl_userprocess_debug = 0; extern pgm_check_handler_t do_protection_exception; extern pgm_check_handler_t do_dat_exception; -#ifdef CONFIG_PFAULT -extern int pfault_init(void); -extern void pfault_fini(void); -extern void pfault_interrupt(__u16 error_code); -static ext_int_info_t ext_int_pfault; -#endif extern pgm_check_handler_t do_monitor_call; #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) @@ -135,7 +129,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) } } -void show_trace(struct task_struct *task, unsigned long * stack) +void show_trace(struct task_struct *task, unsigned long *stack) { register unsigned long __r15 asm ("15"); unsigned long sp; @@ -157,6 +151,9 @@ void show_trace(struct task_struct *task, unsigned long * stack) __show_trace(sp, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); printk("\n"); + if (!task) + task = current; + debug_show_held_locks(task); } void show_stack(struct task_struct *task, unsigned long *sp) @@ -462,7 +459,8 @@ asmlinkage void illegal_op(struct pt_regs * regs, long interruption_code) local_irq_enable(); if (regs->psw.mask & PSW_MASK_PSTATE) { - get_user(*((__u16 *) opcode), (__u16 __user *) location); + if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { if (current->ptrace & PT_PTRACED) force_sig(SIGTRAP, current); @@ -470,20 +468,25 @@ asmlinkage void illegal_op(struct pt_regs * regs, long interruption_code) signal = SIGILL; #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_b3(opcode, regs); } else if (opcode[0] == 0xed) { - get_user(*((__u32 *) (opcode+2)), - (__u32 __user *)(location+1)); + if (get_user(*((__u32 *) (opcode+2)), + (__u32 __user *)(location+1))) + return; signal = math_emu_ed(opcode, regs); } else if (*((__u16 *) opcode) == 0xb299) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_srnm(opcode, regs); } else if (*((__u16 *) opcode) == 0xb29c) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_stfpc(opcode, regs); } else if (*((__u16 *) opcode) == 0xb29d) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_lfpc(opcode, regs); #endif } else @@ -733,22 +736,5 @@ void __init trap_init(void) pgm_check_table[0x1C] = &space_switch_exception; pgm_check_table[0x1D] = &hfp_sqrt_exception; pgm_check_table[0x40] = &do_monitor_call; - - if (MACHINE_IS_VM) { -#ifdef CONFIG_PFAULT - /* - * Try to get pfault pseudo page faults going. - */ - if (register_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault) != 0) - panic("Couldn't request external interrupt 0x2603"); - - if (pfault_init() == 0) - return; - - /* Tough luck, no pfault. */ - unregister_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault); -#endif - } + pfault_irq_init(); } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index af9e69a03011..fe0f2e97ba7b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -83,13 +83,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index b0cfa6c4883d..b5f94cf3bde8 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -4,7 +4,7 @@ EXTRA_AFLAGS := -traditional -lib-y += delay.o string.o uaccess_std.o +lib-y += delay.o string.o uaccess_std.o uaccess_pt.o lib-$(CONFIG_32BIT) += div64.o lib-$(CONFIG_64BIT) += uaccess_mvcos.o lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 121b2935a422..f9a23d57eb79 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -27,6 +27,9 @@ #define SLR "slgr" #endif +extern size_t copy_from_user_std(size_t, const void __user *, void *); +extern size_t copy_to_user_std(size_t, void __user *, const void *); + size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) { register unsigned long reg0 asm("0") = 0x81UL; @@ -66,6 +69,13 @@ size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) return size; } +size_t copy_from_user_mvcos_check(size_t size, const void __user *ptr, void *x) +{ + if (size <= 256) + return copy_from_user_std(size, ptr, x); + return copy_from_user_mvcos(size, ptr, x); +} + size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) { register unsigned long reg0 asm("0") = 0x810000UL; @@ -95,6 +105,13 @@ size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) return size; } +size_t copy_to_user_mvcos_check(size_t size, void __user *ptr, const void *x) +{ + if (size <= 256) + return copy_to_user_std(size, ptr, x); + return copy_to_user_mvcos(size, ptr, x); +} + size_t copy_in_user_mvcos(size_t size, void __user *to, const void __user *from) { register unsigned long reg0 asm("0") = 0x810081UL; @@ -145,18 +162,16 @@ size_t clear_user_mvcos(size_t size, void __user *to) return size; } -extern size_t copy_from_user_std_small(size_t, const void __user *, void *); -extern size_t copy_to_user_std_small(size_t, void __user *, const void *); extern size_t strnlen_user_std(size_t, const char __user *); extern size_t strncpy_from_user_std(size_t, const char __user *, char *); extern int futex_atomic_op(int, int __user *, int, int *); extern int futex_atomic_cmpxchg(int __user *, int, int); struct uaccess_ops uaccess_mvcos = { - .copy_from_user = copy_from_user_mvcos, - .copy_from_user_small = copy_from_user_std_small, - .copy_to_user = copy_to_user_mvcos, - .copy_to_user_small = copy_to_user_std_small, + .copy_from_user = copy_from_user_mvcos_check, + .copy_from_user_small = copy_from_user_std, + .copy_to_user = copy_to_user_mvcos_check, + .copy_to_user_small = copy_to_user_std, .copy_in_user = copy_in_user_mvcos, .clear_user = clear_user_mvcos, .strnlen_user = strnlen_user_std, diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c new file mode 100644 index 000000000000..633249c3ba91 --- /dev/null +++ b/arch/s390/lib/uaccess_pt.c @@ -0,0 +1,154 @@ +/* + * arch/s390/lib/uaccess_pt.c + * + * User access functions based on page table walks. + * + * Copyright IBM Corp. 2006 + * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include <linux/errno.h> +#include <linux/mm.h> +#include <asm/uaccess.h> +#include <asm/futex.h> + +static inline int __handle_fault(struct mm_struct *mm, unsigned long address, + int write_access) +{ + struct vm_area_struct *vma; + int ret = -EFAULT; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (unlikely(!vma)) + goto out; + if (unlikely(vma->vm_start > address)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out; + if (expand_stack(vma, address)) + goto out; + } + + if (!write_access) { + /* page not present, check vm flags */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto out; + } else { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + } + +survive: + switch (handle_mm_fault(mm, vma, address, write_access)) { + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto out_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + ret = 0; +out: + up_read(&mm->mmap_sem); + return ret; + +out_of_memory: + up_read(&mm->mmap_sem); + if (is_init(current)) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", current->comm); + return ret; + +out_sigbus: + up_read(&mm->mmap_sem); + current->thread.prot_addr = address; + current->thread.trap_no = 0x11; + force_sig(SIGBUS, current); + return ret; +} + +static inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, + size_t n, int write_user) +{ + struct mm_struct *mm = current->mm; + unsigned long offset, pfn, done, size; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + void *from, *to; + + done = 0; +retry: + spin_lock(&mm->page_table_lock); + do { + pgd = pgd_offset(mm, uaddr); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto fault; + + pmd = pmd_offset(pgd, uaddr); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto fault; + + pte = pte_offset_map(pmd, uaddr); + if (!pte || !pte_present(*pte) || + (write_user && !pte_write(*pte))) + goto fault; + + pfn = pte_pfn(*pte); + if (!pfn_valid(pfn)) + goto out; + + offset = uaddr & (PAGE_SIZE - 1); + size = min(n - done, PAGE_SIZE - offset); + if (write_user) { + to = (void *)((pfn << PAGE_SHIFT) + offset); + from = kptr + done; + } else { + from = (void *)((pfn << PAGE_SHIFT) + offset); + to = kptr + done; + } + memcpy(to, from, size); + done += size; + uaddr += size; + } while (done < n); +out: + spin_unlock(&mm->page_table_lock); + return n - done; +fault: + spin_unlock(&mm->page_table_lock); + if (__handle_fault(mm, uaddr, write_user)) + return n - done; + goto retry; +} + +size_t copy_from_user_pt(size_t n, const void __user *from, void *to) +{ + size_t rc; + + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy(to, (void __kernel __force *) from, n); + return 0; + } + rc = __user_copy_pt((unsigned long) from, to, n, 0); + if (unlikely(rc)) + memset(to + n - rc, 0, rc); + return rc; +} + +size_t copy_to_user_pt(size_t n, void __user *to, const void *from) +{ + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy((void __kernel __force *) to, from, n); + return 0; + } + return __user_copy_pt((unsigned long) to, (void *) from, n, 1); +} diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c index f44f0078b354..bbaca66fa293 100644 --- a/arch/s390/lib/uaccess_std.c +++ b/arch/s390/lib/uaccess_std.c @@ -11,7 +11,7 @@ #include <linux/errno.h> #include <linux/mm.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/futex.h> #ifndef __s390x__ @@ -28,6 +28,9 @@ #define SLR "slgr" #endif +extern size_t copy_from_user_pt(size_t n, const void __user *from, void *to); +extern size_t copy_to_user_pt(size_t n, void __user *to, const void *from); + size_t copy_from_user_std(size_t size, const void __user *ptr, void *x) { unsigned long tmp1, tmp2; @@ -69,34 +72,11 @@ size_t copy_from_user_std(size_t size, const void __user *ptr, void *x) return size; } -size_t copy_from_user_std_small(size_t size, const void __user *ptr, void *x) +size_t copy_from_user_std_check(size_t size, const void __user *ptr, void *x) { - unsigned long tmp1, tmp2; - - tmp1 = 0UL; - asm volatile( - "0: mvcp 0(%0,%2),0(%1),%3\n" - " "SLR" %0,%0\n" - " j 5f\n" - "1: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " "LHI" %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "2: mvcp 0(%4,%2),0(%1),%3\n" - " "SLR" %0,%4\n" - " "ALR" %2,%4\n" - "3:"LHI" %4,-1\n" - " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ - " bras %3,4f\n" - " xc 0(1,%2),0(%2)\n" - "4: ex %4,0(%3)\n" - "5:\n" - EX_TABLE(0b,1b) EX_TABLE(2b,3b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; + if (size <= 1024) + return copy_from_user_std(size, ptr, x); + return copy_from_user_pt(size, ptr, x); } size_t copy_to_user_std(size_t size, void __user *ptr, const void *x) @@ -130,28 +110,11 @@ size_t copy_to_user_std(size_t size, void __user *ptr, const void *x) return size; } -size_t copy_to_user_std_small(size_t size, void __user *ptr, const void *x) +size_t copy_to_user_std_check(size_t size, void __user *ptr, const void *x) { - unsigned long tmp1, tmp2; - - tmp1 = 0UL; - asm volatile( - "0: mvcs 0(%0,%1),0(%2),%3\n" - " "SLR" %0,%0\n" - " j 3f\n" - "1: la %4,255(%1)\n" /* ptr + 255 */ - " "LHI" %3,-4096\n" - " nr %4,%3\n" /* (ptr + 255) & -4096UL */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 3f\n" - "2: mvcs 0(%4,%1),0(%2),%3\n" - " "SLR" %0,%4\n" - "3:\n" - EX_TABLE(0b,1b) EX_TABLE(2b,3b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; + if (size <= 1024) + return copy_to_user_std(size, ptr, x); + return copy_to_user_pt(size, ptr, x); } size_t copy_in_user_std(size_t size, void __user *to, const void __user *from) @@ -295,7 +258,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old) { int oldval = 0, newval, ret; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -321,7 +284,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old) default: ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); *old = oldval; return ret; } @@ -343,10 +306,10 @@ int futex_atomic_cmpxchg(int __user *uaddr, int oldval, int newval) } struct uaccess_ops uaccess_std = { - .copy_from_user = copy_from_user_std, - .copy_from_user_small = copy_from_user_std_small, - .copy_to_user = copy_to_user_std, - .copy_to_user_small = copy_to_user_std_small, + .copy_from_user = copy_from_user_std_check, + .copy_from_user_small = copy_from_user_std, + .copy_to_user = copy_to_user_std_check, + .copy_to_user_small = copy_to_user_std, .copy_in_user = copy_in_user_std, .clear_user = clear_user_std, .strnlen_user = strnlen_user_std, diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index aa9a42b6e62d..8e09db1edbb9 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o ioremap.o extmem.o mmap.o +obj-y := init.o fault.o ioremap.o extmem.o mmap.o vmem.o obj-$(CONFIG_CMM) += cmm.o diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 226275d5c4f6..775bf19e742b 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -14,12 +14,14 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/bootmem.h> +#include <linux/ctype.h> #include <asm/page.h> +#include <asm/pgtable.h> #include <asm/ebcdic.h> #include <asm/errno.h> #include <asm/extmem.h> #include <asm/cpcmd.h> -#include <linux/ctype.h> +#include <asm/setup.h> #define DCSS_DEBUG /* Debug messages on/off */ @@ -77,15 +79,11 @@ struct dcss_segment { int segcnt; }; -static DEFINE_SPINLOCK(dcss_lock); +static DEFINE_MUTEX(dcss_lock); static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; -extern struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS]; - /* * Create the 8 bytes, ebcdic VM segment name from * an ascii name. @@ -117,7 +115,7 @@ segment_by_name (char *name) struct list_head *l; struct dcss_segment *tmp, *retval = NULL; - assert_spin_locked(&dcss_lock); + BUG_ON(!mutex_is_locked(&dcss_lock)); dcss_mkname (name, dcss_name); list_for_each (l, &dcss_list) { tmp = list_entry (l, struct dcss_segment, list); @@ -241,65 +239,6 @@ query_segment_type (struct dcss_segment *seg) } /* - * check if the given segment collides with guest storage. - * returns 1 if this is the case, 0 if no collision was found - */ -static int -segment_overlaps_storage(struct dcss_segment *seg) -{ - int i; - - for (i=0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type != 0) - continue; - if ((memory_chunk[i].addr >> 20) > (seg->end >> 20)) - continue; - if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20) - < (seg->start_addr >> 20)) - continue; - return 1; - } - return 0; -} - -/* - * check if segment collides with other segments that are currently loaded - * returns 1 if this is the case, 0 if no collision was found - */ -static int -segment_overlaps_others (struct dcss_segment *seg) -{ - struct list_head *l; - struct dcss_segment *tmp; - - assert_spin_locked(&dcss_lock); - list_for_each(l, &dcss_list) { - tmp = list_entry(l, struct dcss_segment, list); - if ((tmp->start_addr >> 20) > (seg->end >> 20)) - continue; - if ((tmp->end >> 20) < (seg->start_addr >> 20)) - continue; - if (seg == tmp) - continue; - return 1; - } - return 0; -} - -/* - * check if segment exceeds the kernel mapping range (detected or set via mem=) - * returns 1 if this is the case, 0 if segment fits into the range - */ -static inline int -segment_exceeds_range (struct dcss_segment *seg) -{ - int seg_last_pfn = (seg->end) >> PAGE_SHIFT; - if (seg_last_pfn > max_pfn) - return 1; - return 0; -} - -/* * get info about a segment * possible return values: * -ENOSYS : we are not running on VM @@ -344,24 +283,26 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long rc = query_segment_type (seg); if (rc < 0) goto out_free; - if (segment_exceeds_range(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - exceeds" - " kernel mapping range\n",name); - rc = -ERANGE; + + rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + + switch (rc) { + case 0: + break; + case -ENOSPC: + PRINT_WARN("segment_load: not loading segment %s - overlaps " + "storage/segment\n", name); goto out_free; - } - if (segment_overlaps_storage(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " storage\n",name); - rc = -ENOSPC; + case -ERANGE: + PRINT_WARN("segment_load: not loading segment %s - exceeds " + "kernel mapping range\n", name); goto out_free; - } - if (segment_overlaps_others(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " other segments\n",name); - rc = -EBUSY; + default: + PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n", + name, rc); goto out_free; } + if (do_nonshared) dcss_command = DCSS_LOADNSR; else @@ -375,7 +316,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long rc = dcss_diag_translate_rc (seg->end); dcss_diag(DCSS_PURGESEG, seg->dcss_name, &seg->start_addr, &seg->end); - goto out_free; + goto out_shared; } seg->do_nonshared = do_nonshared; atomic_set(&seg->ref_count, 1); @@ -394,6 +335,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long (void*)seg->start_addr, (void*)seg->end, segtype_string[seg->vm_segtype]); goto out; + out_shared: + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: @@ -429,7 +372,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, if (!MACHINE_IS_VM) return -ENOSYS; - spin_lock (&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) rc = __segment_load (name, do_nonshared, addr, end); @@ -444,7 +387,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, rc = -EPERM; } } - spin_unlock (&dcss_lock); + mutex_unlock(&dcss_lock); return rc; } @@ -467,7 +410,7 @@ segment_modify_shared (char *name, int do_nonshared) unsigned long dummy; int dcss_command, rc, diag_cc; - spin_lock (&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { rc = -EINVAL; @@ -508,7 +451,7 @@ segment_modify_shared (char *name, int do_nonshared) &dummy, &dummy); kfree(seg); out_unlock: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); return rc; } @@ -526,21 +469,21 @@ segment_unload(char *name) if (!MACHINE_IS_VM) return; - spin_lock(&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { PRINT_ERR ("could not find segment %s in segment_unload, " "please report to linux390@de.ibm.com\n",name); goto out_unlock; } - if (atomic_dec_return(&seg->ref_count) == 0) { - list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &dummy, &dummy); - kfree(seg); - } + if (atomic_dec_return(&seg->ref_count) != 0) + goto out_unlock; + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + list_del(&seg->list); + dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); + kfree(seg); out_unlock: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); } /* @@ -559,12 +502,13 @@ segment_save(char *name) if (!MACHINE_IS_VM) return; - spin_lock(&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { - PRINT_ERR ("could not find segment %s in segment_save, please report to linux390@de.ibm.com\n",name); - return; + PRINT_ERR("could not find segment %s in segment_save, please " + "report to linux390@de.ibm.com\n", name); + goto out; } startpfn = seg->start_addr >> PAGE_SHIFT; @@ -591,7 +535,7 @@ segment_save(char *name) goto out; } out: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); } EXPORT_SYMBOL(segment_load); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1c323bbfda91..cd85e34d8703 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -31,6 +31,7 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/kdebug.h> +#include <asm/s390_ext.h> #ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 @@ -394,6 +395,7 @@ void do_dat_exception(struct pt_regs *regs, unsigned long error_code) /* * 'pfault' pseudo page faults routines. */ +static ext_int_info_t ext_int_pfault; static int pfault_disable = 0; static int __init nopfault(char *str) @@ -422,7 +424,7 @@ int pfault_init(void) __PF_RES_FIELD }; int rc; - if (pfault_disable) + if (!MACHINE_IS_VM || pfault_disable) return -1; asm volatile( " diag %1,%0,0x258\n" @@ -440,7 +442,7 @@ void pfault_fini(void) pfault_refbk_t refbk = { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; - if (pfault_disable) + if (!MACHINE_IS_VM || pfault_disable) return; __ctl_clear_bit(0,9); asm volatile( @@ -500,5 +502,25 @@ pfault_interrupt(__u16 error_code) set_tsk_need_resched(tsk); } } -#endif +void __init pfault_irq_init(void) +{ + if (!MACHINE_IS_VM) + return; + + /* + * Try to get pfault pseudo page faults going. + */ + if (register_early_external_interrupt(0x2603, pfault_interrupt, + &ext_int_pfault) != 0) + panic("Couldn't request external interrupt 0x2603"); + + if (pfault_init() == 0) + return; + + /* Tough luck, no pfault. */ + pfault_disable = 1; + unregister_early_external_interrupt(0x2603, pfault_interrupt, + &ext_int_pfault); +} +#endif diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index d99891718709..4bb21be3b007 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -24,6 +24,7 @@ #include <linux/pagemap.h> #include <linux/bootmem.h> #include <linux/pfn.h> +#include <linux/poison.h> #include <asm/processor.h> #include <asm/system.h> @@ -69,6 +70,8 @@ void show_mem(void) printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); i = max_mapnr; while (i-- > 0) { + if (!pfn_valid(i)) + continue; page = pfn_to_page(i); total++; if (PageReserved(page)) @@ -84,65 +87,52 @@ void show_mem(void) printk("%d pages swap cached\n",cached); } +static void __init setup_ro_region(void) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + pte_t new_pte; + unsigned long address, end; + + address = ((unsigned long)&__start_rodata) & PAGE_MASK; + end = PFN_ALIGN((unsigned long)&__end_rodata); + + for (; address < end; address += PAGE_SIZE) { + pgd = pgd_offset_k(address); + pmd = pmd_offset(pgd, address); + pte = pte_offset_kernel(pmd, address); + new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO)); + set_pte(pte, new_pte); + } +} + +extern void vmem_map_init(void); + /* * paging_init() sets up the page tables */ - -#ifndef CONFIG_64BIT void __init paging_init(void) { - pgd_t * pg_dir; - pte_t * pg_table; - pte_t pte; - int i; - unsigned long tmp; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; - static const int ssm_mask = 0x04000000L; - unsigned long ro_start_pfn, ro_end_pfn; + pgd_t *pg_dir; + int i; + unsigned long pgdir_k; + static const int ssm_mask = 0x04000000L; unsigned long max_zone_pfns[MAX_NR_ZONES]; - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata); - - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = max_low_pfn; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(max_zone_pfns); - - /* unmap whole virtual address space */ + pg_dir = swapper_pg_dir; - pg_dir = swapper_pg_dir; - +#ifdef CONFIG_64BIT + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE; for (i = 0; i < PTRS_PER_PGD; i++) - pmd_clear((pmd_t *) pg_dir++); - - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - while (pfn < max_low_pfn) { - /* - * pg_table is physical at this point - */ - pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); - - pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table); - pg_dir++; - - for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) { - if (pfn >= ro_start_pfn && pfn < ro_end_pfn) - pte = pfn_pte(pfn, __pgprot(_PAGE_RO)); - else - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) - pte_val(pte) = _PAGE_TYPE_EMPTY; - set_pte(pg_table, pte); - pfn++; - } - } + pgd_clear(pg_dir + i); +#else + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; + for (i = 0; i < PTRS_PER_PGD; i++) + pmd_clear((pmd_t *)(pg_dir + i)); +#endif + vmem_map_init(); + setup_ro_region(); S390_lowcore.kernel_asce = pgdir_k; @@ -152,82 +142,11 @@ void __init paging_init(void) __ctl_load(pgdir_k, 13, 13); __raw_local_irq_ssm(ssm_mask); - local_flush_tlb(); -} - -#else /* CONFIG_64BIT */ - -void __init paging_init(void) -{ - pgd_t * pg_dir; - pmd_t * pm_dir; - pte_t * pt_dir; - pte_t pte; - int i,j,k; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | - _KERN_REGION_TABLE; - static const int ssm_mask = 0x04000000L; - unsigned long ro_start_pfn, ro_end_pfn; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); - - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) { - - if (pfn >= max_low_pfn) { - pgd_clear(pg_dir); - continue; - } - - pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4); - pgd_populate(&init_mm, pg_dir, pm_dir); - - for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) { - if (pfn >= max_low_pfn) { - pmd_clear(pm_dir); - continue; - } - - pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); - - for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) { - if (pfn >= ro_start_pfn && pfn < ro_end_pfn) - pte = pfn_pte(pfn, __pgprot(_PAGE_RO)); - else - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) - pte_val(pte) = _PAGE_TYPE_EMPTY; - set_pte(pt_dir, pte); - pfn++; - } - } - } - - S390_lowcore.kernel_asce = pgdir_k; - - /* enable virtual mapping in kernel mode */ - __ctl_load(pgdir_k, 1, 1); - __ctl_load(pgdir_k, 7, 7); - __ctl_load(pgdir_k, 13, 13); - __raw_local_irq_ssm(ssm_mask); - - local_flush_tlb(); } -#endif /* CONFIG_64BIT */ void __init mem_init(void) { @@ -257,6 +176,8 @@ void __init mem_init(void) printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&__start_rodata, PFN_ALIGN((unsigned long)&__end_rodata) - 1); + printk("Virtual memmap size: %ldk\n", + (max_pfn * sizeof(struct page)) >> 10); } void free_initmem(void) @@ -267,6 +188,7 @@ void free_initmem(void) for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c index 0f6e9ecbefe2..3d2100a4e209 100644 --- a/arch/s390/mm/ioremap.c +++ b/arch/s390/mm/ioremap.c @@ -15,87 +15,8 @@ #include <linux/vmalloc.h> #include <linux/mm.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/pgalloc.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - unsigned long pfn; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - pfn = phys_addr >> PAGE_SHIFT; - do { - if (!pte_none(*pte)) { - printk("remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pfn_pte(pfn, __pgprot(flags))); - address += PAGE_SIZE; - pfn++; - pte++; - } while (address && (address < end)); -} - -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - if (address >= end) - BUG(); - do { - pte_t * pte = pte_alloc_kernel(pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -static int remap_area_pages(unsigned long address, unsigned long phys_addr, - unsigned long size, unsigned long flags) -{ - int error; - pgd_t * dir; - unsigned long end = address + size; - - phys_addr -= address; - dir = pgd_offset(&init_mm, address); - flush_cache_all(); - if (address >= end) - BUG(); - do { - pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); - error = -ENOMEM; - if (!pmd) - break; - if (remap_area_pmd(pmd, address, end - address, - phys_addr + address, flags)) - break; - error = 0; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - flush_tlb_all(); - return 0; -} /* * Generic mapping function (not visible outside): @@ -122,7 +43,8 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag if (!area) return NULL; addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, + phys_addr, __pgprot(flags))) { vfree(addr); return NULL; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c new file mode 100644 index 000000000000..7f2944d3ec2a --- /dev/null +++ b/arch/s390/mm/vmem.c @@ -0,0 +1,381 @@ +/* + * arch/s390/mm/vmem.c + * + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/bootmem.h> +#include <linux/pfn.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/list.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/setup.h> +#include <asm/tlbflush.h> + +unsigned long vmalloc_end; +EXPORT_SYMBOL(vmalloc_end); + +static struct page *vmem_map; +static DEFINE_MUTEX(vmem_mutex); + +struct memory_segment { + struct list_head list; + unsigned long start; + unsigned long size; +}; + +static LIST_HEAD(mem_segs); + +void memmap_init(unsigned long size, int nid, unsigned long zone, + unsigned long start_pfn) +{ + struct page *start, *end; + struct page *map_start, *map_end; + int i; + + start = pfn_to_page(start_pfn); + end = start + size; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + unsigned long cstart, cend; + + cstart = PFN_DOWN(memory_chunk[i].addr); + cend = cstart + PFN_DOWN(memory_chunk[i].size); + + map_start = mem_map + cstart; + map_end = mem_map + cend; + + if (map_start < start) + map_start = start; + if (map_end > end) + map_end = end; + + map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) + / sizeof(struct page); + map_end += ((PFN_ALIGN((unsigned long) map_end) + - (unsigned long) map_end) + / sizeof(struct page)); + + if (map_start < map_end) + memmap_init_zone((unsigned long)(map_end - map_start), + nid, zone, page_to_pfn(map_start)); + } +} + +static inline void *vmem_alloc_pages(unsigned int order) +{ + if (slab_is_available()) + return (void *)__get_free_pages(GFP_KERNEL, order); + return alloc_bootmem_pages((1 << order) * PAGE_SIZE); +} + +static inline pmd_t *vmem_pmd_alloc(void) +{ + pmd_t *pmd; + int i; + + pmd = vmem_alloc_pages(PMD_ALLOC_ORDER); + if (!pmd) + return NULL; + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_clear(pmd + i); + return pmd; +} + +static inline pte_t *vmem_pte_alloc(void) +{ + pte_t *pte; + pte_t empty_pte; + int i; + + pte = vmem_alloc_pages(PTE_ALLOC_ORDER); + if (!pte) + return NULL; + pte_val(empty_pte) = _PAGE_TYPE_EMPTY; + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(pte + i, empty_pte); + return pte; +} + +/* + * Add a physical memory range to the 1:1 mapping. + */ +static int vmem_add_range(unsigned long start, unsigned long size) +{ + unsigned long address; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pm_dir); + } + + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + goto out; + pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL); + set_pte(pt_dir, pte); + } + ret = 0; +out: + flush_tlb_kernel_range(start, start + size); + return ret; +} + +/* + * Remove a physical memory range from the 1:1 mapping. + * Currently only invalidates page table entries. + */ +static void vmem_remove_range(unsigned long start, unsigned long size) +{ + unsigned long address; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + + pte_val(pte) = _PAGE_TYPE_EMPTY; + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) + continue; + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) + continue; + pt_dir = pte_offset_kernel(pm_dir, address); + set_pte(pt_dir, pte); + } + flush_tlb_kernel_range(start, start + size); +} + +/* + * Add a backed mem_map array to the virtual mem_map array. + */ +static int vmem_add_mem_map(unsigned long start, unsigned long size) +{ + unsigned long address, start_addr, end_addr; + struct page *map_start, *map_end; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + + map_start = vmem_map + PFN_DOWN(start); + map_end = vmem_map + PFN_DOWN(start + size); + + start_addr = (unsigned long) map_start & PAGE_MASK; + end_addr = PFN_ALIGN((unsigned long) map_end); + + for (address = start_addr; address < end_addr; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pm_dir); + } + + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + goto out; + pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + if (pte_none(*pt_dir)) { + unsigned long new_page; + + new_page =__pa(vmem_alloc_pages(0)); + if (!new_page) + goto out; + pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); + set_pte(pt_dir, pte); + } + } + ret = 0; +out: + flush_tlb_kernel_range(start_addr, end_addr); + return ret; +} + +static int vmem_add_mem(unsigned long start, unsigned long size) +{ + int ret; + + ret = vmem_add_range(start, size); + if (ret) + return ret; + return vmem_add_mem_map(start, size); +} + +/* + * Add memory segment to the segment list if it doesn't overlap with + * an already present segment. + */ +static int insert_memory_segment(struct memory_segment *seg) +{ + struct memory_segment *tmp; + + if (PFN_DOWN(seg->start + seg->size) > max_pfn || + seg->start + seg->size < seg->start) + return -ERANGE; + + list_for_each_entry(tmp, &mem_segs, list) { + if (seg->start >= tmp->start + tmp->size) + continue; + if (seg->start + seg->size <= tmp->start) + continue; + return -ENOSPC; + } + list_add(&seg->list, &mem_segs); + return 0; +} + +/* + * Remove memory segment from the segment list. + */ +static void remove_memory_segment(struct memory_segment *seg) +{ + list_del(&seg->list); +} + +static void __remove_shared_memory(struct memory_segment *seg) +{ + remove_memory_segment(seg); + vmem_remove_range(seg->start, seg->size); +} + +int remove_shared_memory(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + int ret; + + mutex_lock(&vmem_mutex); + + ret = -ENOENT; + list_for_each_entry(seg, &mem_segs, list) { + if (seg->start == start && seg->size == size) + break; + } + + if (seg->start != start || seg->size != size) + goto out; + + ret = 0; + __remove_shared_memory(seg); + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +int add_shared_memory(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + struct page *page; + unsigned long pfn, num_pfn, end_pfn; + int ret; + + mutex_lock(&vmem_mutex); + ret = -ENOMEM; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + goto out; + seg->start = start; + seg->size = size; + + ret = insert_memory_segment(seg); + if (ret) + goto out_free; + + ret = vmem_add_mem(start, size); + if (ret) + goto out_remove; + + pfn = PFN_DOWN(start); + num_pfn = PFN_DOWN(size); + end_pfn = pfn + num_pfn; + + page = pfn_to_page(pfn); + memset(page, 0, num_pfn * sizeof(struct page)); + + for (; pfn < end_pfn; pfn++) { + page = pfn_to_page(pfn); + init_page_count(page); + reset_page_mapcount(page); + SetPageReserved(page); + INIT_LIST_HEAD(&page->lru); + } + goto out; + +out_remove: + __remove_shared_memory(seg); +out_free: + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +/* + * map whole physical memory to virtual memory (identity mapping) + */ +void __init vmem_map_init(void) +{ + unsigned long map_size; + int i; + + map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page); + vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size); + vmem_map = (struct page *) vmalloc_end; + NODE_DATA(0)->node_mem_map = vmem_map; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) + vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size); +} + +/* + * Convert memory chunk array to a memory segment list so there is a single + * list that contains both r/w memory and shared memory segments. + */ +static int __init vmem_convert_memory_chunk(void) +{ + struct memory_segment *seg; + int i; + + mutex_lock(&vmem_mutex); + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (!memory_chunk[i].size) + continue; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + panic("Out of memory...\n"); + seg->start = memory_chunk[i].addr; + seg->size = memory_chunk[i].size; + insert_memory_segment(seg); + } + mutex_unlock(&vmem_mutex); + return 0; +} + +core_initcall(vmem_convert_memory_chunk); |