From 35edc2a5095efb189e60dc32bbb9d2663aec6d24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 20 Nov 2011 20:36:02 +0100 Subject: perf, arch: Rework perf_event_index() Put the logic to compute the event index into a per pmu method. This is required because the x86 rules are weird and wonderful and don't match the capabilities of the current scheme. AFAIK only powerpc actually has a usable userspace read of the PMCs but I'm not at all sure anybody actually used that. ARM is restored to the default since it currently does not support userspace access at all. And all software events are provided with a method that reports their index as 0 (disabled). Signed-off-by: Peter Zijlstra Cc: Michael Cree Cc: Will Deacon Cc: Deng-Cheng Zhu Cc: Anton Blanchard Cc: Eric B Munson Cc: Heiko Carstens Cc: Paul Mundt Cc: David S. Miller Cc: Richard Kuo Cc: Stephane Eranian Cc: Arun Sharma Link: http://lkml.kernel.org/n/tip-dfydxodki16lylkt3gl2j7cw@git.kernel.org Signed-off-by: Ingo Molnar --- arch/s390/include/asm/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index a75f168d2718..4eb444edbe49 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -6,4 +6,3 @@ /* Empty, just to avoid compiling error */ -#define PERF_EVENT_INDEX_OFFSET 0 -- cgit v1.2.3 From c3ab96f36aa308fa5bf432d5a4dafc80b7373805 Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Wed, 8 Feb 2012 00:19:49 +0000 Subject: qeth: add query OSA address table support Add qeth device private ioctl to query the OSA address table. This helps debugging hw related problems. Signed-off-by: Frank Blaschka Signed-off-by: David S. 
Miller --- arch/s390/include/asm/qeth.h | 7 +++ drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 99 +++++++++++++++++++++++++++++++++++++++ drivers/s390/net/qeth_core_mpc.h | 13 +++++ drivers/s390/net/qeth_l2_main.c | 3 ++ drivers/s390/net/qeth_l3_main.c | 3 ++ 6 files changed, 126 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/qeth.h b/arch/s390/include/asm/qeth.h index 90efda0b137d..2c7c898c03e4 100644 --- a/arch/s390/include/asm/qeth.h +++ b/arch/s390/include/asm/qeth.h @@ -20,6 +20,7 @@ #define SIOC_QETH_ARP_FLUSH_CACHE (SIOCDEVPRIVATE + 4) #define SIOC_QETH_ADP_SET_SNMP_CONTROL (SIOCDEVPRIVATE + 5) #define SIOC_QETH_GET_CARD_TYPE (SIOCDEVPRIVATE + 6) +#define SIOC_QETH_QUERY_OAT (SIOCDEVPRIVATE + 7) struct qeth_arp_cache_entry { __u8 macaddr[6]; @@ -107,4 +108,10 @@ struct qeth_arp_query_user_data { char *entries; } __attribute__((packed)); +struct qeth_query_oat_data { + __u32 command; + __u32 buffer_len; + __u32 response_len; + __u64 ptr; +}; #endif /* __ASM_S390_QETH_IOCTL_H__ */ diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 4abc79d3963f..ec7921b5138e 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -906,6 +906,7 @@ void qeth_prepare_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, char); struct qeth_cmd_buffer *qeth_wait_for_buffer(struct qeth_channel *); int qeth_mdio_read(struct net_device *, int, int); int qeth_snmp_command(struct qeth_card *, char __user *); +int qeth_query_oat_command(struct qeth_card *, char __user *); struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *, __u32, __u32); int qeth_default_setadapterparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9c3f38da4c01..0565584b52c3 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -26,6 +26,7 @@ #include #include 
#include +#include #include "qeth_core.h" @@ -4402,6 +4403,104 @@ int qeth_snmp_command(struct qeth_card *card, char __user *udata) } EXPORT_SYMBOL_GPL(qeth_snmp_command); +static int qeth_setadpparms_query_oat_cb(struct qeth_card *card, + struct qeth_reply *reply, unsigned long data) +{ + struct qeth_ipa_cmd *cmd; + struct qeth_qoat_priv *priv; + char *resdata; + int resdatalen; + + QETH_CARD_TEXT(card, 3, "qoatcb"); + + cmd = (struct qeth_ipa_cmd *)data; + priv = (struct qeth_qoat_priv *)reply->param; + resdatalen = cmd->data.setadapterparms.hdr.cmdlength; + resdata = (char *)data + 28; + + if (resdatalen > (priv->buffer_len - priv->response_len)) { + cmd->hdr.return_code = IPA_RC_FFFF; + return 0; + } + + memcpy((priv->buffer + priv->response_len), resdata, + resdatalen); + priv->response_len += resdatalen; + + if (cmd->data.setadapterparms.hdr.seq_no < + cmd->data.setadapterparms.hdr.used_total) + return 1; + return 0; +} + +int qeth_query_oat_command(struct qeth_card *card, char __user *udata) +{ + int rc = 0; + struct qeth_cmd_buffer *iob; + struct qeth_ipa_cmd *cmd; + struct qeth_query_oat *oat_req; + struct qeth_query_oat_data oat_data; + struct qeth_qoat_priv priv; + void __user *tmp; + + QETH_CARD_TEXT(card, 3, "qoatcmd"); + + if (!qeth_adp_supported(card, IPA_SETADP_QUERY_OAT)) { + rc = -EOPNOTSUPP; + goto out; + } + + if (copy_from_user(&oat_data, udata, + sizeof(struct qeth_query_oat_data))) { + rc = -EFAULT; + goto out; + } + + priv.buffer_len = oat_data.buffer_len; + priv.response_len = 0; + priv.buffer = kzalloc(oat_data.buffer_len, GFP_KERNEL); + if (!priv.buffer) { + rc = -ENOMEM; + goto out; + } + + iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_OAT, + sizeof(struct qeth_ipacmd_setadpparms_hdr) + + sizeof(struct qeth_query_oat)); + cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); + oat_req = &cmd->data.setadapterparms.data.query_oat; + oat_req->subcmd_code = oat_data.command; + + rc = qeth_send_ipa_cmd(card, iob, 
qeth_setadpparms_query_oat_cb, + &priv); + if (!rc) { + if (is_compat_task()) + tmp = compat_ptr(oat_data.ptr); + else + tmp = (void __user *)(unsigned long)oat_data.ptr; + + if (copy_to_user(tmp, priv.buffer, + priv.response_len)) { + rc = -EFAULT; + goto out_free; + } + + oat_data.response_len = priv.response_len; + + if (copy_to_user(udata, &oat_data, + sizeof(struct qeth_query_oat_data))) + rc = -EFAULT; + } else + if (rc == IPA_RC_FFFF) + rc = -EFAULT; + +out_free: + kfree(priv.buffer); +out: + return rc; +} +EXPORT_SYMBOL_GPL(qeth_query_oat_command); + static inline int qeth_get_qdio_q_format(struct qeth_card *card) { switch (card->info.type) { diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index e5a9d1c03839..578e19a2de6b 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -249,6 +249,7 @@ enum qeth_ipa_setadp_cmd { IPA_SETADP_SET_PROMISC_MODE = 0x00000800L, IPA_SETADP_SET_DIAG_ASSIST = 0x00002000L, IPA_SETADP_SET_ACCESS_CONTROL = 0x00010000L, + IPA_SETADP_QUERY_OAT = 0x00080000L, }; enum qeth_ipa_mac_ops { CHANGE_ADDR_READ_MAC = 0, @@ -398,6 +399,17 @@ struct qeth_set_access_ctrl { __u32 subcmd_code; } __attribute__((packed)); +struct qeth_query_oat { + __u32 subcmd_code; + __u8 reserved[12]; +} __packed; + +struct qeth_qoat_priv { + __u32 buffer_len; + __u32 response_len; + char *buffer; +}; + struct qeth_ipacmd_setadpparms_hdr { __u32 supp_hw_cmds; __u32 reserved1; @@ -417,6 +429,7 @@ struct qeth_ipacmd_setadpparms { struct qeth_change_addr change_addr; struct qeth_snmp_cmd snmp; struct qeth_set_access_ctrl set_access_ctrl; + struct qeth_query_oat query_oat; __u32 mode; } data; } __attribute__ ((packed)); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index c12967133114..e5c9cf15e5c6 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -75,6 +75,9 @@ static int qeth_l2_do_ioctl(struct net_device *dev, struct ifreq *rq, int 
cmd) mii_data->val_out = qeth_mdio_read(dev, mii_data->phy_id, mii_data->reg_num); break; + case SIOC_QETH_QUERY_OAT: + rc = qeth_query_oat_command(card, rq->ifr_ifru.ifru_data); + break; default: rc = -EOPNOTSUPP; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 25cd3799a76c..73bf8889984b 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2745,6 +2745,9 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) mii_data->phy_id, mii_data->reg_num); break; + case SIOC_QETH_QUERY_OAT: + rc = qeth_query_oat_command(card, rq->ifr_ifru.ifru_data); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From ef64a54f6e558155b4f149bb10666b9e914b6c54 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:31:34 +0000 Subject: sock: Introduce the SO_PEEK_OFF sock option This one specifies where to start MSG_PEEK-ing queue data from. When set to negative value means that MSG_PEEK works as usually -- peeks from the head of the queue always. When some bytes are peeked from queue and the peeking offset is non negative it is moved forward so that the next peek will return next portion of data. When non-peeking recvmsg occurs and the peeking offset is non negative it is moved backward so that the next peek will still peek the proper data (i.e. the one that would have been picked if there were no non peeking recv in between). The offset is set using per-proto operation to let the protocol handle the locking issues and to check whether the peeking offset feature is supported by the protocol the socket belongs to. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- arch/alpha/include/asm/socket.h | 1 + arch/arm/include/asm/socket.h | 1 + arch/avr32/include/asm/socket.h | 1 + arch/cris/include/asm/socket.h | 1 + arch/frv/include/asm/socket.h | 1 + arch/h8300/include/asm/socket.h | 1 + arch/ia64/include/asm/socket.h | 1 + arch/m32r/include/asm/socket.h | 1 + arch/m68k/include/asm/socket.h | 1 + arch/mips/include/asm/socket.h | 1 + arch/mn10300/include/asm/socket.h | 1 + arch/parisc/include/asm/socket.h | 1 + arch/powerpc/include/asm/socket.h | 1 + arch/s390/include/asm/socket.h | 1 + arch/sparc/include/asm/socket.h | 1 + arch/xtensa/include/asm/socket.h | 1 + include/asm-generic/socket.h | 1 + include/linux/net.h | 1 + include/net/sock.h | 25 +++++++++++++++++++++++++ net/core/sock.c | 13 +++++++++++++ 20 files changed, 56 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 082355f159e6..16449d330dae 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -71,6 +71,7 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index dec6f9afb3cf..d958c74e5260 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 247b88c760be..30078f98b3ab 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index e269264df7c4..048aba64600c 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -66,6 +66,7 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index ce80fdadcce5..7a361810f3cc 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -64,6 +64,7 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index cf1daab6f27e..e7bbfcee5b99 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 4b03664e3fb5..ced62de9d5a9 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -73,5 +73,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_IA64_SOCKET_H */ diff --git 
a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index e8b8c5bb053c..696cb4c7ca4e 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index d4708ce466e0..e8b41a6775f9 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index ad5c0a7a02a7..52104872e9e3 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -84,6 +84,7 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #ifdef __KERNEL__ diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 876356d78522..013fcc51698f 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index d28c51b61067..f717c9bec16f 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -63,6 +63,7 @@ #define SO_WIFI_STATUS 0x4022 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 0x4023 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 2fc2af8fbf59..fe1c0b478fd7 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -71,5 +71,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 67b5c1b14b51..581702fa1b0c 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -72,5 +72,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 8af1b64168b3..68e2e2746f6f 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -60,6 +60,7 @@ #define SO_WIFI_STATUS 0x0025 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 0x0026 /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index bb06968be227..74818b161362 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -75,5 +75,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index 49c1704173e7..d9aaac0c36d4 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -67,4 +67,5 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/linux/net.h b/include/linux/net.h index b29923006b11..be60c7f5e145 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -206,6 +206,7 @@ struct proto_ops { int offset, size_t size, int flags); ssize_t 
(*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); + void (*set_peek_off)(struct sock *sk, int val); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf020..9c0553b9e451 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -357,6 +357,7 @@ struct sock { struct page *sk_sndmsg_page; struct sk_buff *sk_send_head; __u32 sk_sndmsg_off; + __s32 sk_peek_off; int sk_write_pending; #ifdef CONFIG_SECURITY void *sk_security; @@ -373,6 +374,30 @@ struct sock { void (*sk_destruct)(struct sock *sk); }; +static inline int sk_peek_offset(struct sock *sk, int flags) +{ + if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0)) + return sk->sk_peek_off; + else + return 0; +} + +static inline void sk_peek_offset_bwd(struct sock *sk, int val) +{ + if (sk->sk_peek_off >= 0) { + if (sk->sk_peek_off >= val) + sk->sk_peek_off -= val; + else + sk->sk_peek_off = 0; + } +} + +static inline void sk_peek_offset_fwd(struct sock *sk, int val) +{ + if (sk->sk_peek_off >= 0) + sk->sk_peek_off += val; +} + /* * Hashed lists helper routines */ diff --git a/net/core/sock.c b/net/core/sock.c index 02f8dfe320b7..19942d4bb6e6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -793,6 +793,12 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; + case SO_PEEK_OFF: + if (sock->ops->set_peek_off) + sock->ops->set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + break; default: ret = -ENOPROTOOPT; break; @@ -1018,6 +1024,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); break; + case SO_PEEK_OFF: + if (!sock->ops->set_peek_off) + return -EOPNOTSUPP; + + v.val = sk->sk_peek_off; + break; default: return -ENOPROTOOPT; } @@ -2092,6 +2104,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; + sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; 
sk->sk_peer_cred = NULL; -- cgit v1.2.3 From c5905afb0ee6550b42c49213da1c22d67316c194 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Feb 2012 08:31:31 +0100 Subject: static keys: Introduce 'struct static_key', static_key_true()/false() and static_key_slow_[inc|dec]() So here's a boot tested patch on top of Jason's series that does all the cleanups I talked about and turns jump labels into a more intuitive to use facility. It should also address the various misconceptions and confusions that surround jump labels. Typical usage scenarios: #include struct static_key key = STATIC_KEY_INIT_TRUE; if (static_key_false(&key)) do unlikely code else do likely code Or: if (static_key_true(&key)) do likely code else do unlikely code The static key is modified via: static_key_slow_inc(&key); ... static_key_slow_dec(&key); The 'slow' prefix makes it abundantly clear that this is an expensive operation. I've updated all in-kernel code to use this everywhere. Note that I (intentionally) have not pushed through the rename blindly through to the lowest levels: the actual jump-label patching arch facility should be named like that, so we want to decouple jump labels from the static-key facility a bit. On non-jump-label enabled architectures static keys default to likely()/unlikely() branches. 
Signed-off-by: Ingo Molnar Acked-by: Jason Baron Acked-by: Steven Rostedt Cc: a.p.zijlstra@chello.nl Cc: mathieu.desnoyers@efficios.com Cc: davem@davemloft.net Cc: ddaney.cavm@gmail.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20120222085809.GA26397@elte.hu Signed-off-by: Ingo Molnar --- arch/Kconfig | 29 ++++--- arch/ia64/include/asm/paravirt.h | 6 +- arch/ia64/kernel/paravirt.c | 4 +- arch/mips/include/asm/jump_label.h | 2 +- arch/powerpc/include/asm/jump_label.h | 2 +- arch/s390/include/asm/jump_label.h | 2 +- arch/sparc/include/asm/jump_label.h | 2 +- arch/x86/include/asm/jump_label.h | 6 +- arch/x86/include/asm/paravirt.h | 6 +- arch/x86/kernel/kvm.c | 4 +- arch/x86/kernel/paravirt.c | 4 +- arch/x86/kvm/mmu_audit.c | 8 +- include/linux/jump_label.h | 139 ++++++++++++++++++++++++---------- include/linux/netdevice.h | 4 +- include/linux/netfilter.h | 6 +- include/linux/perf_event.h | 12 +-- include/linux/static_key.h | 1 + include/linux/tracepoint.h | 8 +- include/net/sock.h | 6 +- kernel/events/core.c | 16 ++-- kernel/jump_label.c | 128 ++++++++++++++++++------------- kernel/sched/core.c | 18 ++--- kernel/sched/fair.c | 8 +- kernel/sched/sched.h | 14 ++-- kernel/tracepoint.c | 20 ++--- net/core/dev.c | 24 +++--- net/core/net-sysfs.c | 4 +- net/core/sock.c | 4 +- net/core/sysctl_net_core.c | 4 +- net/ipv4/tcp_memcontrol.c | 6 +- net/netfilter/core.c | 6 +- 31 files changed, 298 insertions(+), 205 deletions(-) create mode 100644 include/linux/static_key.h (limited to 'arch/s390/include') diff --git a/arch/Kconfig b/arch/Kconfig index 4f55c736be11..5b448a74d0f7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -47,18 +47,29 @@ config KPROBES If in doubt, say "N". 
config JUMP_LABEL - bool "Optimize trace point call sites" + bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL help + This option enables a transparent branch optimization that + makes certain almost-always-true or almost-always-false branch + conditions even cheaper to execute within the kernel. + + Certain performance-sensitive kernel code, such as trace points, + scheduler functionality, networking code and KVM have such + branches and include support for this optimization technique. + If it is detected that the compiler has support for "asm goto", - the kernel will compile trace point locations with just a - nop instruction. When trace points are enabled, the nop will - be converted to a jump to the trace function. This technique - lowers overhead and stress on the branch prediction of the - processor. - - On i386, options added to the compiler flags may increase - the size of the kernel slightly. + the kernel will compile such branches with just a nop + instruction. When the condition flag is toggled to true, the + nop will be converted to a jump instruction to execute the + conditional block of instructions. + + This technique lowers overhead and stress on the branch prediction + of the processor and generally makes the kernel faster. The update + of the condition is slower, but those are always very rare. + + ( On 32-bit x86, the necessary options added to the compiler + flags may increase the size of the kernel slightly. 
) config OPTPROBES def_bool y diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index 32551d304cd7..b149b88ea795 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h @@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu) pv_time_ops.init_missing_ticks_accounting(cpu); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline int paravirt_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 100868216c55..1b22f6de2932 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = { * pv_time_ops * time operations */ -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static int ia64_native_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 1881b316ca45..4d6d77ed9b9d 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,7 +20,7 @@ #define WORD_INSN ".word" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\tnop\n\t" "nop\n\t" diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 938986e412f1..ae098c438f00 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool 
arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 95a6cf2b5b67..6c32190dc73e 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -13,7 +13,7 @@ #define ASM_ALIGN ".balign 4" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index fc73a82366f8..5080d16a832f 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,7 +7,7 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a32b18ce6ead..3a16c1483b45 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -9,12 +9,12 @@ #define JUMP_LABEL_NOP_SIZE 5 -#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" +#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:" - JUMP_LABEL_INITIAL_NOP + STATIC_KEY_INITIAL_NOP ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a7d2db9a74fb..c0180fd372d2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -230,9 +230,9 @@ static inline unsigned long long 
paravirt_sched_clock(void) return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline u64 paravirt_steal_clock(int cpu) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6f176b..694d801bf606 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -438,9 +438,9 @@ void __init kvm_guest_init(void) static __init int activate_jump_labels(void) { if (has_steal_clock) { - jump_label_inc(&paravirt_steal_enabled); + static_key_slow_inc(&paravirt_steal_enabled); if (steal_acc) - jump_label_inc(&paravirt_steal_rq_enabled); + static_key_slow_inc(&paravirt_steal_rq_enabled); } return 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d90272e6bc40..ada2f99388dd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr) __native_flush_tlb_single(addr); } -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static u64 native_steal_clock(int cpu) { diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index fe15dcc07a6b..ea7b4fd34676 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) } static bool mmu_audit; -static struct jump_label_key mmu_audit_key; +static struct static_key mmu_audit_key; static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { @@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { - if 
(static_key_false((&mmu_audit_key))) __kvm_mmu_audit(vcpu, point); } @@ -259,7 +259,7 @@ static void mmu_audit_enable(void) if (mmu_audit) return; - jump_label_inc(&mmu_audit_key); + static_key_slow_inc(&mmu_audit_key); mmu_audit = true; } @@ -268,7 +268,7 @@ static void mmu_audit_disable(void) if (!mmu_audit) return; - jump_label_dec(&mmu_audit_key); + static_key_slow_dec(&mmu_audit_key); mmu_audit = false; } diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f7c69580fea7..2172da2d9bb4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -9,15 +9,15 @@ * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support the result - * of a "if (static_branch(&key))" statement is a unconditional branch (which + * of a "if (static_key_false(&key))" statement is a unconditional branch (which * defaults to false - and the true block is placed out of line). * - * However at runtime we can change the 'static' branch target using - * jump_label_{inc,dec}(). These function as a 'reference' count on the key + * However at runtime we can change the branch target using + * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key * object and for as long as there are references all branches referring to * that particular key will point to the (out of line) true block. * - * Since this relies on modifying code the jump_label_{inc,dec}() functions + * Since this relies on modifying code the static_key_slow_{inc,dec}() functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional their runtime overhead * will be absolutely minimal, esp. 
in the default (off) case where the total @@ -26,12 +26,26 @@ * * When the control is directly exposed to userspace it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) - * cause significant performance degradation. Struct jump_label_key_deferred and - * jump_label_dec_deferred() provide for this. + * cause significant performance degradation. Struct static_key_deferred and + * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, it falls back to a simple * conditional branch. - */ + * + * struct static_key my_key = STATIC_KEY_INIT_TRUE; + * + * if (static_key_true(&my_key)) { + * } + * + * will result in the true case being in-line and starts the key with a single + * reference. Mixing static_key_true() and static_key_false() on the same key is not + * allowed. + * + * Not initializing the key (static data is initialized to 0s anyway) is the + * same as using STATIC_KEY_INIT_FALSE and static_key_false() is + * equivalent with static_branch(). 
+ * +*/ #include #include @@ -39,16 +53,17 @@ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) -struct jump_label_key { +struct static_key { atomic_t enabled; +/* Set lsb bit to 1 if branch is default true, 0 otherwise */ struct jump_entry *entries; #ifdef CONFIG_MODULES - struct jump_label_mod *next; + struct static_key_mod *next; #endif }; -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; unsigned long timeout; struct delayed_work work; }; @@ -66,13 +81,34 @@ struct module; #ifdef HAVE_JUMP_LABEL -#ifdef CONFIG_MODULES -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL} -#else -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL} -#endif +#define JUMP_LABEL_TRUE_BRANCH 1UL + +static +inline struct jump_entry *jump_label_get_entries(struct static_key *key) +{ + return (struct jump_entry *)((unsigned long)key->entries + & ~JUMP_LABEL_TRUE_BRANCH); +} + +static inline bool jump_label_get_branch_default(struct static_key *key) +{ + if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH) + return true; + return false; +} + +static __always_inline bool static_key_false(struct static_key *key) +{ + return arch_static_branch(key); +} -static __always_inline bool static_branch(struct jump_label_key *key) +static __always_inline bool static_key_true(struct static_key *key) +{ + return !static_key_false(key); +} + +/* Deprecated. Please use 'static_key_false() instead. 
*/ +static __always_inline bool static_branch(struct static_key *key) { return arch_static_branch(key); } @@ -88,21 +124,24 @@ extern void arch_jump_label_transform(struct jump_entry *entry, extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern int jump_label_text_reserved(void *start, void *end); -extern void jump_label_inc(struct jump_label_key *key); -extern void jump_label_dec(struct jump_label_key *key); -extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); -extern bool jump_label_enabled(struct jump_label_key *key); +extern void static_key_slow_inc(struct static_key *key); +extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_dec_deferred(struct static_key_deferred *key); +extern bool static_key_enabled(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); -extern void jump_label_rate_limit(struct jump_label_key_deferred *key, - unsigned long rl); +extern void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0), .entries = (void *)0 }) #else /* !HAVE_JUMP_LABEL */ #include -#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} - -struct jump_label_key { +struct static_key { atomic_t enabled; }; @@ -110,30 +149,45 @@ static __always_inline void jump_label_init(void) { } -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; }; -static __always_inline bool static_branch(struct jump_label_key *key) +static __always_inline bool static_key_false(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static __always_inline bool static_key_true(struct static_key *key) { - if (unlikely(atomic_read(&key->enabled))) + if 
(likely(atomic_read(&key->enabled)) > 0) return true; return false; } -static inline void jump_label_inc(struct jump_label_key *key) +/* Deprecated. Please use 'static_key_false() instead. */ +static __always_inline bool static_branch(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static inline void static_key_slow_inc(struct static_key *key) { atomic_inc(&key->enabled); } -static inline void jump_label_dec(struct jump_label_key *key) +static inline void static_key_slow_dec(struct static_key *key) { atomic_dec(&key->enabled); } -static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) +static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { - jump_label_dec(&key->key); + static_key_slow_dec(&key->key); } static inline int jump_label_text_reserved(void *start, void *end) @@ -144,9 +198,9 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -static inline bool jump_label_enabled(struct jump_label_key *key) +static inline bool static_key_enabled(struct static_key *key) { - return !!atomic_read(&key->enabled); + return (atomic_read(&key->enabled) > 0); } static inline int jump_label_apply_nops(struct module *mod) @@ -154,13 +208,20 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } -static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, +static inline void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { } + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1) }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0) }) + #endif /* HAVE_JUMP_LABEL */ -#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) -#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), 
}) +#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE +#define jump_label_enabled static_key_enabled #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..7dfaae7846ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -214,8 +214,8 @@ enum { #include #ifdef CONFIG_RPS -#include -extern struct jump_label_key rps_needed; +#include +extern struct static_key rps_needed; #endif struct neighbour; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b809265607d0..29734be334c1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if defined(CONFIG_JUMP_LABEL) -#include -extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#include +extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) - return static_branch(&nf_hooks_needed[pf][hook]); + return static_key_false(&nf_hooks_needed[pf][hook]); return !list_empty(&nf_hooks[pf][hook]); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 412b790f5da6..0d21e6f1cf53 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -514,7 +514,7 @@ struct perf_guest_info_callbacks { #include #include #include -#include +#include #include #include @@ -1038,7 +1038,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); @@ -1066,7 +1066,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 
addr) { struct pt_regs hot_regs; - if (static_branch(&perf_swevent_enabled[event_id])) { + if (static_key_false(&perf_swevent_enabled[event_id])) { if (!regs) { perf_fetch_caller_regs(&hot_regs); regs = &hot_regs; @@ -1075,12 +1075,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) } } -extern struct jump_label_key_deferred perf_sched_events; +extern struct static_key_deferred perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_in(prev, task); } @@ -1089,7 +1089,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); } diff --git a/include/linux/static_key.h b/include/linux/static_key.h new file mode 100644 index 000000000000..27bd3f8a0857 --- /dev/null +++ b/include/linux/static_key.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index fc36da97ff7e..bd96ecd0e05c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include struct module; struct tracepoint; @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - struct jump_label_key key; + struct static_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -145,7 +145,7 @@ static inline void tracepoint_synchronize_unregister(void) extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (static_branch(&__tracepoint_##name.key)) \ + if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ 
TP_ARGS(data_args), \ @@ -188,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void) __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ static struct tracepoint * const __tracepoint_ptr_##name __used \ __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf020..dcde2d9268cd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include @@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #endif /* SOCK_REFCNT_DEBUG */ #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) -extern struct jump_label_key memcg_socket_limit_enabled; +extern struct static_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) { return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); } -#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) +#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) #else #define mem_cgroup_sockets_enabled 0 static inline struct cg_proto *parent_cg_proto(struct proto *proto, diff --git a/kernel/events/core.c b/kernel/events/core.c index 7c3b9de55f6b..5e0f8bb89b2b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -128,7 +128,7 @@ enum event_type_t { * perf_sched_events : >0 events exist * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ -struct jump_label_key_deferred perf_sched_events __read_mostly; +struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static atomic_t nr_mmap_events __read_mostly; @@ 
-2769,7 +2769,7 @@ static void free_event(struct perf_event *event) if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_dec_deferred(&perf_sched_events); + static_key_slow_dec_deferred(&perf_sched_events); if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); if (event->attr.comm) @@ -2780,7 +2780,7 @@ static void free_event(struct perf_event *event) put_callchain_buffers(); if (is_cgroup_event(event)) { atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_dec_deferred(&perf_sched_events); + static_key_slow_dec_deferred(&perf_sched_events); } } @@ -4982,7 +4982,7 @@ fail: return err; } -struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) { @@ -4990,7 +4990,7 @@ static void sw_perf_event_destroy(struct perf_event *event) WARN_ON(event->parent); - jump_label_dec(&perf_swevent_enabled[event_id]); + static_key_slow_dec(&perf_swevent_enabled[event_id]); swevent_hlist_put(event); } @@ -5020,7 +5020,7 @@ static int perf_swevent_init(struct perf_event *event) if (err) return err; - jump_label_inc(&perf_swevent_enabled[event_id]); + static_key_slow_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; } @@ -5843,7 +5843,7 @@ done: if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_inc(&perf_sched_events.key); + static_key_slow_inc(&perf_sched_events.key); if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); if (event->attr.comm) @@ -6081,7 +6081,7 @@ SYSCALL_DEFINE5(perf_event_open, * - that may need work on context switch */ atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_inc(&perf_sched_events.key); + static_key_slow_inc(&perf_sched_events.key); } /* diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 543782e7cdd2..bf9dcadbb53a 100644 --- a/kernel/jump_label.c +++ 
b/kernel/jump_label.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #ifdef HAVE_JUMP_LABEL @@ -29,10 +29,11 @@ void jump_label_unlock(void) mutex_unlock(&jump_label_mutex); } -bool jump_label_enabled(struct jump_label_key *key) +bool static_key_enabled(struct static_key *key) { - return !!atomic_read(&key->enabled); + return (atomic_read(&key->enabled) > 0); } +EXPORT_SYMBOL_GPL(static_key_enabled); static int jump_label_cmp(const void *a, const void *b) { @@ -58,22 +59,26 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); } -static void jump_label_update(struct jump_label_key *key, int enable); +static void jump_label_update(struct static_key *key, int enable); -void jump_label_inc(struct jump_label_key *key) +void static_key_slow_inc(struct static_key *key) { if (atomic_inc_not_zero(&key->enabled)) return; jump_label_lock(); - if (atomic_read(&key->enabled) == 0) - jump_label_update(key, JUMP_LABEL_ENABLE); + if (atomic_read(&key->enabled) == 0) { + if (!jump_label_get_branch_default(key)) + jump_label_update(key, JUMP_LABEL_ENABLE); + else + jump_label_update(key, JUMP_LABEL_DISABLE); + } atomic_inc(&key->enabled); jump_label_unlock(); } -EXPORT_SYMBOL_GPL(jump_label_inc); +EXPORT_SYMBOL_GPL(static_key_slow_inc); -static void __jump_label_dec(struct jump_label_key *key, +static void __static_key_slow_dec(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { @@ -85,32 +90,35 @@ static void __jump_label_dec(struct jump_label_key *key, if (rate_limit) { atomic_inc(&key->enabled); schedule_delayed_work(work, rate_limit); - } else - jump_label_update(key, JUMP_LABEL_DISABLE); - + } else { + if (!jump_label_get_branch_default(key)) + jump_label_update(key, JUMP_LABEL_DISABLE); + else + jump_label_update(key, JUMP_LABEL_ENABLE); + } jump_label_unlock(); } 
-EXPORT_SYMBOL_GPL(jump_label_dec); static void jump_label_update_timeout(struct work_struct *work) { - struct jump_label_key_deferred *key = - container_of(work, struct jump_label_key_deferred, work.work); - __jump_label_dec(&key->key, 0, NULL); + struct static_key_deferred *key = + container_of(work, struct static_key_deferred, work.work); + __static_key_slow_dec(&key->key, 0, NULL); } -void jump_label_dec(struct jump_label_key *key) +void static_key_slow_dec(struct static_key *key) { - __jump_label_dec(key, 0, NULL); + __static_key_slow_dec(key, 0, NULL); } +EXPORT_SYMBOL_GPL(static_key_slow_dec); -void jump_label_dec_deferred(struct jump_label_key_deferred *key) +void static_key_slow_dec_deferred(struct static_key_deferred *key) { - __jump_label_dec(&key->key, key->timeout, &key->work); + __static_key_slow_dec(&key->key, key->timeout, &key->work); } +EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); - -void jump_label_rate_limit(struct jump_label_key_deferred *key, +void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { key->timeout = rl; @@ -153,7 +161,7 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry arch_jump_label_transform(entry, type); } -static void __jump_label_update(struct jump_label_key *key, +static void __jump_label_update(struct static_key *key, struct jump_entry *entry, struct jump_entry *stop, int enable) { @@ -170,27 +178,40 @@ static void __jump_label_update(struct jump_label_key *key, } } +static enum jump_label_type jump_label_type(struct static_key *key) +{ + bool true_branch = jump_label_get_branch_default(key); + bool state = static_key_enabled(key); + + if ((!true_branch && state) || (true_branch && !state)) + return JUMP_LABEL_ENABLE; + + return JUMP_LABEL_DISABLE; +} + void __init jump_label_init(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; - struct jump_label_key *key = NULL; + struct static_key *key = NULL; 
struct jump_entry *iter; jump_label_lock(); jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { - struct jump_label_key *iterk; + struct static_key *iterk; - iterk = (struct jump_label_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? - JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); + iterk = (struct static_key *)(unsigned long)iter->key; + arch_jump_label_transform_static(iter, jump_label_type(iterk)); if (iterk == key) continue; key = iterk; - key->entries = iter; + /* + * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. + */ + *((unsigned long *)&key->entries) += (unsigned long)iter; #ifdef CONFIG_MODULES key->next = NULL; #endif @@ -200,8 +221,8 @@ void __init jump_label_init(void) #ifdef CONFIG_MODULES -struct jump_label_mod { - struct jump_label_mod *next; +struct static_key_mod { + struct static_key_mod *next; struct jump_entry *entries; struct module *mod; }; @@ -221,9 +242,9 @@ static int __jump_label_mod_text_reserved(void *start, void *end) start, end); } -static void __jump_label_mod_update(struct jump_label_key *key, int enable) +static void __jump_label_mod_update(struct static_key *key, int enable) { - struct jump_label_mod *mod = key->next; + struct static_key_mod *mod = key->next; while (mod) { struct module *m = mod->mod; @@ -254,11 +275,7 @@ void jump_label_apply_nops(struct module *mod) return; for (iter = iter_start; iter < iter_stop; iter++) { - struct jump_label_key *iterk; - - iterk = (struct jump_label_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? 
- JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); + arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); } } @@ -267,8 +284,8 @@ static int jump_label_add_module(struct module *mod) struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - struct jump_label_key *key = NULL; - struct jump_label_mod *jlm; + struct static_key *key = NULL; + struct static_key_mod *jlm; /* if the module doesn't have jump label entries, just return */ if (iter_start == iter_stop) @@ -277,28 +294,30 @@ static int jump_label_add_module(struct module *mod) jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->key == (jump_label_t)(unsigned long)key) - continue; + struct static_key *iterk; - key = (struct jump_label_key *)(unsigned long)iter->key; + iterk = (struct static_key *)(unsigned long)iter->key; + if (iterk == key) + continue; + key = iterk; if (__module_address(iter->key) == mod) { - atomic_set(&key->enabled, 0); - key->entries = iter; + /* + * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. 
+ */ + *((unsigned long *)&key->entries) += (unsigned long)iter; key->next = NULL; continue; } - - jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); + jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); if (!jlm) return -ENOMEM; - jlm->mod = mod; jlm->entries = iter; jlm->next = key->next; key->next = jlm; - if (jump_label_enabled(key)) + if (jump_label_type(key) == JUMP_LABEL_ENABLE) __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); } @@ -310,14 +329,14 @@ static void jump_label_del_module(struct module *mod) struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - struct jump_label_key *key = NULL; - struct jump_label_mod *jlm, **prev; + struct static_key *key = NULL; + struct static_key_mod *jlm, **prev; for (iter = iter_start; iter < iter_stop; iter++) { if (iter->key == (jump_label_t)(unsigned long)key) continue; - key = (struct jump_label_key *)(unsigned long)iter->key; + key = (struct static_key *)(unsigned long)iter->key; if (__module_address(iter->key) == mod) continue; @@ -419,9 +438,10 @@ int jump_label_text_reserved(void *start, void *end) return ret; } -static void jump_label_update(struct jump_label_key *key, int enable) +static void jump_label_update(struct static_key *key, int enable) { - struct jump_entry *entry = key->entries, *stop = __stop___jump_table; + struct jump_entry *stop = __stop___jump_table; + struct jump_entry *entry = jump_label_get_entries(key); #ifdef CONFIG_MODULES struct module *mod = __module_address((unsigned long)key); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5255c9d2e053..112c6824476b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -162,13 +162,13 @@ static int sched_feat_show(struct seq_file *m, void *v) #ifdef HAVE_JUMP_LABEL -#define jump_label_key__true jump_label_key_enabled -#define jump_label_key__false jump_label_key_disabled +#define jump_label_key__true 
STATIC_KEY_INIT_TRUE +#define jump_label_key__false STATIC_KEY_INIT_FALSE #define SCHED_FEAT(name, enabled) \ jump_label_key__##enabled , -struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { +struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { #include "features.h" }; @@ -176,14 +176,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - if (jump_label_enabled(&sched_feat_keys[i])) - jump_label_dec(&sched_feat_keys[i]); + if (static_key_enabled(&sched_feat_keys[i])) + static_key_slow_dec(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - if (!jump_label_enabled(&sched_feat_keys[i])) - jump_label_inc(&sched_feat_keys[i]); + if (!static_key_enabled(&sched_feat_keys[i])) + static_key_slow_inc(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; @@ -894,7 +894,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) delta -= irq_delta; #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - if (static_branch((&paravirt_steal_rq_enabled))) { + if (static_key_false((&paravirt_steal_rq_enabled))) { u64 st; steal = paravirt_steal_clock(cpu_of(rq)); @@ -2756,7 +2756,7 @@ void account_idle_time(cputime_t cputime) static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT - if (static_branch(&paravirt_steal_enabled)) { + if (static_key_false(&paravirt_steal_enabled)) { u64 steal, st = 0; steal = paravirt_steal_clock(smp_processor_id()); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c6414fc669d..423547ada38a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1399,20 +1399,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) #ifdef CONFIG_CFS_BANDWIDTH #ifdef HAVE_JUMP_LABEL -static struct jump_label_key __cfs_bandwidth_used; +static struct static_key __cfs_bandwidth_used; static inline bool cfs_bandwidth_used(void) { - return static_branch(&__cfs_bandwidth_used); + return static_key_false(&__cfs_bandwidth_used); } void
account_cfs_bandwidth_used(int enabled, int was_enabled) { /* only need to count groups transitioning between enabled/!enabled */ if (enabled && !was_enabled) - jump_label_inc(&__cfs_bandwidth_used); + static_key_slow_inc(&__cfs_bandwidth_used); else if (!enabled && was_enabled) - jump_label_dec(&__cfs_bandwidth_used); + static_key_slow_dec(&__cfs_bandwidth_used); } #else /* HAVE_JUMP_LABEL */ static bool cfs_bandwidth_used(void) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98c0c2623db8..b4cd6d8ea150 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -611,7 +611,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) * Tunables that become constants when CONFIG_SCHED_DEBUG is off: */ #ifdef CONFIG_SCHED_DEBUG -# include +# include # define const_debug __read_mostly #else # define const_debug const @@ -630,18 +630,18 @@ enum { #undef SCHED_FEAT #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) -static __always_inline bool static_branch__true(struct jump_label_key *key) +static __always_inline bool static_branch__true(struct static_key *key) { - return likely(static_branch(key)); /* Not out of line branch. */ + return static_key_true(key); /* Not out of line branch. */ } -static __always_inline bool static_branch__false(struct jump_label_key *key) +static __always_inline bool static_branch__false(struct static_key *key) { - return unlikely(static_branch(key)); /* Out of line branch. */ + return static_key_false(key); /* Out of line branch. 
*/ } #define SCHED_FEAT(name, enabled) \ -static __always_inline bool static_branch_##name(struct jump_label_key *key) \ +static __always_inline bool static_branch_##name(struct static_key *key) \ { \ return static_branch__##enabled(key); \ } @@ -650,7 +650,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \ #undef SCHED_FEAT -extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR]; +extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index f1539decd99d..d96ba22dabfa 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include extern struct tracepoint * const __start___tracepoints_ptrs[]; extern struct tracepoint * const __stop___tracepoints_ptrs[]; @@ -256,9 +256,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); - if (elem->regfunc && !jump_label_enabled(&elem->key) && active) + if (elem->regfunc && !static_key_enabled(&elem->key) && active) elem->regfunc(); - else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) + else if (elem->unregfunc && static_key_enabled(&elem->key) && !active) elem->unregfunc(); /* @@ -269,10 +269,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, * is used. 
*/ rcu_assign_pointer(elem->funcs, (*entry)->funcs); - if (active && !jump_label_enabled(&elem->key)) - jump_label_inc(&elem->key); - else if (!active && jump_label_enabled(&elem->key)) - jump_label_dec(&elem->key); + if (active && !static_key_enabled(&elem->key)) + static_key_slow_inc(&elem->key); + else if (!active && static_key_enabled(&elem->key)) + static_key_slow_dec(&elem->key); } /* @@ -283,11 +283,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { - if (elem->unregfunc && jump_label_enabled(&elem->key)) + if (elem->unregfunc && static_key_enabled(&elem->key)) elem->unregfunc(); - if (jump_label_enabled(&elem->key)) - jump_label_dec(&elem->key); + if (static_key_enabled(&elem->key)) + static_key_slow_dec(&elem->key); rcu_assign_pointer(elem->funcs, NULL); } diff --git a/net/core/dev.c b/net/core/dev.c index 115dee1d985d..da7ce7f0e566 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -134,7 +134,7 @@ #include #include #include -#include +#include #include #include "net-sysfs.h" @@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); -static struct jump_label_key netstamp_needed __read_mostly; +static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call jump_label_dec() from irq context +/* We are not allowed to call static_key_slow_dec() from irq context * If net_disable_timestamp() is called from irq context, defer the - * jump_label_dec() calls. + * static_key_slow_dec() calls. 
*/ static atomic_t netstamp_needed_deferred; #endif @@ -1457,12 +1457,12 @@ void net_enable_timestamp(void) if (deferred) { while (--deferred) - jump_label_dec(&netstamp_needed); + static_key_slow_dec(&netstamp_needed); return; } #endif WARN_ON(in_interrupt()); - jump_label_inc(&netstamp_needed); + static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1474,19 +1474,19 @@ void net_disable_timestamp(void) return; } #endif - jump_label_dec(&netstamp_needed); + static_key_slow_dec(&netstamp_needed); } EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp.tv64 = 0; - if (static_branch(&netstamp_needed)) + if (static_key_false(&netstamp_needed)) __net_timestamp(skb); } #define net_timestamp_check(COND, SKB) \ - if (static_branch(&netstamp_needed)) { \ + if (static_key_false(&netstamp_needed)) { \ if ((COND) && !(SKB)->tstamp.tv64) \ __net_timestamp(SKB); \ } \ @@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); -struct jump_label_key rps_needed __read_mostly; +struct static_key rps_needed __read_mostly; static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, @@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb) trace_netif_rx(skb); #ifdef CONFIG_RPS - if (static_branch(&rps_needed)) { + if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb) return NET_RX_SUCCESS; #ifdef CONFIG_RPS - if (static_branch(&rps_needed)) { + if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu, ret; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a1727cda03d7..495586232aa1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -608,10 +608,10 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, 
spin_unlock(&rps_map_lock); if (map) - jump_label_inc(&rps_needed); + static_key_slow_inc(&rps_needed); if (old_map) { kfree_rcu(old_map, rcu); - jump_label_dec(&rps_needed); + static_key_slow_dec(&rps_needed); } free_cpumask_var(mask); return len; diff --git a/net/core/sock.c b/net/core/sock.c index 3e81fd2e3c75..3a4e5817a2a7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -111,7 +111,7 @@ #include #include #include -#include +#include #include #include @@ -184,7 +184,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -struct jump_label_key memcg_socket_limit_enabled; +struct static_key memcg_socket_limit_enabled; EXPORT_SYMBOL(memcg_socket_limit_enabled); /* diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d05559d4d9cd..0c2850874254 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -69,9 +69,9 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); if (sock_table) - jump_label_inc(&rps_needed); + static_key_slow_inc(&rps_needed); if (orig_sock_table) { - jump_label_dec(&rps_needed); + static_key_slow_dec(&rps_needed); synchronize_rcu(); vfree(orig_sock_table); } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 49978788a9dc..602fb305365f 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -111,7 +111,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); if (val != RESOURCE_MAX) - jump_label_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_socket_limit_enabled); } EXPORT_SYMBOL(tcp_destroy_cgroup); @@ -143,9 +143,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) net->ipv4.sysctl_tcp_mem[i]); if (val == 
RESOURCE_MAX && old_lim != RESOURCE_MAX) - jump_label_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_socket_limit_enabled); else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) - jump_label_inc(&memcg_socket_limit_enabled); + static_key_slow_inc(&memcg_socket_limit_enabled); return 0; } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index b4e8ff05b301..e1b7e051332e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -56,7 +56,7 @@ struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); #if defined(CONFIG_JUMP_LABEL) -struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -77,7 +77,7 @@ int nf_register_hook(struct nf_hook_ops *reg) list_add_rcu(&reg->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); #if defined(CONFIG_JUMP_LABEL) - jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; } @@ -89,7 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) list_del_rcu(&reg->list); mutex_unlock(&nf_hook_mutex); #if defined(CONFIG_JUMP_LABEL) - jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); } -- cgit v1.2.3 From 3bdc0eba0b8b47797f4a76e377dd8360f317450f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 11 Feb 2012 15:39:30 +0000 Subject: net: Add framework to allow sending packets with customized CRC. This is useful for testing RX handling of frames with bad CRCs. Requires driver support to actually put the packet on the wire properly.
Signed-off-by: Ben Greear Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- arch/alpha/include/asm/socket.h | 3 +++ arch/arm/include/asm/socket.h | 3 +++ arch/avr32/include/asm/socket.h | 3 +++ arch/cris/include/asm/socket.h | 3 +++ arch/frv/include/asm/socket.h | 3 +++ arch/h8300/include/asm/socket.h | 3 +++ arch/ia64/include/asm/socket.h | 3 +++ arch/m32r/include/asm/socket.h | 3 +++ arch/m68k/include/asm/socket.h | 3 +++ arch/mips/include/asm/socket.h | 3 +++ arch/mn10300/include/asm/socket.h | 3 +++ arch/parisc/include/asm/socket.h | 4 ++++ arch/powerpc/include/asm/socket.h | 3 +++ arch/s390/include/asm/socket.h | 3 +++ arch/sparc/include/asm/socket.h | 4 ++++ arch/xtensa/include/asm/socket.h | 3 +++ include/asm-generic/socket.h | 4 ++++ include/linux/if.h | 2 ++ include/linux/netdevice.h | 8 +++++++- include/linux/skbuff.h | 4 +++- include/net/sock.h | 4 ++++ net/core/skbuff.c | 1 + net/core/sock.c | 5 +++++ net/packet/af_packet.c | 32 ++++++++++++++++++++++++++++---- 24 files changed, 104 insertions(+), 6 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 16449d330dae..dcb221a4b5be 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -73,6 +73,9 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
*/ diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index d958c74e5260..6433cadb6ed4 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 30078f98b3ab..a473f8c6a9aa 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index 048aba64600c..ae52825021af 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -68,6 +68,9 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 7a361810f3cc..a5b1d7dbb205 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -66,5 +66,8 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index e7bbfcee5b99..ec4554e7b04b 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h 
b/arch/ia64/include/asm/socket.h index ced62de9d5a9..41fc28a4a18a 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -75,4 +75,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index 696cb4c7ca4e..a15f40b52783 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index e8b41a6775f9..d1be684edf97 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index 52104872e9e3..a2ed6fdad4e0 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -86,6 +86,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. 
*/ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 013fcc51698f..820463a484b8 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index f717c9bec16f..1b52c2c31a7a 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -65,6 +65,10 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 0x4023 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 0x4024 + + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
*/ diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index fe1c0b478fd7..3d5179bb122f 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -73,4 +73,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 581702fa1b0c..c91b720965c0 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -74,4 +74,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 68e2e2746f6f..bea1568ae4af 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -62,6 +62,10 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 0x0026 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 0x0027 + + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index 74818b161362..e36c68184920 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -77,4 +77,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index d9aaac0c36d4..b1bea03274d5 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -68,4 +68,8 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS 
SO_WIFI_STATUS #define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/linux/if.h b/include/linux/if.h index 06b6ef60c821..f995c663c493 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -80,6 +80,8 @@ * skbs on transmit */ #define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ #define IFF_TEAM_PORT 0x40000 /* device used as team port */ +#define IFF_SUPP_NOFCS 0x80000 /* device supports sending custom FCS */ + #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..f1b7d037c2c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1082,7 +1082,8 @@ struct net_device { const struct header_ops *header_ops; unsigned int flags; /* interface flags (a la BSD) */ - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. + * See if.h for definitions. */ unsigned short gflags; unsigned short padded; /* How much padding added by alloc_netdev() */ @@ -2650,6 +2651,11 @@ static inline int netif_is_bond_slave(struct net_device *dev) return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } +static inline bool netif_supports_nofcs(struct net_device *dev) +{ + return dev->priv_flags & IFF_SUPP_NOFCS; +} + extern struct pernet_operations __net_initdata loopback_net_ops; /* Logging, debugging and troubleshooting/diagnostic helpers. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c11a44ea1bf4..06a4c0fd7bef 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -361,6 +361,7 @@ typedef unsigned char *sk_buff_data_t; * ports. 
* @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not + * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -459,7 +460,8 @@ struct sk_buff { __u8 l4_rxhash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - /* 10/12 bit hole (depending on ndisc_nodetype presence) */ + __u8 no_fcs:1; + /* 9/11 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #ifdef CONFIG_NET_DMA diff --git a/include/net/sock.h b/include/net/sock.h index 9c0553b9e451..ba761e7de252 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -615,6 +615,10 @@ enum sock_flags { SOCK_RXQ_OVFL, SOCK_ZEROCOPY, /* buffers from userspace */ SOCK_WIFI_STATUS, /* push wifi status to userspace */ + SOCK_NOFCS, /* Tell NIC not to do the Ethernet FCS. + * Will use last 4 bytes of packet sent from + * user-space instead. + */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3a530780753..6eb656acdfe5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -592,6 +592,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; new->l4_rxhash = old->l4_rxhash; + new->no_fcs = old->no_fcs; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif diff --git a/net/core/sock.c b/net/core/sock.c index 19942d4bb6e6..55011cb691ad 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -799,6 +799,11 @@ set_rcvbuf: else ret = -EOPNOTSUPP; break; + + case SO_NOFCS: + sock_valbool_flag(sk, SOCK_NOFCS, valbool); + break; + default: ret = -ENOPROTOOPT; break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2dbb32b988c4..ae2d484416dd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1459,6 +1459,7 @@ static int 
packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, struct net_device *dev; __be16 proto = 0; int err; + int extra_len = 0; /* * Get and verify the address. @@ -1493,8 +1494,16 @@ retry: * raw protocol and you must do your own fragmentation at this level. */ + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN) + if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { @@ -1526,7 +1535,7 @@ retry: goto retry; } - if (len > (dev->mtu + dev->hard_header_len)) { + if (len > (dev->mtu + dev->hard_header_len + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. @@ -1548,6 +1557,9 @@ retry: if (err < 0) goto out_unlock; + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + dev_queue_xmit(skb); rcu_read_unlock(); return len; @@ -2209,6 +2221,7 @@ static int packet_snd(struct socket *sock, struct packet_sock *po = pkt_sk(sk); unsigned short gso_type = 0; int hlen, tlen; + int extra_len = 0; /* * Get and verify the address. 
@@ -2288,8 +2301,16 @@ static int packet_snd(struct socket *sock, } } + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN)) + if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; @@ -2315,7 +2336,7 @@ static int packet_snd(struct socket *sock, if (err < 0) goto out_free; - if (!gso_type && (len > dev->mtu + reserve)) { + if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. @@ -2353,6 +2374,9 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + /* * Now send it */ -- cgit v1.2.3 From 7e180bd8020d213bb0de15c3606968f8a9262439 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 11 Mar 2012 11:59:25 -0400 Subject: [S390] rename lowcore field The 16 bit value at the lowcore location with offset 0x84 is the cpu address that is associated with an external interrupt. Rename the field from cpu_addr to ext_cpu_addr to make that clear. 
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 4 ++-- arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/entry64.S | 2 +- arch/s390/kernel/swsusp_asm64.S | 4 ++-- arch/s390/kvm/interrupt.c | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 707f2306725b..0831449e87a3 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -56,7 +56,7 @@ struct _lowcore { psw_t mcck_new_psw; /* 0x0070 */ psw_t io_new_psw; /* 0x0078 */ __u32 ext_params; /* 0x0080 */ - __u16 cpu_addr; /* 0x0084 */ + __u16 ext_cpu_addr; /* 0x0084 */ __u16 ext_int_code; /* 0x0086 */ __u16 svc_ilc; /* 0x0088 */ __u16 svc_code; /* 0x008a */ @@ -189,7 +189,7 @@ struct _lowcore { __u32 ipl_parmblock_ptr; /* 0x0014 */ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ __u32 ext_params; /* 0x0080 */ - __u16 cpu_addr; /* 0x0084 */ + __u16 ext_cpu_addr; /* 0x0084 */ __u16 ext_int_code; /* 0x0086 */ __u16 svc_ilc; /* 0x0088 */ __u16 svc_code; /* 0x008a */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 6e6a72e66d60..530ae0e8e38f 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -78,7 +78,7 @@ int main(void) BLANK(); /* lowcore offsets */ DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); - DEFINE(__LC_CPU_ADDRESS, offsetof(struct _lowcore, cpu_addr)); + DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3705700ed374..5f437b830da5 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -606,7 +606,7 @@ ext_skip: stm 
%r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF lr %r2,%r11 # pass pointer to pt_regs - l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) basr %r14,%r1 # call do_extint diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 412a7b8783d7..bacbd2848d40 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -625,7 +625,7 @@ ext_skip: TRACE_IRQS_OFF lghi %r1,4096 lgr %r2,%r11 # pass pointer to pt_regs - llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code llgf %r4,__LC_EXT_PARAMS # get external parameter lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter brasl %r14,do_extint diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index acb78cdee896..2ef39d1519a9 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -42,7 +42,7 @@ ENTRY(swsusp_arch_suspend) lghi %r1,0x1000 /* Save CPU address */ - stap __LC_CPU_ADDRESS(%r0) + stap __LC_EXT_CPU_ADDR(%r0) /* Store registers */ mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ @@ -173,7 +173,7 @@ pgm_check_entry: larl %r1,.Lresume_cpu /* Resume CPU address: r2 */ stap 0(%r1) llgh %r2,0(%r1) - llgh %r1,__LC_CPU_ADDRESS(%r0) /* Suspend CPU address: r1 */ + llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */ cgr %r1,%r2 je restore_registers /* r1 = r2 -> nothing to do */ larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 278ee009ce65..f0647ce6da21 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -134,7 +134,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, if (rc == -EFAULT) exception = 1; - rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->emerg.code); + rc = 
put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); if (rc == -EFAULT) exception = 1; @@ -156,7 +156,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, if (rc == -EFAULT) exception = 1; - rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code); + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); if (rc == -EFAULT) exception = 1; @@ -202,7 +202,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, if (rc == -EFAULT) exception = 1; - rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); if (rc == -EFAULT) exception = 1; -- cgit v1.2.3 From 8b646bd759086f6090fe27acf414c0b5faa737f4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 11 Mar 2012 11:59:26 -0400 Subject: [S390] rework smp code Define struct pcpu and merge some of the NR_CPUS arrays into it, including __cpu_logical_map, current_set and smp_cpu_state. Split smp related functions to those operating on physical cpus and the functions operating on a logical cpu number. Make the functions for physical cpus use a pointer to a struct pcpu. This hides the knowledge about cpu addresses in smp.c, entry[64].S and swsusp_asm64.S, thus remove the sigp.h header. The PSW restart mechanism is used to start secondary cpus, calling a function on an online cpu, calling a function on the ipl cpu, and for the nmi signal. Replace the different assembler functions with a single function restart_int_handler. The new entry point calls a function whose pointer is stored in the lowcore of the target cpu and it can wait for the source cpu to stop. This covers all existing use cases. Overall the code is now simpler and there are ~380 lines less code. 
Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 100 ++-- arch/s390/include/asm/sigp.h | 132 ----- arch/s390/include/asm/smp.h | 63 +-- arch/s390/include/asm/vdso.h | 4 +- arch/s390/kernel/Makefile | 2 - arch/s390/kernel/asm-offsets.c | 17 +- arch/s390/kernel/entry.S | 81 +-- arch/s390/kernel/entry.h | 9 +- arch/s390/kernel/entry64.S | 72 +-- arch/s390/kernel/ipl.c | 21 +- arch/s390/kernel/machine_kexec.c | 43 +- arch/s390/kernel/setup.c | 56 +- arch/s390/kernel/smp.c | 1083 +++++++++++++++++++------------------- arch/s390/kernel/switch_cpu.S | 58 -- arch/s390/kernel/switch_cpu64.S | 51 -- arch/s390/kernel/swsusp_asm64.S | 12 +- arch/s390/kernel/topology.c | 8 +- arch/s390/kernel/vdso.c | 28 +- arch/s390/kernel/vtime.c | 3 + arch/s390/lib/spinlock.c | 30 +- drivers/s390/char/sclp_quiesce.c | 1 - drivers/s390/char/zcore.c | 1 - 22 files changed, 739 insertions(+), 1136 deletions(-) delete mode 100644 arch/s390/include/asm/sigp.h delete mode 100644 arch/s390/kernel/switch_cpu.S delete mode 100644 arch/s390/kernel/switch_cpu64.S (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 0831449e87a3..4e69563bc956 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 1999,2010 + * Copyright IBM Corp. 
1999,2012 * Author(s): Hartmut Penner , * Martin Schwidefsky , * Denis Joseph Barrow, @@ -12,14 +12,6 @@ #include #include -void restart_int_handler(void); -void ext_int_handler(void); -void system_call(void); -void pgm_check_handler(void); -void mcck_int_handler(void); -void io_int_handler(void); -void psw_restart_int_handler(void); - #ifdef CONFIG_32BIT #define LC_ORDER 0 @@ -117,32 +109,37 @@ struct _lowcore { __u64 steal_timer; /* 0x0288 */ __u64 last_update_timer; /* 0x0290 */ __u64 last_update_clock; /* 0x0298 */ + __u64 int_clock; /* 0x02a0 */ + __u64 mcck_clock; /* 0x02a8 */ + __u64 clock_comparator; /* 0x02b0 */ /* Current process. */ - __u32 current_task; /* 0x02a0 */ - __u32 thread_info; /* 0x02a4 */ - __u32 kernel_stack; /* 0x02a8 */ + __u32 current_task; /* 0x02b8 */ + __u32 thread_info; /* 0x02bc */ + __u32 kernel_stack; /* 0x02c0 */ + + /* Interrupt, panic and restart stack. */ + __u32 async_stack; /* 0x02c4 */ + __u32 panic_stack; /* 0x02c8 */ + __u32 restart_stack; /* 0x02cc */ - /* Interrupt and panic stack. */ - __u32 async_stack; /* 0x02ac */ - __u32 panic_stack; /* 0x02b0 */ + /* Restart function and parameter. */ + __u32 restart_fn; /* 0x02d0 */ + __u32 restart_data; /* 0x02d4 */ + __u32 restart_source; /* 0x02d8 */ /* Address space pointer. 
*/ - __u32 kernel_asce; /* 0x02b4 */ - __u32 user_asce; /* 0x02b8 */ - __u32 current_pid; /* 0x02bc */ + __u32 kernel_asce; /* 0x02dc */ + __u32 user_asce; /* 0x02e0 */ + __u32 current_pid; /* 0x02e4 */ /* SMP info area */ - __u32 cpu_nr; /* 0x02c0 */ - __u32 softirq_pending; /* 0x02c4 */ - __u32 percpu_offset; /* 0x02c8 */ - __u32 ext_call_fast; /* 0x02cc */ - __u64 int_clock; /* 0x02d0 */ - __u64 mcck_clock; /* 0x02d8 */ - __u64 clock_comparator; /* 0x02e0 */ - __u32 machine_flags; /* 0x02e8 */ - __u32 ftrace_func; /* 0x02ec */ - __u8 pad_0x02f8[0x0300-0x02f0]; /* 0x02f0 */ + __u32 cpu_nr; /* 0x02e8 */ + __u32 softirq_pending; /* 0x02ec */ + __u32 percpu_offset; /* 0x02f0 */ + __u32 machine_flags; /* 0x02f4 */ + __u32 ftrace_func; /* 0x02f8 */ + __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -254,34 +251,39 @@ struct _lowcore { __u64 steal_timer; /* 0x02e0 */ __u64 last_update_timer; /* 0x02e8 */ __u64 last_update_clock; /* 0x02f0 */ + __u64 int_clock; /* 0x02f8 */ + __u64 mcck_clock; /* 0x0300 */ + __u64 clock_comparator; /* 0x0308 */ /* Current process. */ - __u64 current_task; /* 0x02f8 */ - __u64 thread_info; /* 0x0300 */ - __u64 kernel_stack; /* 0x0308 */ + __u64 current_task; /* 0x0310 */ + __u64 thread_info; /* 0x0318 */ + __u64 kernel_stack; /* 0x0320 */ + + /* Interrupt, panic and restart stack. */ + __u64 async_stack; /* 0x0328 */ + __u64 panic_stack; /* 0x0330 */ + __u64 restart_stack; /* 0x0338 */ - /* Interrupt and panic stack. */ - __u64 async_stack; /* 0x0310 */ - __u64 panic_stack; /* 0x0318 */ + /* Restart function and parameter. */ + __u64 restart_fn; /* 0x0340 */ + __u64 restart_data; /* 0x0348 */ + __u64 restart_source; /* 0x0350 */ /* Address space pointer. 
*/ - __u64 kernel_asce; /* 0x0320 */ - __u64 user_asce; /* 0x0328 */ - __u64 current_pid; /* 0x0330 */ + __u64 kernel_asce; /* 0x0358 */ + __u64 user_asce; /* 0x0360 */ + __u64 current_pid; /* 0x0368 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0338 */ - __u32 softirq_pending; /* 0x033c */ - __u64 percpu_offset; /* 0x0340 */ - __u64 ext_call_fast; /* 0x0348 */ - __u64 int_clock; /* 0x0350 */ - __u64 mcck_clock; /* 0x0358 */ - __u64 clock_comparator; /* 0x0360 */ - __u64 vdso_per_cpu_data; /* 0x0368 */ - __u64 machine_flags; /* 0x0370 */ - __u64 ftrace_func; /* 0x0378 */ - __u64 gmap; /* 0x0380 */ - __u8 pad_0x0388[0x0400-0x0388]; /* 0x0388 */ + __u32 cpu_nr; /* 0x0370 */ + __u32 softirq_pending; /* 0x0374 */ + __u64 percpu_offset; /* 0x0378 */ + __u64 vdso_per_cpu_data; /* 0x0380 */ + __u64 machine_flags; /* 0x0388 */ + __u64 ftrace_func; /* 0x0390 */ + __u64 gmap; /* 0x0398 */ + __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */ /* Interrupt response block. */ __u8 irb[64]; /* 0x0400 */ diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h deleted file mode 100644 index 7040b8567cd0..000000000000 --- a/arch/s390/include/asm/sigp.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Routines and structures for signalling other processors. - * - * Copyright IBM Corp. 1999,2010 - * Author(s): Denis Joseph Barrow, - * Martin Schwidefsky , - * Heiko Carstens , - */ - -#ifndef __ASM_SIGP_H -#define __ASM_SIGP_H - -#include - -/* Get real cpu address from logical cpu number. 
*/ -extern unsigned short __cpu_logical_map[]; - -static inline int cpu_logical_map(int cpu) -{ -#ifdef CONFIG_SMP - return __cpu_logical_map[cpu]; -#else - return stap(); -#endif -} - -enum { - sigp_sense = 1, - sigp_external_call = 2, - sigp_emergency_signal = 3, - sigp_start = 4, - sigp_stop = 5, - sigp_restart = 6, - sigp_stop_and_store_status = 9, - sigp_initial_cpu_reset = 11, - sigp_cpu_reset = 12, - sigp_set_prefix = 13, - sigp_store_status_at_address = 14, - sigp_store_extended_status_at_address = 15, - sigp_set_architecture = 18, - sigp_conditional_emergency_signal = 19, - sigp_sense_running = 21, -}; - -enum { - sigp_order_code_accepted = 0, - sigp_status_stored = 1, - sigp_busy = 2, - sigp_not_operational = 3, -}; - -/* - * Definitions for external call. - */ -enum { - ec_schedule = 0, - ec_call_function, - ec_call_function_single, - ec_stop_cpu, -}; - -/* - * Signal processor. - */ -static inline int raw_sigp(u16 cpu, int order) -{ - register unsigned long reg1 asm ("1") = 0; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) - : "d" (reg1), "d" (cpu), - "a" (order) : "cc" , "memory"); - return ccode; -} - -/* - * Signal processor with parameter. - */ -static inline int raw_sigp_p(u32 parameter, u16 cpu, int order) -{ - register unsigned int reg1 asm ("1") = parameter; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) - : "d" (reg1), "d" (cpu), - "a" (order) : "cc" , "memory"); - return ccode; -} - -/* - * Signal processor with parameter and return status. 
- */ -static inline int raw_sigp_ps(u32 *status, u32 parm, u16 cpu, int order) -{ - register unsigned int reg1 asm ("1") = parm; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "+d" (reg1) - : "d" (cpu), "a" (order) - : "cc" , "memory"); - *status = reg1; - return ccode; -} - -static inline int sigp(int cpu, int order) -{ - return raw_sigp(cpu_logical_map(cpu), order); -} - -static inline int sigp_p(u32 parameter, int cpu, int order) -{ - return raw_sigp_p(parameter, cpu_logical_map(cpu), order); -} - -static inline int sigp_ps(u32 *status, u32 parm, int cpu, int order) -{ - return raw_sigp_ps(status, parm, cpu_logical_map(cpu), order); -} - -#endif /* __ASM_SIGP_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c32e9123b40c..797f78729680 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 1999,2009 + * Copyright IBM Corp. 1999,2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky , * Heiko Carstens , @@ -10,71 +10,52 @@ #ifdef CONFIG_SMP #include -#include - -extern void machine_restart_smp(char *); -extern void machine_halt_smp(void); -extern void machine_power_off_smp(void); #define raw_smp_processor_id() (S390_lowcore.cpu_nr) -extern int __cpu_disable (void); -extern void __cpu_die (unsigned int cpu); -extern int __cpu_up (unsigned int cpu); - extern struct mutex smp_cpu_state_mutex; +extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; + +extern int __cpu_up(unsigned int cpu); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; - -extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); -extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, - int from, int to); -extern void smp_restart_with_online_cpu(void); -extern void 
smp_restart_cpu(void); +extern void smp_call_online_cpu(void (*func)(void *), void *); +extern void smp_call_ipl_cpu(void (*func)(void *), void *); -/* - * returns 1 if (virtual) cpu is scheduled - * returns 0 otherwise - */ -static inline int smp_vcpu_scheduled(int cpu) -{ - u32 status; - - switch (sigp_ps(&status, 0, cpu, sigp_sense_running)) { - case sigp_status_stored: - /* Check for running status */ - if (status & 0x400) - return 0; - break; - case sigp_not_operational: - return 0; - default: - break; - } - return 1; -} +extern int smp_find_processor_id(u16 address); +extern int smp_store_status(int cpu); +extern int smp_vcpu_scheduled(int cpu); +extern void smp_yield_cpu(int cpu); +extern void smp_yield(void); +extern void smp_stop_cpu(void); #else /* CONFIG_SMP */ -static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) +static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) { func(data); } -static inline void smp_restart_with_online_cpu(void) +static inline void smp_call_online_cpu(void (*func)(void *), void *data) { + func(data); } -#define smp_vcpu_scheduled (1) +static inline int smp_find_processor_id(int address) { return 0; } +static inline int smp_vcpu_scheduled(int cpu) { return 1; } +static inline void smp_yield_cpu(int cpu) { } +static inline void smp_yield(void) { } +static inline void smp_stop_cpu(void) { } #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); +extern void __cpu_die(unsigned int cpu); +extern int __cpu_disable(void); #else static inline int smp_rescan_cpus(void) { return 0; } static inline void cpu_die(void) { } diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 533f35751aeb..c4a11cfad3c8 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -40,8 +40,8 @@ struct vdso_per_cpu_data { extern struct vdso_data *vdso_data; #ifdef CONFIG_64BIT -int vdso_alloc_per_cpu(int cpu, 
struct _lowcore *lowcore); -void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); +int vdso_alloc_per_cpu(struct _lowcore *lowcore); +void vdso_free_per_cpu(struct _lowcore *lowcore); #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7d9ec924e7e7..d0a48268eb27 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -34,8 +34,6 @@ extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCHED_BOOK) += topology.o -obj-$(CONFIG_SMP) += $(if $(CONFIG_64BIT),switch_cpu64.o, \ - switch_cpu.o) obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 530ae0e8e38f..aeeaf896be9b 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include /* * Make sure that the compiler is new enough. 
We want a compiler that @@ -70,12 +70,6 @@ int main(void) DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); BLANK(); - /* constants for SIGP */ - DEFINE(__SIGP_STOP, sigp_stop); - DEFINE(__SIGP_RESTART, sigp_restart); - DEFINE(__SIGP_SENSE, sigp_sense); - DEFINE(__SIGP_INITIAL_CPU_RESET, sigp_initial_cpu_reset); - BLANK(); /* lowcore offsets */ DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); @@ -95,20 +89,19 @@ int main(void) DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); - DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); - BLANK(); - DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); + DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); + BLANK(); DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); @@ -129,12 +122,16 @@ int main(void) 
DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); + DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); + DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); + BLANK(); DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5f437b830da5..6143521a4fff 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -2,7 +2,7 @@ * arch/s390/kernel/entry.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright (C) IBM Corp. 
1999,2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), @@ -691,77 +691,30 @@ mcck_panic: 0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j mcck_skip -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - spt restart_vtime-restart_base(%r1) - stck __LC_LAST_UPDATE_CLOCK - mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) - mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - l %r15,__LC_GPREGS_SAVE_AREA+60 # load ksp - lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs - lam %a0,%a15,__LC_AREGS_SAVE_AREA - lm %r6,%r15,__SF_GPRS(%r15)# load registers from clone - l %r1,__LC_THREAD_INFO - mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) - mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) - xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - basr %r14,0 - l %r14,restart_addr-.(%r14) - basr %r14,%r14 # call start_secondary -restart_addr: - .long start_secondary - .align 8 -restart_vtime: - .long 0x7fffffff,0xffffffff - .previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... 
- */ -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - lpsw restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000 -restart_go: -#endif - # # PSW restart interrupt handler # -ENTRY(psw_restart_int_handler) +ENTRY(restart_int_handler) st %r15,__LC_SAVE_AREA_RESTART - basr %r15,0 -0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack - l %r15,0(%r15) + l %r15,__LC_RESTART_STACK ahi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) stm %r0,%r14,__PT_R0(%r15) mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw - ahi %r15,-STACK_FRAME_OVERHEAD - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - basr %r14,0 -1: l %r14,.Ldo_restart-1b(%r14) - basr %r14,%r14 - basr %r14,0 # load disabled wait PSW if -2: lpsw restart_psw_crash-2b(%r14) # do_restart returns - .align 4 -.Ldo_restart: - .long do_restart -.Lrestart_stack: - .long restart_stack - .align 8 -restart_psw_crash: - .long 0x000a0000,0x00000000 + restart_psw_crash + ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lm %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu + ltr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,1 # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + lh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,5 # sigp stop to current cpu + brc 2,2b +3: j 3b .section .kprobes.text, "ax" diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index bf538aaf407d..92b1617d0c95 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -9,6 +9,14 @@ extern void (*pgm_check_table[128])(struct pt_regs *); extern void *restart_stack; +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void 
restart_int_handler(void); +void restart_call_handler(void); + asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); @@ -26,7 +34,6 @@ void do_notify_resume(struct pt_regs *regs); void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); void do_restart(void); -int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index bacbd2848d40..e33789a45752 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -2,7 +2,7 @@ * arch/s390/kernel/entry64.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2010 + * Copyright (C) IBM Corp. 1999,2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), @@ -713,68 +713,30 @@ mcck_panic: 0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j mcck_skip -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - spt restart_vtime-restart_base(%r1) - stck __LC_LAST_UPDATE_CLOCK - mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) - mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - lghi %r10,__LC_GPREGS_SAVE_AREA - lg %r15,120(%r10) # load ksp - lghi %r10,__LC_CREGS_SAVE_AREA - lctlg %c0,%c15,0(%r10) # get new ctl regs - lghi %r10,__LC_AREGS_SAVE_AREA - lam %a0,%a15,0(%r10) - lmg %r6,%r15,__SF_GPRS(%r15)# load registers from clone - lg %r1,__LC_THREAD_INFO - mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) - mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) - xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - brasl %r14,start_secondary - .align 8 -restart_vtime: - .long 0x7fffffff,0xffffffff - .previous -#else -/* - * 
If we do not run with SMP enabled, let the new CPU crash ... - */ -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - lpswe restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000,0x00000000,0x00000000 -restart_go: -#endif - # # PSW restart interrupt handler # -ENTRY(psw_restart_int_handler) +ENTRY(restart_int_handler) stg %r15,__LC_SAVE_AREA_RESTART - larl %r15,restart_stack # load restart stack - lg %r15,0(%r15) + lg %r15,__LC_RESTART_STACK aghi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) stmg %r0,%r14,__PT_R0(%r15) mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw - aghi %r15,-STACK_FRAME_OVERHEAD - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - brasl %r14,do_restart - larl %r14,restart_psw_crash # load disabled wait PSW if - lpswe 0(%r14) # do_restart returns - .align 8 -restart_psw_crash: - .quad 0x0002000080000000,0x0000000000000000 + restart_psw_crash + aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lmg %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu + ltgr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,1 # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + llgh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,5 # sigp stop to current cpu + brc 2,2b +3: j 3b .section .kprobes.text, "ax" diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index affa8e68124a..e5a72a2b0c56 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2,7 +2,7 @@ * arch/s390/kernel/ipl.c * ipl/reipl/dump support for Linux on s390. * - * Copyright IBM Corp. 2005,2007 + * Copyright IBM Corp. 
2005,2012 * Author(s): Michael Holzheu * Heiko Carstens * Volker Sameske @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "entry.h" @@ -571,7 +570,7 @@ static void __ipl_run(void *unused) static void ipl_run(struct shutdown_trigger *trigger) { - smp_switch_to_ipl_cpu(__ipl_run, NULL); + smp_call_ipl_cpu(__ipl_run, NULL); } static int __init ipl_init(void) @@ -1101,7 +1100,7 @@ static void __reipl_run(void *unused) static void reipl_run(struct shutdown_trigger *trigger) { - smp_switch_to_ipl_cpu(__reipl_run, NULL); + smp_call_ipl_cpu(__reipl_run, NULL); } static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) @@ -1421,7 +1420,7 @@ static void dump_run(struct shutdown_trigger *trigger) if (dump_method == DUMP_METHOD_NONE) return; smp_send_stop(); - smp_switch_to_ipl_cpu(__dump_run, NULL); + smp_call_ipl_cpu(__dump_run, NULL); } static int __init dump_ccw_init(void) @@ -1623,9 +1622,7 @@ static void stop_run(struct shutdown_trigger *trigger) if (strcmp(trigger->name, ON_PANIC_STR) == 0 || strcmp(trigger->name, ON_RESTART_STR) == 0) disabled_wait((unsigned long) __builtin_return_address(0)); - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + smp_stop_cpu(); } static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, @@ -1738,9 +1735,8 @@ static ssize_t on_restart_store(struct kobject *kobj, static struct kobj_attribute on_restart_attr = __ATTR(on_restart, 0644, on_restart_show, on_restart_store); -void do_restart(void) +static void __do_restart(void *ignore) { - smp_restart_with_online_cpu(); smp_send_stop(); #ifdef CONFIG_CRASH_DUMP crash_kexec(NULL); @@ -1749,6 +1745,11 @@ void do_restart(void) stop_run(&on_restart_trigger); } +void do_restart(void) +{ + smp_call_online_cpu(__do_restart, NULL); +} + /* on halt */ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 
47b168fb29c4..bf6fbc03ebaf 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -48,51 +48,22 @@ static void add_elf_notes(int cpu) memset(ptr, 0, sizeof(struct elf_note)); } -/* - * Store status of next available physical CPU - */ -static int store_status_next(int start_cpu, int this_cpu) -{ - struct save_area *sa = (void *) 4608 + store_prefix(); - int cpu, rc; - - for (cpu = start_cpu; cpu < 65536; cpu++) { - if (cpu == this_cpu) - continue; - do { - rc = raw_sigp(cpu, sigp_stop_and_store_status); - } while (rc == sigp_busy); - if (rc != sigp_order_code_accepted) - continue; - if (sa->pref_reg) - return cpu; - } - return -1; -} - /* * Initialize CPU ELF notes */ void setup_regs(void) { unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; - int cpu, this_cpu, phys_cpu = 0, first = 1; + int cpu, this_cpu; - this_cpu = stap(); - - if (!S390_lowcore.prefixreg_save_area) - first = 0; + this_cpu = smp_find_processor_id(stap()); + add_elf_notes(this_cpu); for_each_online_cpu(cpu) { - if (first) { - add_elf_notes(cpu); - first = 0; + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) continue; - } - phys_cpu = store_status_next(phys_cpu, this_cpu); - if (phys_cpu == -1) - break; add_elf_notes(cpu); - phys_cpu++; } /* Copy dump CPU store status info to absolute zero */ memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); @@ -255,5 +226,5 @@ void machine_kexec(struct kimage *image) return; tracer_disable(); smp_send_stop(); - smp_switch_to_ipl_cpu(__machine_kexec, image); + smp_call_ipl_cpu(__machine_kexec, image); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 778c100fe314..9a3edb5f2c92 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -2,7 +2,7 @@ * arch/s390/kernel/setup.c * * S390 version - * Copyright (C) IBM Corp. 1999,2010 + * Copyright (C) IBM Corp. 
1999,2012 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -62,6 +62,7 @@ #include #include #include +#include "entry.h" long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | PSW_MASK_EA | PSW_MASK_BA; @@ -351,8 +352,9 @@ static void setup_addressing_mode(void) } } -static void __init -setup_lowcore(void) +void *restart_stack __attribute__((__section__(".data"))); + +static void __init setup_lowcore(void) { struct _lowcore *lc; @@ -363,7 +365,7 @@ setup_lowcore(void) lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); lc->restart_psw.mask = psw_kernel_bits; lc->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; + PSW_ADDR_AMODE | (unsigned long) restart_int_handler; lc->external_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = @@ -412,6 +414,24 @@ setup_lowcore(void) lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; lc->ftrace_func = S390_lowcore.ftrace_func; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant + * restart data to the absolute zero lowcore. This is necesary if + * PSW restart is done on an offline CPU that has lowcore zero. 
+ */ + lc->restart_stack = (unsigned long) restart_stack; + lc->restart_fn = (unsigned long) do_restart; + lc->restart_data = 0; + lc->restart_source = -1UL; + memcpy(&S390_lowcore.restart_stack, &lc->restart_stack, + 4*sizeof(unsigned long)); + copy_to_absolute_zero(&S390_lowcore.restart_psw, + &lc->restart_psw, sizeof(psw_t)); + set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; } @@ -572,27 +592,6 @@ static void __init setup_memory_end(void) } } -void *restart_stack __attribute__((__section__(".data"))); - -/* - * Setup new PSW and allocate stack for PSW restart interrupt - */ -static void __init setup_restart_psw(void) -{ - psw_t psw; - - restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); - restart_stack += ASYNC_SIZE; - - /* - * Setup restart PSW for absolute zero lowcore. This is necesary - * if PSW restart is done on an offline CPU that has lowcore zero - */ - psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); -} - static void __init setup_vmcoreinfo(void) { #ifdef CONFIG_KEXEC @@ -782,8 +781,7 @@ static void __init reserve_crashkernel(void) #endif } -static void __init -setup_memory(void) +static void __init setup_memory(void) { unsigned long bootmap_size; unsigned long start_pfn, end_pfn; @@ -1014,8 +1012,7 @@ static void __init setup_hwcaps(void) * was printed. 
*/ -void __init -setup_arch(char **cmdline_p) +void __init setup_arch(char **cmdline_p) { /* * print what head.S has found out about the machine @@ -1068,7 +1065,6 @@ setup_arch(char **cmdline_p) setup_memory(); setup_resources(); setup_vmcoreinfo(); - setup_restart_psw(); setup_lowcore(); cpu_init(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2398ce6b15ae..6db8526a602d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,23 +1,18 @@ /* - * arch/s390/kernel/smp.c + * SMP related functions * - * Copyright IBM Corp. 1999, 2009 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - * Heiko Carstens (heiko.carstens@de.ibm.com) + * Copyright IBM Corp. 1999,2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky , + * Heiko Carstens , * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd * (c) 1998 Ingo Molnar * - * We work with logical cpu numbering everywhere we can. The only - * functions using the real cpu address (got from STAP) are the sigp - * functions. For all other functions we use the identity mapping. - * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is - * used e.g. to find the idle task belonging to a logical cpu. Every array - * in the kernel is sorted by the logical cpu number and not by the physical - * one which is causing all the confusion with __cpu_logical_map and - * cpu_number_map in other architectures. + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c. 
*/ #define KMSG_COMPONENT "cpu" @@ -31,140 +26,381 @@ #include #include #include -#include #include #include #include -#include -#include #include #include #include #include #include -#include -#include #include -#include #include #include #include #include -#include #include -#include #include "entry.h" -/* logical cpu to cpu address */ -unsigned short __cpu_logical_map[NR_CPUS]; +enum { + sigp_sense = 1, + sigp_external_call = 2, + sigp_emergency_signal = 3, + sigp_start = 4, + sigp_stop = 5, + sigp_restart = 6, + sigp_stop_and_store_status = 9, + sigp_initial_cpu_reset = 11, + sigp_cpu_reset = 12, + sigp_set_prefix = 13, + sigp_store_status_at_address = 14, + sigp_store_extended_status_at_address = 15, + sigp_set_architecture = 18, + sigp_conditional_emergency_signal = 19, + sigp_sense_running = 21, +}; -static struct task_struct *current_set[NR_CPUS]; +enum { + sigp_order_code_accepted = 0, + sigp_status_stored = 1, + sigp_busy = 2, + sigp_not_operational = 3, +}; -static u8 smp_cpu_type; -static int smp_use_sigp_detection; +enum { + ec_schedule = 0, + ec_call_function, + ec_call_function_single, + ec_stop_cpu, +}; -enum s390_cpu_state { +enum { CPU_STATE_STANDBY, CPU_STATE_CONFIGURED, }; +struct pcpu { + struct cpu cpu; + struct task_struct *idle; /* idle process for the cpu */ + struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + unsigned long async_stack; /* async stack for the cpu */ + unsigned long panic_stack; /* panic stack for the cpu */ + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + int state; /* physical cpu state */ + u32 status; /* last status received via sigp */ + u16 address; /* physical cpu address */ +}; + +static u8 boot_cpu_type; +static u16 boot_cpu_address; +static struct pcpu pcpu_devices[NR_CPUS]; + DEFINE_MUTEX(smp_cpu_state_mutex); -static int smp_cpu_state[NR_CPUS]; -static DEFINE_PER_CPU(struct cpu, cpu_devices); +/* + * Signal processor helper functions. 
+ */ +static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +{ + register unsigned int reg1 asm ("1") = parm; + int cc; -static void smp_ext_bitcall(int, int); + asm volatile( + " sigp %1,%2,0(%3)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + if (status && cc == 1) + *status = reg1; + return cc; +} -static int raw_cpu_stopped(int cpu) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) { - u32 status; + int cc; - switch (raw_sigp_ps(&status, 0, cpu, sigp_sense)) { - case sigp_status_stored: - /* Check for stopped and check stop state */ - if (status & 0x50) - return 1; - break; - default: - break; + while (1) { + cc = __pcpu_sigp(addr, order, parm, status); + if (cc != sigp_busy) + return cc; + cpu_relax(); } - return 0; } -static inline int cpu_stopped(int cpu) +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) { - return raw_cpu_stopped(cpu_logical_map(cpu)); + int cc, retry; + + for (retry = 0; ; retry++) { + cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status); + if (cc != sigp_busy) + break; + if (retry >= 3) + udelay(10); + } + return cc; +} + +static inline int pcpu_stopped(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, sigp_sense, + 0, &pcpu->status) != sigp_status_stored) + return 0; + /* Check for stopped and check stop state */ + return !!(pcpu->status & 0x50); +} + +static inline int pcpu_running(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, sigp_sense_running, + 0, &pcpu->status) != sigp_status_stored) + return 1; + /* Check for running status */ + return !(pcpu->status & 0x400); } /* - * Ensure that PSW restart is done on an online CPU + * Find struct pcpu by cpu address. 
*/ -void smp_restart_with_online_cpu(void) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) { int cpu; - for_each_online_cpu(cpu) { - if (stap() == __cpu_logical_map[cpu]) { - /* We are online: Enable DAT again and return */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); - return; - } + for_each_cpu(cpu, mask) + if (pcpu_devices[cpu].address == address) + return pcpu_devices + cpu; + return NULL; +} + +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) +{ + int order; + + set_bit(ec_bit, &pcpu->ec_mask); + order = pcpu_running(pcpu) ? + sigp_external_call : sigp_emergency_signal; + pcpu_sigp_retry(pcpu, order, 0); +} + +static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc; + + if (pcpu != &pcpu_devices[0]) { + pcpu->lowcore = (struct _lowcore *) + __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + pcpu->panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + goto out; } - /* We are not online: Do PSW restart on an online CPU */ - while (sigp(cpu, sigp_restart) == sigp_busy) - cpu_relax(); - /* And stop ourself */ - while (raw_sigp(stap(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + lc = pcpu->lowcore; + memcpy(lc, &S390_lowcore, 512); + memset((char *) lc + 512, 0, sizeof(*lc) - 512); + lc->async_stack = pcpu->async_stack + ASYNC_SIZE; + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE; + lc->cpu_nr = cpu; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); + if (!lc->extended_save_area_addr) + goto out; + } +#else + if (vdso_alloc_per_cpu(lc)) + goto out; +#endif + lowcore_ptr[cpu] = lc; + pcpu_sigp_retry(pcpu, sigp_set_prefix, (u32)(unsigned long) lc); + return 0; +out: + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + 
free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } + return -ENOMEM; } -void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) +static void pcpu_free_lowcore(struct pcpu *pcpu) { - struct _lowcore *lc, *current_lc; - struct stack_frame *sf; - struct pt_regs *regs; - unsigned long sp; - - if (smp_processor_id() == 0) - func(data); - __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | - PSW_MASK_EA | PSW_MASK_BA); - /* Disable lowcore protection */ - __ctl_clear_bit(0, 28); - current_lc = lowcore_ptr[smp_processor_id()]; - lc = lowcore_ptr[0]; - if (!lc) - lc = current_lc; - lc->restart_psw.mask = - PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; - if (!cpu_online(0)) - smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); - while (sigp(0, sigp_stop_and_store_status) == sigp_busy) - cpu_relax(); - sp = lc->panic_stack; - sp -= sizeof(struct pt_regs); - regs = (struct pt_regs *) sp; - memcpy(®s->gprs, ¤t_lc->gpregs_save_area, sizeof(regs->gprs)); - regs->psw = current_lc->psw_save_area; - sp -= STACK_FRAME_OVERHEAD; - sf = (struct stack_frame *) sp; - sf->back_chain = 0; - smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]); + pcpu_sigp_retry(pcpu, sigp_set_prefix, 0); + lowcore_ptr[pcpu - pcpu_devices] = NULL; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + struct _lowcore *lc = pcpu->lowcore; + + free_page((unsigned long) lc->extended_save_area_addr); + lc->extended_save_area_addr = 0; + } +#else + vdso_free_per_cpu(pcpu->lowcore); +#endif + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } +} + +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc = pcpu->lowcore; + + atomic_inc(&init_mm.context.attach_count); + lc->cpu_nr = cpu; + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = 
S390_lowcore.kernel_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->ftrace_func = S390_lowcore.ftrace_func; + lc->user_timer = lc->system_timer = lc->steal_timer = 0; + __ctl_store(lc->cregs_save_area, 0, 15); + save_access_regs((unsigned int *) lc->access_regs_save_area); + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); +} + +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +{ + struct _lowcore *lc = pcpu->lowcore; + struct thread_info *ti = task_thread_info(tsk); + + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE; + lc->thread_info = (unsigned long) task_thread_info(tsk); + lc->current_task = (unsigned long) tsk; + lc->user_timer = ti->user_timer; + lc->system_timer = ti->system_timer; + lc->steal_timer = 0; +} + +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +{ + struct _lowcore *lc = pcpu->lowcore; + + lc->restart_stack = lc->kernel_stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = -1UL; + pcpu_sigp_retry(pcpu, sigp_restart, 0); +} + +/* + * Call function via PSW restart on pcpu and stop the current cpu. + */ +static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), + void *data, unsigned long stack) +{ + struct _lowcore *lc = pcpu->lowcore; + unsigned short this_cpu; + + __load_psw_mask(psw_kernel_bits); + this_cpu = stap(); + if (pcpu->address == this_cpu) + func(data); /* should not return */ + /* Stop target cpu (if func returns this stops the current cpu). */ + pcpu_sigp_retry(pcpu, sigp_stop, 0); + /* Restart func on the target cpu and stop the current cpu. 
*/ + lc->restart_stack = stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = (unsigned long) this_cpu; + asm volatile( + "0: sigp 0,%0,6 # sigp restart to target cpu\n" + " brc 2,0b # busy, try again\n" + "1: sigp 0,%1,5 # sigp stop to current cpu\n" + " brc 2,1b # busy, try again\n" + : : "d" (pcpu->address), "d" (this_cpu) : "0", "1", "cc"); + for (;;) ; +} + +/* + * Call function on an online CPU. + */ +void smp_call_online_cpu(void (*func)(void *), void *data) +{ + struct pcpu *pcpu; + + /* Use the current cpu if it is online. */ + pcpu = pcpu_find_address(cpu_online_mask, stap()); + if (!pcpu) + /* Use the first online cpu. */ + pcpu = pcpu_devices + cpumask_first(cpu_online_mask); + pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); +} + +/* + * Call function on the ipl CPU. + */ +void smp_call_ipl_cpu(void (*func)(void *), void *data) +{ + pcpu_delegate(&pcpu_devices[0], func, data, pcpu_devices->panic_stack); +} + +int smp_find_processor_id(u16 address) +{ + int cpu; + + for_each_present_cpu(cpu) + if (pcpu_devices[cpu].address == address) + return cpu; + return -1; } -static void smp_stop_cpu(void) +int smp_vcpu_scheduled(int cpu) { - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) + return pcpu_running(pcpu_devices + cpu); +} + +void smp_yield(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +void smp_yield_cpu(int cpu) +{ + if (MACHINE_HAS_DIAG9C) + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); + else if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. 
+ */ +void smp_emergency_stop(cpumask_t *cpumask) +{ + u64 end; + int cpu; + + end = get_clock() + (1000000UL << 12); + for_each_cpu(cpu, cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + set_bit(ec_stop_cpu, &pcpu->ec_mask); + while (__pcpu_sigp(pcpu->address, sigp_emergency_signal, + 0, NULL) == sigp_busy && + get_clock() < end) + cpu_relax(); + } + while (get_clock() < end) { + for_each_cpu(cpu, cpumask) + if (pcpu_stopped(pcpu_devices + cpu)) + cpumask_clear_cpu(cpu, cpumask); + if (cpumask_empty(cpumask)) + break; cpu_relax(); + } } +/* + * Stop all cpus but the current one. + */ void smp_send_stop(void) { cpumask_t cpumask; int cpu; - u64 end; /* Disable all interrupts/machine checks */ __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); @@ -173,56 +409,46 @@ void smp_send_stop(void) cpumask_copy(&cpumask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &cpumask); - if (oops_in_progress) { - /* - * Give the other cpus the opportunity to complete - * outstanding interrupts before stopping them. - */ - end = get_clock() + (1000000UL << 12); - for_each_cpu(cpu, &cpumask) { - set_bit(ec_stop_cpu, (unsigned long *) - &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy && - get_clock() < end) - cpu_relax(); - } - while (get_clock() < end) { - for_each_cpu(cpu, &cpumask) - if (cpu_stopped(cpu)) - cpumask_clear_cpu(cpu, &cpumask); - if (cpumask_empty(&cpumask)) - break; - cpu_relax(); - } - } + if (oops_in_progress) + smp_emergency_stop(&cpumask); /* stop all processors */ for_each_cpu(cpu, &cpumask) { - while (sigp(cpu, sigp_stop) == sigp_busy) - cpu_relax(); - while (!cpu_stopped(cpu)) + struct pcpu *pcpu = pcpu_devices + cpu; + pcpu_sigp_retry(pcpu, sigp_stop, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } +/* + * Stop the current cpu. 
+ */ +void smp_stop_cpu(void) +{ + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); + for (;;) ; +} + /* * This is the main routine where commands issued by other * cpus are handled. */ - static void do_ext_call_interrupt(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { unsigned long bits; + int cpu; + cpu = smp_processor_id(); if ((ext_int_code & 0xffff) == 0x1202) - kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; + kstat_cpu(cpu).irqs[EXTINT_EXC]++; else - kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; + kstat_cpu(cpu).irqs[EXTINT_EMS]++; /* * handle bit signal external calls */ - bits = xchg(&S390_lowcore.ext_call_fast, 0); + bits = xchg(&pcpu_devices[cpu].ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); @@ -238,38 +464,17 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, } -/* - * Send an external call sigp to another cpu and return without waiting - * for its completion. - */ -static void smp_ext_bitcall(int cpu, int sig) -{ - int order; - - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (1) { - order = smp_vcpu_scheduled(cpu) ? 
- sigp_external_call : sigp_emergency_signal; - if (sigp(cpu, order) != sigp_busy) - break; - udelay(10); - } -} - void arch_send_call_function_ipi_mask(const struct cpumask *mask) { int cpu; for_each_cpu(cpu, mask) - smp_ext_bitcall(cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function); } void arch_send_call_function_single_ipi(int cpu) { - smp_ext_bitcall(cpu, ec_call_function_single); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } #ifndef CONFIG_64BIT @@ -295,15 +500,16 @@ EXPORT_SYMBOL(smp_ptlb_all); */ void smp_send_reschedule(int cpu) { - smp_ext_bitcall(cpu, ec_schedule); + pcpu_ec_call(pcpu_devices + cpu, ec_schedule); } /* * parameter area for the set/clear control bit callbacks */ struct ec_creg_mask_parms { - unsigned long orvals[16]; - unsigned long andvals[16]; + unsigned long orval; + unsigned long andval; + int cr; }; /* @@ -313,11 +519,9 @@ static void smp_ctl_bit_callback(void *info) { struct ec_creg_mask_parms *pp = info; unsigned long cregs[16]; - int i; __ctl_store(cregs, 0, 15); - for (i = 0; i <= 15; i++) - cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i]; + cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; __ctl_load(cregs, 0, 15); } @@ -326,11 +530,8 @@ static void smp_ctl_bit_callback(void *info) */ void smp_ctl_set_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.orvals[cr] = 1UL << bit; on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -340,216 +541,175 @@ EXPORT_SYMBOL(smp_ctl_set_bit); */ void smp_ctl_clear_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.andvals[cr] = ~(1UL << bit); 
on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) -static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) +struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; +EXPORT_SYMBOL_GPL(zfcpdump_save_areas); + +static void __init smp_get_save_area(int cpu, u16 address) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) - return; + void *lc = pcpu_devices[0].lowcore; + struct save_area *save_area; + if (is_kdump_kernel()) return; + if (!OLDMEM_BASE && (address == boot_cpu_address || + ipl_info.type != IPL_TYPE_FCP_DUMP)) + return; if (cpu >= NR_CPUS) { - pr_warning("CPU %i exceeds the maximum %i and is excluded from " - "the dump\n", cpu, NR_CPUS - 1); + pr_warning("CPU %i exceeds the maximum %i and is excluded " + "from the dump\n", cpu, NR_CPUS - 1); return; } - zfcpdump_save_areas[cpu] = kmalloc(sizeof(struct save_area), GFP_KERNEL); - while (raw_sigp(phy_cpu, sigp_stop_and_store_status) == sigp_busy) - cpu_relax(); - memcpy_real(zfcpdump_save_areas[cpu], - (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE, - sizeof(struct save_area)); + save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL); + if (!save_area) + panic("could not allocate memory for save area\n"); + zfcpdump_save_areas[cpu] = save_area; +#ifdef CONFIG_CRASH_DUMP + if (address == boot_cpu_address) { + /* Copy the registers of the boot cpu. */ + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + SAVE_AREA_BASE - PAGE_SIZE, 0); + return; + } +#endif + /* Get the registers of a non-boot cpu. 
*/ + __pcpu_sigp_relax(address, sigp_stop_and_store_status, 0, NULL); + memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); } -struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; -EXPORT_SYMBOL_GPL(zfcpdump_save_areas); - -#else - -static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } - -#endif /* CONFIG_ZFCPDUMP */ - -static int cpu_known(int cpu_id) +int smp_store_status(int cpu) { - int cpu; + struct pcpu *pcpu; - for_each_present_cpu(cpu) { - if (__cpu_logical_map[cpu] == cpu_id) - return 1; - } + pcpu = pcpu_devices + cpu; + if (__pcpu_sigp_relax(pcpu->address, sigp_stop_and_store_status, + 0, NULL) != sigp_order_code_accepted) + return -EIO; return 0; } -static int smp_rescan_cpus_sigp(cpumask_t avail) -{ - int cpu_id, logical_cpu; +#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ - logical_cpu = cpumask_first(&avail); - if (logical_cpu >= nr_cpu_ids) - return 0; - for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) { - if (cpu_known(cpu_id)) - continue; - __cpu_logical_map[logical_cpu] = cpu_id; - cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); - if (!cpu_stopped(logical_cpu)) - continue; - set_cpu_present(logical_cpu, true); - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = cpumask_next(logical_cpu, &avail); - if (logical_cpu >= nr_cpu_ids) - break; - } - return 0; -} +static inline void smp_get_save_area(int cpu, u16 address) { } + +#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ -static int smp_rescan_cpus_sclp(cpumask_t avail) +static struct sclp_cpu_info *smp_get_cpu_info(void) { + static int use_sigp_detection; struct sclp_cpu_info *info; - int cpu_id, logical_cpu, cpu; - int rc; - - logical_cpu = cpumask_first(&avail); - if (logical_cpu >= nr_cpu_ids) - return 0; - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - rc = sclp_get_cpu_info(info); - if (rc) - goto out; - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && 
info->cpu[cpu].type != smp_cpu_type) - continue; - cpu_id = info->cpu[cpu].address; - if (cpu_known(cpu_id)) - continue; - __cpu_logical_map[logical_cpu] = cpu_id; - cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); - set_cpu_present(logical_cpu, true); - if (cpu >= info->configured) - smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; - else - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = cpumask_next(logical_cpu, &avail); - if (logical_cpu >= nr_cpu_ids) - break; + int address; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + use_sigp_detection = 1; + for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + if (__pcpu_sigp_relax(address, sigp_sense, 0, NULL) == + sigp_not_operational) + continue; + info->cpu[info->configured].address = address; + info->configured++; + } + info->combined = info->configured; } -out: - kfree(info); - return rc; + return info; } -static int __smp_rescan_cpus(void) +static int __devinit smp_add_present_cpu(int cpu); + +static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info, + int sysfs_add) { + struct pcpu *pcpu; cpumask_t avail; + int cpu, nr, i; + nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - if (smp_use_sigp_detection) - return smp_rescan_cpus_sigp(avail); - else - return smp_rescan_cpus_sclp(avail); + cpu = cpumask_first(&avail); + for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + continue; + if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = info->cpu[i].address; + pcpu->state = (cpu >= info->configured) ? 
+ CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); + } + return nr; } static void __init smp_detect_cpus(void) { unsigned int cpu, c_cpus, s_cpus; struct sclp_cpu_info *info; - u16 boot_cpu_addr, cpu_addr; - c_cpus = 1; - s_cpus = 0; - boot_cpu_addr = __cpu_logical_map[0]; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = smp_get_cpu_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE && !is_kdump_kernel()) { - struct save_area *save_area; - - save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); - if (!save_area) - panic("could not allocate memory for save area\n"); - copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), - 0x200, 0); - zfcpdump_save_areas[0] = save_area; - } -#endif - /* Use sigp detection algorithm if sclp doesn't work. */ - if (sclp_get_cpu_info(info)) { - smp_use_sigp_detection = 1; - for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) { - if (cpu == boot_cpu_addr) - continue; - if (!raw_cpu_stopped(cpu)) - continue; - smp_get_save_area(c_cpus, cpu); - c_cpus++; - } - goto out; - } - if (info->has_cpu_type) { for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address == boot_cpu_addr) { - smp_cpu_type = info->cpu[cpu].type; - break; - } + if (info->cpu[cpu].address != boot_cpu_address) + continue; + /* The boot cpu dictates the cpu type. 
*/ + boot_cpu_type = info->cpu[cpu].type; + break; } } - + c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) + if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) continue; - cpu_addr = info->cpu[cpu].address; - if (cpu_addr == boot_cpu_addr) - continue; - if (!raw_cpu_stopped(cpu_addr)) { + if (cpu < info->configured) { + smp_get_save_area(c_cpus, info->cpu[cpu].address); + c_cpus++; + } else s_cpus++; - continue; - } - smp_get_save_area(c_cpus, cpu_addr); - c_cpus++; } -out: - kfree(info); pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); get_online_cpus(); - __smp_rescan_cpus(); + __smp_rescan_cpus(info, 0); put_online_cpus(); + kfree(info); } /* * Activate a secondary processor. */ -int __cpuinit start_secondary(void *cpuvoid) +static void __cpuinit smp_start_secondary(void *cpuvoid) { + S390_lowcore.last_update_clock = get_clock(); + S390_lowcore.restart_stack = (unsigned long) restart_stack; + S390_lowcore.restart_fn = (unsigned long) do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1UL; + restore_access_regs(S390_lowcore.access_regs_save_area); + __ctl_load(S390_lowcore.cregs_save_area, 0, 15); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); cpu_init(); preempt_disable(); init_cpu_timer(); init_cpu_vtimer(); pfault_init(); - notify_cpu_starting(smp_processor_id()); ipi_call_lock(); set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); - __ctl_clear_bit(0, 28); /* Disable lowcore protection */ - S390_lowcore.restart_psw.mask = - PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - S390_lowcore.restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - __ctl_set_bit(0, 28); /* Enable lowcore protection */ /* * Wait until the cpu which brought this one up marked it * active before enabling interrupts. 
@@ -559,7 +719,6 @@ int __cpuinit start_secondary(void *cpuvoid) local_irq_enable(); /* cpu_idle will call schedule for us */ cpu_idle(); - return 0; } struct create_idle { @@ -578,82 +737,20 @@ static void __cpuinit smp_fork_idle(struct work_struct *work) complete(&c_idle->done); } -static int __cpuinit smp_alloc_lowcore(int cpu) -{ - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; - - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - if (!lowcore) - return -ENOMEM; - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - if (!panic_stack || !async_stack) - goto out; - memcpy(lowcore, &S390_lowcore, 512); - memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); - lowcore->async_stack = async_stack + ASYNC_SIZE; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->restart_psw.mask = - PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - lowcore->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lowcore->restart_psw.mask |= PSW_ASC_HOME; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - unsigned long save_area; - - save_area = get_zeroed_page(GFP_KERNEL); - if (!save_area) - goto out; - lowcore->extended_save_area_addr = (u32) save_area; - } -#else - if (vdso_alloc_per_cpu(cpu, lowcore)) - goto out; -#endif - lowcore_ptr[cpu] = lowcore; - return 0; - -out: - free_page(panic_stack); - free_pages(async_stack, ASYNC_ORDER); - free_pages((unsigned long) lowcore, LC_ORDER); - return -ENOMEM; -} - -static void smp_free_lowcore(int cpu) -{ - struct _lowcore *lowcore; - - lowcore = lowcore_ptr[cpu]; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - free_page((unsigned long) lowcore->extended_save_area_addr); -#else - vdso_free_per_cpu(cpu, lowcore); -#endif - free_page(lowcore->panic_stack - PAGE_SIZE); - free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); - free_pages((unsigned long) lowcore, 
LC_ORDER); - lowcore_ptr[cpu] = NULL; -} - /* Upping and downing of CPUs */ int __cpuinit __cpu_up(unsigned int cpu) { - struct _lowcore *cpu_lowcore; struct create_idle c_idle; - struct task_struct *idle; - struct stack_frame *sf; - u32 lowcore; - int ccode; + struct pcpu *pcpu; + int rc; - if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED) + pcpu = pcpu_devices + cpu; + if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - idle = current_set[cpu]; - if (!idle) { + if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) != + sigp_order_code_accepted) + return -EIO; + if (!pcpu->idle) { c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done); INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle); c_idle.cpu = cpu; @@ -661,68 +758,28 @@ int __cpuinit __cpu_up(unsigned int cpu) wait_for_completion(&c_idle.done); if (IS_ERR(c_idle.idle)) return PTR_ERR(c_idle.idle); - idle = c_idle.idle; - current_set[cpu] = c_idle.idle; + pcpu->idle = c_idle.idle; } - init_idle(idle, cpu); - if (smp_alloc_lowcore(cpu)) - return -ENOMEM; - do { - ccode = sigp(cpu, sigp_initial_cpu_reset); - if (ccode == sigp_busy) - udelay(10); - if (ccode == sigp_not_operational) - goto err_out; - } while (ccode == sigp_busy); - - lowcore = (u32)(unsigned long)lowcore_ptr[cpu]; - while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy) - udelay(10); - - cpu_lowcore = lowcore_ptr[cpu]; - cpu_lowcore->kernel_stack = (unsigned long) - task_stack_page(idle) + THREAD_SIZE; - cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle); - sf = (struct stack_frame *) (cpu_lowcore->kernel_stack - - sizeof(struct pt_regs) - - sizeof(struct stack_frame)); - memset(sf, 0, sizeof(struct stack_frame)); - sf->gprs[9] = (unsigned long) sf; - cpu_lowcore->gpregs_save_area[15] = (unsigned long) sf; - __ctl_store(cpu_lowcore->cregs_save_area, 0, 15); - atomic_inc(&init_mm.context.attach_count); - asm volatile( - " stam 0,15,0(%0)" - : : "a" (&cpu_lowcore->access_regs_save_area) : "memory"); - cpu_lowcore->percpu_offset 
= __per_cpu_offset[cpu]; - cpu_lowcore->current_task = (unsigned long) idle; - cpu_lowcore->cpu_nr = cpu; - cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; - cpu_lowcore->machine_flags = S390_lowcore.machine_flags; - cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func; - memcpy(cpu_lowcore->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); - eieio(); - - while (sigp(cpu, sigp_restart) == sigp_busy) - udelay(10); - + init_idle(pcpu->idle, cpu); + rc = pcpu_alloc_lowcore(pcpu, cpu); + if (rc) + return rc; + pcpu_prepare_secondary(pcpu, cpu); + pcpu_attach_task(pcpu, pcpu->idle); + pcpu_start_fn(pcpu, smp_start_secondary, NULL); while (!cpu_online(cpu)) cpu_relax(); return 0; - -err_out: - smp_free_lowcore(cpu); - return -EIO; } static int __init setup_possible_cpus(char *s) { - int pcpus, cpu; + int max, cpu; - pcpus = simple_strtoul(s, NULL, 0); + if (kstrtoint(s, 0, &max) < 0) + return 0; init_cpu_possible(cpumask_of(0)); - for (cpu = 1; cpu < pcpus && cpu < nr_cpu_ids; cpu++) + for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++) set_cpu_possible(cpu, true); return 0; } @@ -732,113 +789,67 @@ early_param("possible_cpus", setup_possible_cpus); int __cpu_disable(void) { - struct ec_creg_mask_parms cr_parms; - int cpu = smp_processor_id(); - - set_cpu_online(cpu, false); + unsigned long cregs[16]; - /* Disable pfault pseudo page faults on this cpu. */ + set_cpu_online(smp_processor_id(), false); + /* Disable pseudo page faults on this cpu. 
*/ pfault_fini(); - - memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals)); - memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals)); - - /* disable all external interrupts */ - cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 11 | - 1 << 10 | 1 << 9 | 1 << 6 | 1 << 5 | - 1 << 4); - /* disable all I/O interrupts */ - cr_parms.orvals[6] = 0; - cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 | - 1 << 27 | 1 << 26 | 1 << 25 | 1 << 24); - /* disable most machine checks */ - cr_parms.orvals[14] = 0; - cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 | - 1 << 25 | 1 << 24); - - smp_ctl_bit_callback(&cr_parms); - + /* Disable interrupt sources via control register. */ + __ctl_store(cregs, 0, 15); + cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ + __ctl_load(cregs, 0, 15); return 0; } void __cpu_die(unsigned int cpu) { + struct pcpu *pcpu; + /* Wait until target cpu is down */ - while (!cpu_stopped(cpu)) + pcpu = pcpu_devices + cpu; + while (!pcpu_stopped(pcpu)) cpu_relax(); - while (sigp_p(0, cpu, sigp_set_prefix) == sigp_busy) - udelay(10); - smp_free_lowcore(cpu); + pcpu_free_lowcore(pcpu); atomic_dec(&init_mm.context.attach_count); } void __noreturn cpu_die(void) { idle_task_exit(); - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); + for (;;) ; } #endif /* CONFIG_HOTPLUG_CPU */ void __init smp_prepare_cpus(unsigned int max_cpus) { -#ifndef CONFIG_64BIT - unsigned long save_area = 0; -#endif - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; - - smp_detect_cpus(); - /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); /* 
request the 0x1202 external call external interrupt */ if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1202"); - - /* Reallocate current lowcore, but keep its contents. */ - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - BUG_ON(!lowcore || !panic_stack || !async_stack); -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - save_area = get_zeroed_page(GFP_KERNEL); -#endif - local_irq_disable(); - local_mcck_disable(); - lowcore_ptr[smp_processor_id()] = lowcore; - *lowcore = S390_lowcore; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->async_stack = async_stack + ASYNC_SIZE; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - lowcore->extended_save_area_addr = (u32) save_area; -#endif - set_prefix((u32)(unsigned long) lowcore); - local_mcck_enable(); - local_irq_enable(); -#ifdef CONFIG_64BIT - if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore)) - BUG(); -#endif + smp_detect_cpus(); } void __init smp_prepare_boot_cpu(void) { - BUG_ON(smp_processor_id() != 0); - - current_thread_info()->cpu = 0; - set_cpu_present(0, true); - set_cpu_online(0, true); + struct pcpu *pcpu = pcpu_devices; + + boot_cpu_address = stap(); + pcpu->idle = current; + pcpu->state = CPU_STATE_CONFIGURED; + pcpu->address = boot_cpu_address; + pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE; + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE; S390_lowcore.percpu_offset = __per_cpu_offset[0]; - current_set[0] = current; - smp_cpu_state[0] = CPU_STATE_CONFIGURED; cpu_set_polarization(0, POLARIZATION_UNKNOWN); + set_cpu_present(0, true); + set_cpu_online(0, true); } void __init smp_cpus_done(unsigned int max_cpus) @@ -848,7 +859,6 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init 
smp_setup_processor_id(void) { S390_lowcore.cpu_nr = 0; - __cpu_logical_map[0] = stap(); } /* @@ -864,56 +874,57 @@ int setup_profiling_timer(unsigned int multiplier) #ifdef CONFIG_HOTPLUG_CPU static ssize_t cpu_configure_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]); + count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); mutex_unlock(&smp_cpu_state_mutex); return count; } static ssize_t cpu_configure_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) + struct device_attribute *attr, + const char *buf, size_t count) { - int cpu = dev->id; - int val, rc; + struct pcpu *pcpu; + int cpu, val, rc; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ + cpu = dev->id; if (cpu_online(cpu) || cpu == 0) goto out; + pcpu = pcpu_devices + cpu; rc = 0; switch (val) { case 0: - if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) { - rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]); - if (!rc) { - smp_cpu_state[cpu] = CPU_STATE_STANDBY; - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - topology_expect_change(); - } - } + if (pcpu->state != CPU_STATE_CONFIGURED) + break; + rc = sclp_cpu_deconfigure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_STANDBY; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; case 1: - if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) { - rc = sclp_cpu_configure(__cpu_logical_map[cpu]); - if (!rc) { - smp_cpu_state[cpu] = CPU_STATE_CONFIGURED; - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - topology_expect_change(); - } - } + if (pcpu->state != CPU_STATE_STANDBY) + break; + rc = 
sclp_cpu_configure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_CONFIGURED; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; default: break; @@ -929,7 +940,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); + return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); @@ -1021,7 +1032,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = &pcpu_devices[cpu].cpu; struct device *s = &c->dev; struct s390_idle_data *idle; int err = 0; @@ -1047,7 +1058,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = { static int __devinit smp_add_present_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = &pcpu_devices[cpu].cpu; struct device *s = &c->dev; int rc; @@ -1085,29 +1096,21 @@ out: int __ref smp_rescan_cpus(void) { - cpumask_t newcpus; - int cpu; - int rc; + struct sclp_cpu_info *info; + int nr; + info = smp_get_cpu_info(); + if (!info) + return -ENOMEM; get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - cpumask_copy(&newcpus, cpu_present_mask); - rc = __smp_rescan_cpus(); - if (rc) - goto out; - cpumask_andnot(&newcpus, cpu_present_mask, &newcpus); - for_each_cpu(cpu, &newcpus) { - rc = smp_add_present_cpu(cpu); - if (rc) - set_cpu_present(cpu, false); - } - rc = 0; -out: + nr = __smp_rescan_cpus(info, 1); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); - if (!cpumask_empty(&newcpus)) + kfree(info); + if (nr) topology_schedule_update(); - return rc; + return 0; } static ssize_t __ref rescan_store(struct device *dev, diff --git a/arch/s390/kernel/switch_cpu.S 
b/arch/s390/kernel/switch_cpu.S deleted file mode 100644 index bfe070bc7659..000000000000 --- a/arch/s390/kernel/switch_cpu.S +++ /dev/null @@ -1,58 +0,0 @@ -/* - * 31-bit switch cpu code - * - * Copyright IBM Corp. 2009 - * - */ - -#include -#include -#include - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -# %r3 - function parameter -# %r4 - stack poiner -# %r5 - current cpu -# %r6 - destination cpu - - .section .text -ENTRY(smp_switch_to_cpu) - stm %r6,%r15,__SF_GPRS(%r15) - lr %r1,%r15 - ahi %r15,-STACK_FRAME_OVERHEAD - st %r1,__SF_BACKCHAIN(%r15) - basr %r13,0 -0: la %r1,.gprregs_addr-0b(%r13) - l %r1,0(%r1) - stm %r0,%r15,0(%r1) -1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */ - brc 2,1b /* busy, try again */ -2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */ - brc 2,2b /* busy, try again */ -3: j 3b - -ENTRY(smp_restart_cpu) - basr %r13,0 -0: la %r1,.gprregs_addr-0b(%r13) - l %r1,0(%r1) - lm %r0,%r15,0(%r1) -1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */ - brc 10,1b /* busy, accepted (status 0), running */ - tmll %r0,0x40 /* Test if calling CPU is stopped */ - jz 1b - ltr %r4,%r4 /* New stack ? */ - jz 1f - lr %r15,%r4 -1: lr %r14,%r2 /* r14: Function to call */ - lr %r2,%r3 /* r2 : Parameter for function*/ - basr %r14,%r14 /* Call function */ - -.gprregs_addr: - .long .gprregs - - .section .data,"aw",@progbits -.gprregs: - .rept 16 - .long 0 - .endr diff --git a/arch/s390/kernel/switch_cpu64.S b/arch/s390/kernel/switch_cpu64.S deleted file mode 100644 index fcc42d799e41..000000000000 --- a/arch/s390/kernel/switch_cpu64.S +++ /dev/null @@ -1,51 +0,0 @@ -/* - * 64-bit switch cpu code - * - * Copyright IBM Corp. 
2009 - * - */ - -#include -#include -#include - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -# %r3 - function parameter -# %r4 - stack poiner -# %r5 - current cpu -# %r6 - destination cpu - - .section .text -ENTRY(smp_switch_to_cpu) - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - larl %r1,.gprregs - stmg %r0,%r15,0(%r1) -1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */ - brc 2,1b /* busy, try again */ -2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */ - brc 2,2b /* busy, try again */ -3: j 3b - -ENTRY(smp_restart_cpu) - larl %r1,.gprregs - lmg %r0,%r15,0(%r1) -1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */ - brc 10,1b /* busy, accepted (status 0), running */ - tmll %r0,0x40 /* Test if calling CPU is stopped */ - jz 1b - ltgr %r4,%r4 /* New stack ? */ - jz 1f - lgr %r15,%r4 -1: lgr %r14,%r2 /* r14: Function to call */ - lgr %r2,%r3 /* r2 : Parameter for function*/ - basr %r14,%r14 /* Call function */ - - .section .data,"aw",@progbits -.gprregs: - .rept 16 - .quad 0 - .endr diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 2ef39d1519a9..ad3c79eceed7 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -179,9 +179,9 @@ pgm_check_entry: larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ mvc __LC_RST_NEW_PSW(16,%r0),0(%r4) 3: - sigp %r9,%r1,__SIGP_INITIAL_CPU_RESET - brc 8,4f /* accepted */ - brc 2,3b /* busy, try again */ + sigp %r9,%r1,11 /* sigp initial cpu reset */ + brc 8,4f /* accepted */ + brc 2,3b /* busy, try again */ /* Suspend CPU not available -> panic */ larl %r15,init_thread_union @@ -196,10 +196,10 @@ pgm_check_entry: lpsw 0(%r3) 4: /* Switch to suspend CPU */ - sigp %r9,%r1,__SIGP_RESTART /* start suspend CPU */ + sigp %r9,%r1,6 /* sigp restart to suspend CPU */ brc 2,4b /* busy, try again */ 5: - sigp %r9,%r2,__SIGP_STOP 
/* stop resume (current) CPU */ + sigp %r9,%r2,5 /* sigp stop to current resume CPU */ brc 2,5b /* busy, try again */ 6: j 6b @@ -207,7 +207,7 @@ restart_suspend: larl %r1,.Lresume_cpu llgh %r2,0(%r1) 7: - sigp %r9,%r2,__SIGP_SENSE /* Wait for resume CPU */ + sigp %r9,%r2,1 /* sigp sense, wait for resume CPU */ brc 8,7b /* accepted, status 0, still running */ brc 2,7b /* busy, try again */ tmll %r9,0x40 /* Test if resume CPU is stopped */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 7370a41948ca..4f8dc942257c 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -79,12 +79,12 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, cpu < TOPOLOGY_CPU_BITS; cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1)) { - unsigned int rcpu, lcpu; + unsigned int rcpu; + int lcpu; rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; - for_each_present_cpu(lcpu) { - if (cpu_logical_map(lcpu) != rcpu) - continue; + lcpu = smp_find_processor_id(rcpu); + if (lcpu >= 0) { cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; cpumask_set_cpu(lcpu, &core->mask); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d73630b4fe1d..e704a9965f90 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -88,19 +88,12 @@ static void vdso_init_data(struct vdso_data *vd) } #ifdef CONFIG_64BIT -/* - * Setup per cpu vdso data page. - */ -static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) -{ -} - /* * Allocate/free per cpu vdso data. 
*/ #define SEGMENT_ORDER 2 -int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +int vdso_alloc_per_cpu(struct _lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; @@ -139,7 +132,6 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) aste[4] = (u32)(addr_t) psal; lowcore->vdso_per_cpu_data = page_frame; - vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); return 0; out: @@ -149,7 +141,7 @@ out: return -ENOMEM; } -void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +void vdso_free_per_cpu(struct _lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; @@ -168,19 +160,15 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) free_pages(segment_table, SEGMENT_ORDER); } -static void __vdso_init_cr5(void *dummy) +static void vdso_init_cr5(void) { unsigned long cr5; + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) + return; cr5 = offsetof(struct _lowcore, paste); __ctl_load(cr5, 5, 5); } - -static void vdso_init_cr5(void) -{ - if (user_mode != HOME_SPACE_MODE && vdso_enabled) - on_each_cpu(__vdso_init_cr5, NULL, 1); -} #endif /* CONFIG_64BIT */ /* @@ -322,10 +310,8 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; -#ifndef CONFIG_SMP - if (vdso_alloc_per_cpu(0, &S390_lowcore)) + if (vdso_alloc_per_cpu(&S390_lowcore)) BUG(); -#endif vdso_init_cr5(); #endif /* CONFIG_64BIT */ @@ -335,7 +321,7 @@ static int __init vdso_init(void) return 0; } -arch_initcall(vdso_init); +early_initcall(vdso_init); int in_gate_area_no_mm(unsigned long addr) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index bb48977f5469..7bacee9a546f 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -570,6 +570,9 @@ void init_cpu_vtimer(void) /* enable cpu timer interrupts */ __ctl_set_bit(0,10); + + /* set initial cpu timer */ + 
set_vtimer(0x7fffffffffffffffULL); } static int __cpuinit s390_nohz_notify(struct notifier_block *self, diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 91754ffb9203..093eb694d9c1 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -10,6 +10,7 @@ #include #include #include +#include #include int spin_retry = 1000; @@ -24,21 +25,6 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); -static inline void _raw_yield(void) -{ - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); -} - -static inline void _raw_yield_cpu(int cpu) -{ - if (MACHINE_HAS_DIAG9C) - asm volatile("diag %0,0,0x9c" - : : "d" (cpu_logical_map(cpu))); - else - _raw_yield(); -} - void arch_spin_lock_wait(arch_spinlock_t *lp) { int count = spin_retry; @@ -60,7 +46,7 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) } owner = lp->owner_cpu; if (owner) - _raw_yield_cpu(~owner); + smp_yield_cpu(~owner); if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) return; } @@ -91,7 +77,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) } owner = lp->owner_cpu; if (owner) - _raw_yield_cpu(~owner); + smp_yield_cpu(~owner); local_irq_disable(); if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) return; @@ -121,7 +107,7 @@ void arch_spin_relax(arch_spinlock_t *lock) if (cpu != 0) { if (MACHINE_IS_VM || MACHINE_IS_KVM || !smp_vcpu_scheduled(~cpu)) - _raw_yield_cpu(~cpu); + smp_yield_cpu(~cpu); } } EXPORT_SYMBOL(arch_spin_relax); @@ -133,7 +119,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_read_can_lock(rw)) @@ -153,7 +139,7 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) local_irq_restore(flags); while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_read_can_lock(rw)) @@ -188,7 +174,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) while 
(1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_write_can_lock(rw)) @@ -206,7 +192,7 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) local_irq_restore(flags); while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_write_can_lock(rw)) diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c index 87fc0ac11e67..69df137310bc 100644 --- a/drivers/s390/char/sclp_quiesce.c +++ b/drivers/s390/char/sclp_quiesce.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include "sclp.h" diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 1b6d9247fdc7..3303d66b2794 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 4c1051e37a0e2a941115c6fb7ba08c318f25a0f9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 11 Mar 2012 11:59:27 -0400 Subject: [S390] rework idle code Whenever the cpu loads an enabled wait PSW it will appear as idle to the underlying host system. The code in default_idle calls vtime_stop_cpu which does the necessary voodoo to get the cpu time accounting right. The udelay code just loads an enabled wait PSW. To correct this rework the vtime_stop_cpu/vtime_start_cpu logic and move the difficult parts to entry[64].S, vtime_stop_cpu can now be called from anywhere and vtime_start_cpu is gone. The correction of the cpu time during wakeup from an enabled wait PSW is done with a critical section in entry[64].S. As vtime_start_cpu is gone, s390_idle_check can be removed as well. 
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 9 +-- arch/s390/include/asm/timer.h | 4 +- arch/s390/kernel/asm-offsets.c | 8 ++ arch/s390/kernel/entry.S | 76 +++++++++++++++++-- arch/s390/kernel/entry.h | 5 +- arch/s390/kernel/entry64.S | 65 +++++++++++++++- arch/s390/kernel/irq.c | 2 - arch/s390/kernel/nmi.c | 2 - arch/s390/kernel/process.c | 7 +- arch/s390/kernel/smp.c | 46 +++++------- arch/s390/kernel/vtime.c | 163 ++++++++-------------------------------- arch/s390/lib/delay.c | 31 ++++---- drivers/s390/cio/cio.c | 2 - 13 files changed, 210 insertions(+), 210 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index c23c3900c304..24ef186a1c4f 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -170,24 +170,17 @@ struct s390_idle_data { unsigned int sequence; unsigned long long idle_count; unsigned long long idle_enter; + unsigned long long idle_exit; unsigned long long idle_time; int nohz_delay; }; DECLARE_PER_CPU(struct s390_idle_data, s390_idle); -void vtime_start_cpu(__u64 int_clock, __u64 enter_timer); cputime64_t s390_get_idle_time(int cpu); #define arch_idle_time(cpu) s390_get_idle_time(cpu) -static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock, - __u64 enter_timer) -{ - if (regs->psw.mask & PSW_MASK_WAIT) - vtime_start_cpu(int_clock, enter_timer); -} - static inline int s390_nohz_delay(int cpu) { return __get_cpu_var(s390_idle).nohz_delay != 0; diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h index 814243cafdfe..e63069ba39e3 100644 --- a/arch/s390/include/asm/timer.h +++ b/arch/s390/include/asm/timer.h @@ -33,8 +33,8 @@ struct vtimer_queue { spinlock_t lock; __u64 timer; /* last programmed timer */ __u64 elapsed; /* elapsed time of timer expire values */ - __u64 idle; /* temp var for idle */ - int do_spt; /* =1: reprogram cpu timer in idle */ + __u64 idle_enter; /* cpu timer on 
idle enter */ + __u64 idle_exit; /* cpu timer on idle exit */ }; extern void init_virt_timer(struct vtimer_list *timer); diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index aeeaf896be9b..ed8c913db79e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -8,6 +8,8 @@ #include #include +#include +#include #include #include #include @@ -70,6 +72,12 @@ int main(void) DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); BLANK(); + /* idle data offsets */ + DEFINE(__IDLE_ENTER, offsetof(struct s390_idle_data, idle_enter)); + DEFINE(__IDLE_EXIT, offsetof(struct s390_idle_data, idle_exit)); + /* vtimer queue offsets */ + DEFINE(__VQ_IDLE_ENTER, offsetof(struct vtimer_queue, idle_enter)); + DEFINE(__VQ_IDLE_EXIT, offsetof(struct vtimer_queue, idle_exit)); /* lowcore offsets */ DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6143521a4fff..74ee563fe62b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -105,14 +105,14 @@ STACK_SIZE = 1 << STACK_SHIFT .macro ADD64 high,low,timer al \high,\timer - al \low,\timer+4 + al \low,4+\timer brc 12,.+8 ahi \high,1 .endm .macro SUB64 high,low,timer sl \high,\timer - sl \low,\timer+4 + sl \low,4+\timer brc 3,.+8 ahi \high,-1 .endm @@ -471,7 +471,6 @@ io_tif: jnz io_work # there is work to do (signals etc.) io_restore: mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) - ni __LC_RETURN_PSW+1,0xfd # clean wait state bit stpt __LC_EXIT_TIMER lm %r0,%r15,__PT_R0(%r11) lpsw __LC_RETURN_PSW @@ -612,6 +611,26 @@ ext_skip: basr %r14,%r1 # call do_extint j io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. 
+ */ +ENTRY(psw_idle) + st %r4,__SF_EMPTY(%r15) + basr %r1,0 + la %r1,psw_idle_lpsw+4-.(%r1) + st %r1,__SF_EMPTY+4(%r15) + oi __SF_EMPTY+4(%r15),0x80 + la %r1,.Lvtimer_max-psw_idle_lpsw-4(%r1) + stck __IDLE_ENTER(%r2) + ltr %r5,%r5 + stpt __VQ_IDLE_ENTER(%r3) + jz psw_idle_lpsw + spt 0(%r1) +psw_idle_lpsw: + lpsw __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + __critical_end: /* @@ -673,7 +692,6 @@ mcck_skip: TRACE_IRQS_ON mcck_return: mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f lm %r0,%r15,__PT_R0(%r11) @@ -748,6 +766,8 @@ cleanup_table: .long io_tif + 0x80000000 .long io_restore + 0x80000000 .long io_done + 0x80000000 + .long psw_idle + 0x80000000 + .long psw_idle_end + 0x80000000 cleanup_critical: cl %r9,BASED(cleanup_table) # system_call @@ -766,6 +786,10 @@ cleanup_critical: jl cleanup_io_tif cl %r9,BASED(cleanup_table+28) # io_done jl cleanup_io_restore + cl %r9,BASED(cleanup_table+32) # psw_idle + jl 0f + cl %r9,BASED(cleanup_table+36) # psw_idle_end + jl cleanup_idle 0: br %r14 cleanup_system_call: @@ -849,7 +873,6 @@ cleanup_io_restore: jhe 0f l %r9,12(%r11) # get saved r11 pointer to pt_regs mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) - ni __LC_RETURN_PSW+1,0xfd # clear wait state bit mvc 0(32,%r11),__PT_R8(%r9) lm %r0,%r7,__PT_R0(%r9) 0: lm %r8,%r9,__LC_RETURN_PSW @@ -857,11 +880,52 @@ cleanup_io_restore: cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0: # check if stck has been executed + cl %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) + mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) + j 2f +1: # check if the cpu timer 
has been reprogrammed + ltr %r5,%r5 + jz 2f + spt __VQ_IDLE_ENTER(%r3) +2: # account system time going idle + lm %r9,%r10,__LC_STEAL_TIMER + ADD64 %r9,%r10,__IDLE_ENTER(%r2) + SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK + stm %r9,%r10,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) + lm %r9,%r10,__LC_SYSTEM_TIMER + ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER + SUB64 %r9,%r10,__VQ_IDLE_ENTER(%r3) + stm %r9,%r10,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) + # prepare return psw + n %r8,BASED(cleanup_idle_wait) # clear wait state bit + l %r9,24(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .long psw_idle_lpsw + 0x80000000 +cleanup_idle_wait: + .long 0xfffdffff + /* * Integer constants */ .align 4 -.Lnr_syscalls: .long NR_syscalls +.Lnr_syscalls: + .long NR_syscalls +.Lvtimer_max: + .quad 0x7fffffffffffffff /* * Symbol constants diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 92b1617d0c95..4984785e3078 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -4,7 +4,8 @@ #include #include #include - +#include +#include extern void (*pgm_check_table[128])(struct pt_regs *); extern void *restart_stack; @@ -16,6 +17,8 @@ void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); void restart_call_handler(void); +void psw_idle(struct s390_idle_data *, struct vtimer_queue *, + unsigned long, int); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index e33789a45752..4e1c292fa7e3 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -489,7 +489,6 @@ io_restore: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) - ni __LC_RETURN_PSW+1,0xfd # clear wait state bit stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) @@ 
-631,6 +630,24 @@ ext_skip: brasl %r14,do_extint j io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + stg %r4,__SF_EMPTY(%r15) + larl %r1,psw_idle_lpsw+4 + stg %r1,__SF_EMPTY+8(%r15) + larl %r1,.Lvtimer_max + stck __IDLE_ENTER(%r2) + ltr %r5,%r5 + stpt __VQ_IDLE_ENTER(%r3) + jz psw_idle_lpsw + spt 0(%r1) +psw_idle_lpsw: + lpswe __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + __critical_end: /* @@ -696,7 +713,6 @@ mcck_return: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER @@ -770,6 +786,8 @@ cleanup_table: .quad io_tif .quad io_restore .quad io_done + .quad psw_idle + .quad psw_idle_end cleanup_critical: clg %r9,BASED(cleanup_table) # system_call @@ -788,6 +806,10 @@ cleanup_critical: jl cleanup_io_tif clg %r9,BASED(cleanup_table+56) # io_done jl cleanup_io_restore + clg %r9,BASED(cleanup_table+64) # psw_idle + jl 0f + clg %r9,BASED(cleanup_table+72) # psw_idle_end + jl cleanup_idle 0: br %r14 @@ -877,7 +899,6 @@ cleanup_io_restore: je 0f lg %r9,24(%r11) # get saved r11 pointer to pt_regs mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - ni __LC_RETURN_PSW+1,0xfd # clear wait state bit mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 0: lmg %r8,%r9,__LC_RETURN_PSW @@ -885,6 +906,42 @@ cleanup_io_restore: cleanup_io_restore_insn: .quad io_done - 4 +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0: # check if stck & stpt have been executed + clg %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) + mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) + j 2f +1: # check 
if the cpu timer has been reprogrammed + ltr %r5,%r5 + jz 2f + spt __VQ_IDLE_ENTER(%r3) +2: # account system time going idle + lg %r9,__LC_STEAL_TIMER + alg %r9,__IDLE_ENTER(%r2) + slg %r9,__LC_LAST_UPDATE_CLOCK + stg %r9,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) + lg %r9,__LC_SYSTEM_TIMER + alg %r9,__LC_LAST_UPDATE_TIMER + slg %r9,__VQ_IDLE_ENTER(%r3) + stg %r9,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) + # prepare return psw + nihh %r8,0xfffd # clear wait state bit + lg %r9,48(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .quad psw_idle_lpsw + /* * Integer constants */ @@ -893,6 +950,8 @@ cleanup_io_restore_insn: .quad __critical_start .Lcritical_length: .quad __critical_end - __critical_start +.Lvtimer_max: + .quad 0x7fffffffffffffff #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index b9a7fdd9c814..09a014c62537 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -219,8 +219,6 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, code = (unsigned short) ext_int_code; old_regs = set_irq_regs(regs); - s390_idle_check(regs, S390_lowcore.int_clock, - S390_lowcore.async_enter_timer); irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. 
*/ diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0fd2e863e114..8c372ca61350 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -254,8 +254,6 @@ void notrace s390_do_machine_check(struct pt_regs *regs) int umode; nmi_enter(); - s390_idle_check(regs, S390_lowcore.mcck_clock, - S390_lowcore.mcck_enter_timer); kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++; mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index e795933eb2cb..78b3c149b8b6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -77,13 +77,8 @@ static void default_idle(void) local_irq_enable(); return; } - trace_hardirqs_on(); - /* Don't trace preempt off for idle. */ - stop_critical_timings(); - /* Stop virtual timer and halt the cpu. */ + /* Halt the cpu and keep track of cpu time accounting. */ vtime_stop_cpu(); - /* Reenable preemption tracer. */ - start_critical_timings(); } void cpu_idle(void) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6db8526a602d..afd6e5113a90 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -972,22 +972,16 @@ static DEVICE_ATTR(capability, 0444, show_capability, NULL); static ssize_t show_idle_count(struct device *dev, struct device_attribute *attr, char *buf) { - struct s390_idle_data *idle; + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned long long idle_count; unsigned int sequence; - idle = &per_cpu(s390_idle, dev->id); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_count = idle->idle_count; - if (idle->idle_enter) - idle_count++; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->idle_enter)) + idle_count++; + } while ((sequence & 1) || (idle->sequence != sequence)); return 
sprintf(buf, "%llu\n", idle_count); } static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -995,24 +989,18 @@ static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { - struct s390_idle_data *idle; - unsigned long long now, idle_time, idle_enter; + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int sequence; - idle = &per_cpu(s390_idle, dev->id); - now = get_clock(); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_time = idle->idle_time; - idle_enter = idle->idle_enter; - if (idle_enter != 0ULL && idle_enter < now) - idle_time += now - idle_enter; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; + do { + now = get_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->idle_enter); + idle_exit = ACCESS_ONCE(idle->idle_exit); + } while ((sequence & 1) || (idle->sequence != sequence)); + idle_time += idle_enter ? ((idle_exit ? 
: now) - idle_enter) : 0; return sprintf(buf, "%llu\n", idle_time >> 12); } static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 7bacee9a546f..277ea712b232 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -26,6 +26,7 @@ #include #include #include +#include "entry.h" static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); @@ -123,153 +124,53 @@ void account_system_vtime(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(account_system_vtime); -void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer) +void __kprobes vtime_stop_cpu(void) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); - __u64 idle_time, expires; + unsigned long long idle_time; + unsigned long psw_mask; - if (idle->idle_enter == 0ULL) - return; + trace_hardirqs_on(); + /* Don't trace preempt off for idle. */ + stop_critical_timings(); - /* Account time spent with enabled wait psw loaded as idle time. */ - idle_time = int_clock - idle->idle_enter; - account_idle_time(idle_time); - S390_lowcore.steal_timer += - idle->idle_enter - S390_lowcore.last_update_clock; - S390_lowcore.last_update_clock = int_clock; - - /* Account system time spent going idle. */ - S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; - S390_lowcore.last_update_timer = enter_timer; - - /* Restart vtime CPU timer */ - if (vq->do_spt) { - /* Program old expire value but first save progress. */ - expires = vq->idle - enter_timer; - expires += get_vtimer(); - set_vtimer(expires); - } else { - /* Don't account the CPU timer delta while the cpu was idle. */ - vq->elapsed -= vq->idle - enter_timer; - } + /* Wait for external, I/O or machine check interrupt. 
*/ + psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + idle->nohz_delay = 0; + /* Call the assembler magic in entry.S */ + psw_idle(idle, vq, psw_mask, !list_empty(&vq->list)); + + /* Reenable preemption tracer. */ + start_critical_timings(); + + /* Account time spent with enabled wait psw loaded as idle time. */ idle->sequence++; smp_wmb(); + idle_time = idle->idle_exit - idle->idle_enter; idle->idle_time += idle_time; - idle->idle_enter = 0ULL; + idle->idle_enter = idle->idle_exit = 0ULL; idle->idle_count++; + account_idle_time(idle_time); smp_wmb(); idle->sequence++; } -void __kprobes vtime_stop_cpu(void) -{ - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); - psw_t psw; - - /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - - idle->nohz_delay = 0; - - /* Check if the CPU timer needs to be reprogrammed. */ - if (vq->do_spt) { - __u64 vmax = VTIMER_MAX_SLICE; - /* - * The inline assembly is equivalent to - * vq->idle = get_cpu_timer(); - * set_cpu_timer(VTIMER_MAX_SLICE); - * idle->idle_enter = get_clock(); - * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_DAT | PSW_MASK_IO | - * PSW_MASK_EXT | PSW_MASK_MCHECK); - * The difference is that the inline assembly makes sure that - * the last three instruction are stpt, stck and lpsw in that - * order. This is done to increase the precision. 
- */ - asm volatile( -#ifndef CONFIG_64BIT - " basr 1,0\n" - "0: ahi 1,1f-0b\n" - " st 1,4(%2)\n" -#else /* CONFIG_64BIT */ - " larl 1,1f\n" - " stg 1,8(%2)\n" -#endif /* CONFIG_64BIT */ - " stpt 0(%4)\n" - " spt 0(%5)\n" - " stck 0(%3)\n" -#ifndef CONFIG_64BIT - " lpsw 0(%2)\n" -#else /* CONFIG_64BIT */ - " lpswe 0(%2)\n" -#endif /* CONFIG_64BIT */ - "1:" - : "=m" (idle->idle_enter), "=m" (vq->idle) - : "a" (&psw), "a" (&idle->idle_enter), - "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) - : "memory", "cc", "1"); - } else { - /* - * The inline assembly is equivalent to - * vq->idle = get_cpu_timer(); - * idle->idle_enter = get_clock(); - * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_DAT | PSW_MASK_IO | - * PSW_MASK_EXT | PSW_MASK_MCHECK); - * The difference is that the inline assembly makes sure that - * the last three instruction are stpt, stck and lpsw in that - * order. This is done to increase the precision. - */ - asm volatile( -#ifndef CONFIG_64BIT - " basr 1,0\n" - "0: ahi 1,1f-0b\n" - " st 1,4(%2)\n" -#else /* CONFIG_64BIT */ - " larl 1,1f\n" - " stg 1,8(%2)\n" -#endif /* CONFIG_64BIT */ - " stpt 0(%4)\n" - " stck 0(%3)\n" -#ifndef CONFIG_64BIT - " lpsw 0(%2)\n" -#else /* CONFIG_64BIT */ - " lpswe 0(%2)\n" -#endif /* CONFIG_64BIT */ - "1:" - : "=m" (idle->idle_enter), "=m" (vq->idle) - : "a" (&psw), "a" (&idle->idle_enter), - "a" (&vq->idle), "m" (psw) - : "memory", "cc", "1"); - } -} - cputime64_t s390_get_idle_time(int cpu) { - struct s390_idle_data *idle; - unsigned long long now, idle_time, idle_enter; + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; unsigned int sequence; - idle = &per_cpu(s390_idle, cpu); - - now = get_clock(); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_time = 0; - idle_enter = idle->idle_enter; - if (idle_enter != 0ULL && idle_enter < now) - idle_time = now - idle_enter; - smp_rmb(); - if 
(idle->sequence != sequence) - goto repeat; - return idle_time; + do { + now = get_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->idle_enter); + idle_exit = ACCESS_ONCE(idle->idle_exit); + } while ((sequence & 1) || (idle->sequence != sequence)); + return idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; } /* @@ -346,7 +247,6 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code, } spin_unlock(&vq->lock); - vq->do_spt = list_empty(&cb_list); do_callbacks(&cb_list); /* next event is first in list */ @@ -355,8 +255,7 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code, if (!list_empty(&vq->list)) { event = list_first_entry(&vq->list, struct vtimer_list, entry); next = event->expires; - } else - vq->do_spt = 0; + } spin_unlock(&vq->lock); /* * To improve precision add the time spent by the diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index db92f044024c..9f1f71e85778 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -13,6 +13,7 @@ #include #include #include +#include void __delay(unsigned long loops) { @@ -28,36 +29,33 @@ void __delay(unsigned long loops) static void __udelay_disabled(unsigned long long usecs) { - unsigned long mask, cr0, cr0_saved; - u64 clock_saved; - u64 end; + unsigned long cr0, cr6, new; + u64 clock_saved, end; - mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT | - PSW_MASK_EXT | PSW_MASK_MCHECK; end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); - __ctl_store(cr0_saved, 0, 0); - cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; - __ctl_load(cr0 , 0, 0); + __ctl_store(cr0, 0, 0); + __ctl_store(cr6, 6, 6); + new = (cr0 & 0xffff00e0) | 0x00000800; + __ctl_load(new , 0, 0); + new = 0; + __ctl_load(new, 6, 6); lockdep_off(); do { set_clock_comparator(end); - trace_hardirqs_on(); - __load_psw_mask(mask); + vtime_stop_cpu(); local_irq_disable(); } while (get_clock() < end); lockdep_on(); - __ctl_load(cr0_saved, 0, 0); + __ctl_load(cr0, 0, 0); 
+ __ctl_load(cr6, 6, 6); local_tick_enable(clock_saved); } static void __udelay_enabled(unsigned long long usecs) { - unsigned long mask; - u64 clock_saved; - u64 end; + u64 clock_saved, end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO; end = get_clock() + (usecs << 12); do { clock_saved = 0; @@ -65,8 +63,7 @@ static void __udelay_enabled(unsigned long long usecs) clock_saved = local_tick_disable(); set_clock_comparator(end); } - trace_hardirqs_on(); - __load_psw_mask(mask); + vtime_stop_cpu(); local_irq_disable(); if (clock_saved) local_tick_enable(clock_saved); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index dc67c397449e..a49c46c91983 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -601,8 +601,6 @@ void __irq_entry do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - s390_idle_check(regs, S390_lowcore.int_clock, - S390_lowcore.async_enter_timer); irq_enter(); __this_cpu_write(s390_idle.nohz_delay, 1); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) -- cgit v1.2.3 From 2215011dd9c1b01f1fc430620ed31f2baad267c3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 11 Mar 2012 11:59:30 -0400 Subject: [S390] irq: set __ARCH_IRQ_EXIT_IRQS_DISABLED Set __ARCH_IRQ_EXIT_IRQS_DISABLED in order to optimize irq_exit() a bit, since we call __do_softirq() instead of do_softirq(). This saves several needless checks, pointless interrupt disabling and an extra branch. If do_softirq() gets called from process context we still switch to the async stack. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/hardirq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index e4155d3eb2cb..510ba9ef4248 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -18,6 +18,7 @@ #define __ARCH_IRQ_STAT #define __ARCH_HAS_DO_SOFTIRQ +#define __ARCH_IRQ_EXIT_IRQS_DISABLED #define HARDIRQ_BITS 8 -- cgit v1.2.3 From fde15c3a3adc7b65cd0610dd6bca4804ee7ffd38 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 11 Mar 2012 11:59:31 -0400 Subject: [S390] irq: external interrupt code passing The external interrupt handlers have a parameter called ext_int_code. Despite its name, this parameter contains not only the ext_int_code but also the "cpu address" (POP) which caused the external interrupt. To make the code a bit more obvious pass a struct instead so the called function can easily distinguish between external interrupt code and cpu address. The cpu address field however is named "subcode" since some external interrupt sources do not pass a cpu address but a different parameter (or none at all). 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 7 ++++++- arch/s390/kernel/entry.h | 3 ++- arch/s390/kernel/irq.c | 12 +++++------- arch/s390/kernel/smp.c | 4 ++-- arch/s390/kernel/time.c | 4 ++-- arch/s390/kernel/vtime.c | 2 +- arch/s390/mm/fault.c | 4 ++-- arch/s390/oprofile/hwsampler.c | 6 +++--- drivers/s390/block/dasd_diag.c | 8 ++++---- drivers/s390/char/sclp.c | 4 ++-- drivers/s390/kvm/kvm_virtio.c | 6 ++---- net/iucv/iucv.c | 2 +- 12 files changed, 32 insertions(+), 30 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ba6d85f88d50..acee1806f61e 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -34,7 +34,12 @@ enum interruption_class { NR_IRQS, }; -typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); +struct ext_code { + unsigned short subcode; + unsigned short code; +}; + +typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); int register_external_interrupt(u16 code, ext_int_handler_t handler); int unregister_external_interrupt(u16 code, ext_int_handler_t handler); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 4984785e3078..6cdddac93a2e 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -35,7 +35,8 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); void do_notify_resume(struct pt_regs *regs); -void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +struct ext_code; +void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long); void do_restart(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 09a014c62537..7a97d57f7073 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ 
-209,29 +209,27 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, +void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct pt_regs *old_regs; - unsigned short code; struct ext_int_info *p; int index; - code = (unsigned short) ext_int_code; old_regs = set_irq_regs(regs); irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; - if (code != 0x1004) + if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; - index = ext_hash(code); + index = ext_hash(ext_code.code); rcu_read_lock(); list_for_each_entry_rcu(p, &ext_int_hash[index], entry) - if (likely(p->code == code)) - p->handler(ext_int_code, param32, param64); + if (likely(p->code == ext_code.code)) + p->handler(ext_code, param32, param64); rcu_read_unlock(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index afd6e5113a90..734e644972ab 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -434,14 +434,14 @@ void smp_stop_cpu(void) * This is the main routine where commands issued by other * cpus are handled. 
*/ -static void do_ext_call_interrupt(unsigned int ext_int_code, +static void do_ext_call_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { unsigned long bits; int cpu; cpu = smp_processor_id(); - if ((ext_int_code & 0xffff) == 0x1202) + if (ext_code.code == 0x1202) kstat_cpu(cpu).irqs[EXTINT_EXC]++; else kstat_cpu(cpu).irqs[EXTINT_EMS]++; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 14da278febbf..d4e1cb1dbcd1 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -165,7 +165,7 @@ void init_cpu_timer(void) __ctl_set_bit(0, 4); } -static void clock_comparator_interrupt(unsigned int ext_int_code, +static void clock_comparator_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { @@ -177,7 +177,7 @@ static void clock_comparator_interrupt(unsigned int ext_int_code, static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); -static void timing_alert_interrupt(unsigned int ext_int_code, +static void timing_alert_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 277ea712b232..39ebff506946 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -220,7 +220,7 @@ static void do_callbacks(struct list_head *cb_list) /* * Handler for the virtual CPU timer. 
*/ -static void do_cpu_timer_interrupt(unsigned int ext_int_code, +static void do_cpu_timer_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct vtimer_queue *vq; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e8fcd928dc78..b17c42df61c9 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -532,7 +532,7 @@ void pfault_fini(void) static DEFINE_SPINLOCK(pfault_lock); static LIST_HEAD(pfault_list); -static void pfault_interrupt(unsigned int ext_int_code, +static void pfault_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct task_struct *tsk; @@ -545,7 +545,7 @@ static void pfault_interrupt(unsigned int ext_int_code, * in the 'cpu address' field associated with the * external interrupt. */ - subcode = ext_int_code >> 16; + subcode = ext_code.subcode; if ((subcode & 0xff00) != __SUBCODE_MASK) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 9daee91e6c3f..12bea05a0fc1 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -233,8 +233,8 @@ static inline unsigned long *trailer_entry_ptr(unsigned long v) } /* prototypes for external interrupt handler and worker */ -static void hws_ext_handler(unsigned int ext_int_code, - unsigned int param32, unsigned long param64); +static void hws_ext_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64); static void worker(struct work_struct *work); @@ -673,7 +673,7 @@ int hwsampler_activate(unsigned int cpu) return rc; } -static void hws_ext_handler(unsigned int ext_int_code, +static void hws_ext_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct hws_cpu_buffer *cb; diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 46784b83c5c4..0cea7e98f464 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c 
@@ -229,7 +229,7 @@ dasd_diag_term_IO(struct dasd_ccw_req * cqr) } /* Handle external interruption. */ -static void dasd_ext_handler(unsigned int ext_int_code, +static void dasd_ext_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct dasd_ccw_req *cqr, *next; @@ -239,7 +239,7 @@ static void dasd_ext_handler(unsigned int ext_int_code, addr_t ip; int rc; - switch (ext_int_code >> 24) { + switch (ext_code.subcode >> 8) { case DASD_DIAG_CODE_31BIT: ip = (addr_t) param32; break; @@ -280,7 +280,7 @@ static void dasd_ext_handler(unsigned int ext_int_code, cqr->stopclk = get_clock(); expires = 0; - if ((ext_int_code & 0xff0000) == 0) { + if ((ext_code.subcode & 0xff) == 0) { cqr->status = DASD_CQR_SUCCESS; /* Start first request on queue if possible -> fast_io. */ if (!list_empty(&device->ccw_queue)) { @@ -296,7 +296,7 @@ static void dasd_ext_handler(unsigned int ext_int_code, cqr->status = DASD_CQR_QUEUED; DBF_DEV_EVENT(DBF_DEBUG, device, "interrupt status for " "request %p was %d (%d retries left)", cqr, - (ext_int_code >> 16) & 0xff, cqr->retries); + ext_code.subcode & 0xff, cqr->retries); dasd_diag_erp(device); } diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index eaa7e78186f9..30f29a0020a1 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -393,7 +393,7 @@ __sclp_find_req(u32 sccb) /* Handler for external interruption. Perform request post-processing. * Prepare read event data request if necessary. Start processing of next * request on queue. */ -static void sclp_interrupt_handler(unsigned int ext_int_code, +static void sclp_interrupt_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct sclp_req *req; @@ -818,7 +818,7 @@ EXPORT_SYMBOL(sclp_reactivate); /* Handler for external interruption used during initialization. Modify * request state to done. 
*/ -static void sclp_check_handler(unsigned int ext_int_code, +static void sclp_check_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { u32 finished_sccb; diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 7bc1955337ea..d74e9ae6dfb3 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -380,15 +380,13 @@ static void hotplug_devices(struct work_struct *dummy) /* * we emulate the request_irq behaviour on top of s390 extints */ -static void kvm_extint_handler(unsigned int ext_int_code, +static void kvm_extint_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct virtqueue *vq; - u16 subcode; u32 param; - subcode = ext_int_code >> 16; - if ((subcode & 0xff00) != VIRTIO_SUBCODE_64) + if ((ext_code.subcode & 0xff00) != VIRTIO_SUBCODE_64) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_VRT]++; diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 403be43b793d..3ad1f9db5f8b 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1800,7 +1800,7 @@ static void iucv_work_fn(struct work_struct *work) * Handles external interrupts coming in from CP. * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn(). */ -static void iucv_external_interrupt(unsigned int ext_int_code, +static void iucv_external_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct iucv_irq_data *p; -- cgit v1.2.3 From 3ab121ab18669226742891416fe7ecc86dadb047 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 11 Mar 2012 11:59:32 -0400 Subject: [S390] kernel: Add z/VM LGR detection Currently the following mechanisms are available to move active Linux on System z instances between machines: * z/VM 6.2 SSI (Single System Image) * Suspend/resume For moving Linux instances in this patch the term LGR (Linux Guest Relocation) is used. Because such an operation is critical, it should be detectable from Linux. 
With this patch for both, a live system and a kernel dump, the information about LGRs is accessible. To identify a guest, stsi and stfle data is used. A new function lgr_info_log() compares the current data (lgr_info_cur) with the last recorded one (lgr_info_last). In case the two data sets differ, lgr_info_cur is logged to the "lgr" s390dbf. The following trigger points call lgr_info_log(): * panic * die * kdump * LGR timer * PSW restart * QDIO recovery * resume This patch also changes the s390dbf hex_ascii view. Now only printable ASCII characters are shown. Reviewed-by: Heiko Carstens Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/debug.h | 1 + arch/s390/include/asm/ipl.h | 1 + arch/s390/include/asm/system.h | 34 +++++++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/debug.c | 40 ++++++-- arch/s390/kernel/early.c | 22 +---- arch/s390/kernel/ipl.c | 6 ++ arch/s390/kernel/lgr.c | 200 +++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/machine_kexec.c | 9 +- arch/s390/kernel/smp.c | 2 + arch/s390/kernel/swsusp_asm64.S | 3 + arch/s390/kernel/traps.c | 2 + drivers/s390/cio/qdio_main.c | 6 ++ 13 files changed, 297 insertions(+), 31 deletions(-) create mode 100644 arch/s390/kernel/lgr.c (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 9d88db1f55d0..8a8245ed14d2 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -131,6 +131,7 @@ void debug_unregister(debug_info_t* id); void debug_set_level(debug_info_t* id, int new_level); +void debug_set_critical(void); void debug_stop_all(void); static inline debug_entry_t* diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 6940abfbe1d9..2bd6cb897b90 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -169,5 +169,6 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); extern void 
store_status(void); +extern void lgr_info_log(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index d73cc6b60000..2e0bb7f0f9b2 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -7,8 +7,10 @@ #ifndef __ASM_SYSTEM_H #define __ASM_SYSTEM_H +#include #include #include +#include #include #include #include @@ -248,6 +250,38 @@ static inline int test_facility(unsigned long nr) return (*ptr & (0x80 >> (nr & 7))) != 0; } +/** + * stfle - Store facility list extended + * @stfle_fac_list: array where facility list can be stored + * @size: size of passed in array in double words + */ +static inline void stfle(u64 *stfle_fac_list, int size) +{ + unsigned long nr; + + preempt_disable(); + S390_lowcore.stfl_fac_list = 0; + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b, 0b) + : "=m" (S390_lowcore.stfl_fac_list)); + nr = 4; /* bytes stored by stfl */ + memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); + if (S390_lowcore.stfl_fac_list & 0x01000000) { + /* More facility bits available with stfle */ + register unsigned long reg0 asm("0") = size - 1; + + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (stfle_fac_list) + : "memory", "cc"); + nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + } + memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); + preempt_enable(); +} + static inline unsigned short stap(void) { unsigned short cpu_address; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index d0a48268eb27..b21595090499 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ - sysinfo.o jump_label.o + 
sysinfo.o jump_label.o lgr.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6848828b962e..19e5e9eba546 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,8 +2,8 @@ * arch/s390/kernel/debug.c * S/390 debug facility * - * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 1999, 2012 + * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) * @@ -167,6 +167,7 @@ static debug_info_t *debug_area_last = NULL; static DEFINE_MUTEX(debug_mutex); static int initialized; +static int debug_critical; static const struct file_operations debug_file_ops = { .owner = THIS_MODULE, @@ -932,6 +933,11 @@ debug_stop_all(void) } +void debug_set_critical(void) +{ + debug_critical = 1; +} + /* * debug_event_common: * - write debug entry with given size @@ -945,7 +951,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -968,7 +978,11 @@ debug_entry_t if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -1013,7 +1027,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) 
return NULL; numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); va_start(ap,string); @@ -1047,7 +1065,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); va_start(ap,string); @@ -1428,10 +1450,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, rc += sprintf(out_buf + rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; - if (!isprint(c)) - rc += sprintf(out_buf + rc, "."); - else + if (isascii(c) && isprint(c)) rc += sprintf(out_buf + rc, "%c", c); + else + rc += sprintf(out_buf + rc, "."); } rc += sprintf(out_buf + rc, "\n"); return rc; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 52098d6dfaa7..578eb4e6d157 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "entry.h" /* @@ -262,25 +263,8 @@ static noinline __init void setup_lowcore_early(void) static noinline __init void setup_facility_list(void) { - unsigned long nr; - - S390_lowcore.stfl_fac_list = 0; - asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list)); - memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); - nr = 4; /* # bytes stored by stfl */ - if (test_facility(7)) { - /* More facility bits available with stfle */ - register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1; - asm volatile(".insn 
s,0xb2b00000,%0" /* stfle */ - : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0) - : : "cc"); - nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ - } - memset((char *) S390_lowcore.stfle_fac_list + nr, 0, - MAX_FACILITY_BIT/8 - nr); + stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); } static noinline __init void setup_hpage(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 9e2f6f7c0e5a..153e21ce2336 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -1692,6 +1694,7 @@ static struct kobj_attribute on_panic_attr = static void do_panic(void) { + lgr_info_log(); on_panic_trigger.action->fn(&on_panic_trigger); stop_run(&on_panic_trigger); } @@ -1729,6 +1732,9 @@ static void __do_restart(void *ignore) void do_restart(void) { + tracing_off(); + debug_locks_off(); + lgr_info_log(); smp_call_online_cpu(__do_restart, NULL); } diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 000000000000..8431b92ca3ae --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,200 @@ +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 
2012 + * Author(s): Michael Holzheu + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { + /* Bit field with facility information: 4 DWORDs are stored */ + u64 stfle_fac_list[4]; + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ + u32 level; + /* Level 1: CEC info (stsi 1.1.1) */ + char manufacturer[16]; + char type[4]; + char sequence[16]; + char plant[4]; + char model[16]; + /* Level 2: LPAR info (stsi 2.2.2) */ + u16 lpar_number; + char name[8]; + /* Level 3: VM info (stsi 3.2.2) */ + u8 vm_count; + struct { + char name[8]; + char cpi[16]; + } vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static void *lgr_page; +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Return number of valid stsi levels + */ +static inline int stsi_0(void) +{ + int rc = stsi(NULL, 0, 0, 0); + + return rc == -ENOSYS ? 
rc : (((unsigned int) rc) >> 28); +} + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ + memcpy(dst, src, size); + EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ + struct sysinfo_1_1_1 *si = lgr_page; + + if (stsi(si, 1, 1, 1) == -ENOSYS) + return; + cpascii(lgr_info->manufacturer, si->manufacturer, + sizeof(si->manufacturer)); + cpascii(lgr_info->type, si->type, sizeof(si->type)); + cpascii(lgr_info->model, si->model, sizeof(si->model)); + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_2_2_2 *si = lgr_page; + + if (stsi(si, 2, 2, 2) == -ENOSYS) + return; + cpascii(lgr_info->name, si->name, sizeof(si->name)); + memcpy(&lgr_info->lpar_number, &si->lpar_number, + sizeof(lgr_info->lpar_number)); +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_3_2_2 *si = lgr_page; + int i; + + if (stsi(si, 3, 2, 2) == -ENOSYS) + return; + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { + cpascii(lgr_info->vm[i].name, si->vm[i].name, + sizeof(si->vm[i].name)); + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, + sizeof(si->vm[i].cpi)); + } + lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ + memset(lgr_info, 0, sizeof(*lgr_info)); + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); + lgr_info->level = stsi_0(); + if (lgr_info->level == -ENOSYS) + return; + if (lgr_info->level >= 1) + lgr_stsi_1_1_1(lgr_info); + if (lgr_info->level >= 2) + lgr_stsi_2_2_2(lgr_info); + if (lgr_info->level >= 3) + lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info 
has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ + static DEFINE_SPINLOCK(lgr_info_lock); + unsigned long flags; + + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) + return; + lgr_info_get(&lgr_info_cur); + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); + lgr_info_last = lgr_info_cur; + } + spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(unsigned long ignored) +{ + lgr_info_log(); + lgr_timer_set(); +} + +static struct timer_list lgr_timer = + TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ + mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ + lgr_page = (void *) __get_free_pages(GFP_KERNEL, 0); + if (!lgr_page) + return -ENOMEM; + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); + if (!lgr_dbf) { + free_page((unsigned long) lgr_page); + return -ENOMEM; + } + debug_register_view(lgr_dbf, &debug_hex_ascii_view); + lgr_info_get(&lgr_info_last); + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + lgr_timer_set(); + return 0; +} +module_init(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index bf6fbc03ebaf..0f8cdf1268d0 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -209,10 +210,14 @@ static void __machine_kexec(void *data) struct kimage *image = data; pfault_fini(); - if (image->type == KEXEC_TYPE_CRASH) + tracing_off(); + debug_locks_off(); + if (image->type == KEXEC_TYPE_CRASH) { + lgr_info_log(); 
s390_reset_system(__do_machine_kdump, data); - else + } else { s390_reset_system(__do_machine_kexec, data); + } disabled_wait((unsigned long) __builtin_return_address(0)); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 734e644972ab..d15b6c937088 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -406,6 +407,7 @@ void smp_send_stop(void) __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); + debug_set_critical(); cpumask_copy(&cpumask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &cpumask); diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index ad3c79eceed7..dd70ef046058 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -257,6 +257,9 @@ restore_registers: lghi %r2,0 brasl %r14,arch_set_page_states + /* Log potential guest relocation */ + brasl %r14,lgr_info_log + /* Reinitialize the channel subsystem */ brasl %r14,channel_subsystem_reinit diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 5ce3750b181f..8894ac435d8d 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "entry.h" void (*pgm_check_table[128])(struct pt_regs *regs); @@ -239,6 +240,7 @@ void die(struct pt_regs *regs, const char *str) static int die_counter; oops_enter(); + lgr_info_log(); debug_stop_all(); console_verbose(); spin_lock_irq(&die_lock); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 770a740a393c..2a0dfcb0bc42 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cio.h" #include "css.h" @@ -1093,6 +1094,11 @@ static void qdio_handle_activate_check(struct ccw_device *cdev, q->nr, q->first_to_kick, count, irq_ptr->int_parm); no_handler: qdio_set_state(irq_ptr, 
QDIO_IRQ_STATE_STOPPED); + /* + * In case of z/VM LGR (Live Guest Migration) QDIO recovery will happen. + * Therefore we call the LGR detection function here. + */ + lgr_info_log(); } static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, -- cgit v1.2.3 From 4857d4bbe9821c8d732cb84455e18e12b3d79add Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 11 Mar 2012 11:59:34 -0400 Subject: [S390] kernel: Add OS info memory interface In order to allow kdump based stand-alone dump, some information has to be passed from the old kernel to the new dump kernel. This is done via a the struct "os_info" that contains the following fields: * crashkernel base and size * reipl block * vmcoreinfo * init function A pointer to os_info is stored at a well known storage location and the whole structure as well as all fields are secured with checksums. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 8 +- arch/s390/include/asm/os_info.h | 50 ++++++++++++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/crash_dump.c | 37 ++++++--- arch/s390/kernel/ipl.c | 42 ++++++++-- arch/s390/kernel/os_info.c | 169 ++++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/setup.c | 3 + arch/s390/kernel/smp.c | 13 ++++ 8 files changed, 304 insertions(+), 20 deletions(-) create mode 100644 arch/s390/include/asm/os_info.h create mode 100644 arch/s390/kernel/os_info.c (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 4e69563bc956..a47c6e221a95 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -154,7 +154,9 @@ struct _lowcore { __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ __u32 vmcore_info; /* 0x0e08 */ - __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ + __u8 pad_0x0e0c[0x0e18-0x0e0c]; /* 0x0e0c */ + __u32 os_info; /* 0x0e18 */ + __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ /* Extended facility list */ __u64 
stfle_fac_list[32]; /* 0x0f00 */ @@ -301,7 +303,9 @@ struct _lowcore { __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ __u64 vmcore_info; /* 0x0e0c */ - __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ + __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */ + __u64 os_info; /* 0x0e18 */ + __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h new file mode 100644 index 000000000000..d07518af09ea --- /dev/null +++ b/arch/s390/include/asm/os_info.h @@ -0,0 +1,50 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu + */ +#ifndef _ASM_S390_OS_INFO_H +#define _ASM_S390_OS_INFO_H + +#define OS_INFO_VERSION_MAJOR 1 +#define OS_INFO_VERSION_MINOR 1 +#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */ + +#define OS_INFO_VMCOREINFO 0 +#define OS_INFO_REIPL_BLOCK 1 +#define OS_INFO_INIT_FN 2 + +struct os_info_entry { + u64 addr; + u64 size; + u32 csum; +} __packed; + +struct os_info { + u64 magic; + u32 csum; + u16 version_major; + u16 version_minor; + u64 crashkernel_addr; + u64 crashkernel_size; + struct os_info_entry entry[3]; + u8 reserved[4004]; +} __packed; + +void os_info_init(void); +void os_info_entry_add(int nr, void *ptr, u64 len); +void os_info_crashkernel_add(unsigned long base, unsigned long size); +u32 os_info_csum(struct os_info *os_info); + +#ifdef CONFIG_CRASH_DUMP +void *os_info_old_entry(int nr, unsigned long *size); +int copy_from_oldmem(void *dest, void *src, size_t count); +#else +static inline void *os_info_old_entry(int nr, unsigned long *size) +{ + return NULL; +} +#endif + +#endif /* _ASM_S390_OS_INFO_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index b21595090499..16b0b433f1f4 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := 
bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ - sysinfo.o jump_label.o lgr.o + sysinfo.o jump_label.o lgr.o os_info.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index c383ce440d99..cc1172b26873 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -14,6 +14,7 @@ #include #include #include +#include #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) @@ -51,7 +52,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, /* * Copy memory from old kernel */ -static int copy_from_oldmem(void *dest, void *src, size_t count) +int copy_from_oldmem(void *dest, void *src, size_t count) { unsigned long copied = 0; int rc; @@ -224,28 +225,44 @@ static void *nt_prpsinfo(void *ptr) } /* - * Initialize vmcoreinfo note (new kernel) + * Get vmcoreinfo using lowcore->vmcore_info (new kernel) */ -static void *nt_vmcoreinfo(void *ptr) +static void *get_vmcoreinfo_old(unsigned long *size) { char nt_name[11], *vmcoreinfo; Elf64_Nhdr note; void *addr; if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) - return ptr; + return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_from_oldmem(&note, addr, sizeof(note))) - return ptr; + return NULL; if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) - return ptr; + return NULL; if (strcmp(nt_name, "VMCOREINFO") != 0) - return ptr; - vmcoreinfo = kzalloc_panic(note.n_descsz + 1); + return NULL; + vmcoreinfo = kzalloc_panic(note.n_descsz); if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return NULL; + *size = note.n_descsz; + return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void
*nt_vmcoreinfo(void *ptr) +{ + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (!vmcoreinfo) + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) return ptr; - vmcoreinfo[note.n_descsz + 1] = 0; - return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); + return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } /* diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 153e21ce2336..8342e65a140d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -951,6 +952,13 @@ static struct attribute_group reipl_nss_attr_group = { .attrs = reipl_nss_attrs, }; +static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +{ + reipl_block_actual = reipl_block; + os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block->hdr.len); +} + /* reipl type */ static int reipl_set_type(enum ipl_type type) @@ -966,7 +974,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_CCW_VM; else reipl_method = REIPL_METHOD_CCW_CIO; - reipl_block_actual = reipl_block_ccw; + set_reipl_block_actual(reipl_block_ccw); break; case IPL_TYPE_FCP: if (diag308_set_works) @@ -975,7 +983,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_FCP_RO_VM; else reipl_method = REIPL_METHOD_FCP_RO_DIAG; - reipl_block_actual = reipl_block_fcp; + set_reipl_block_actual(reipl_block_fcp); break; case IPL_TYPE_FCP_DUMP: reipl_method = REIPL_METHOD_FCP_DUMP; @@ -985,7 +993,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_NSS_DIAG; else reipl_method = REIPL_METHOD_NSS; - reipl_block_actual = reipl_block_nss; + set_reipl_block_actual(reipl_block_nss); break; case IPL_TYPE_UNKNOWN: reipl_method = REIPL_METHOD_DEFAULT; @@ -1257,6 +1265,29 @@ static int __init reipl_fcp_init(void) return 0; } +static int __init 
reipl_type_init(void) +{ + enum ipl_type reipl_type = ipl_info.type; + struct ipl_parameter_block *reipl_block; + unsigned long size; + + reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); + if (!reipl_block) + goto out; + /* + * If we have an OS info reipl block, this will be used + */ + if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, reipl_block, size); + reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + memcpy(reipl_block_ccw, reipl_block, size); + reipl_type = IPL_TYPE_CCW; + } +out: + return reipl_set_type(reipl_type); +} + static int __init reipl_init(void) { int rc; @@ -1278,10 +1309,7 @@ static int __init reipl_init(void) rc = reipl_nss_init(); if (rc) return rc; - rc = reipl_set_type(ipl_info.type); - if (rc) - return rc; - return 0; + return reipl_type_init(); } static struct shutdown_action __refdata reipl_action = { diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 000000000000..bbe522672e06 --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,169 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 
2012 + * Author(s): Michael Holzheu + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ + int size = sizeof(*os_info) - offsetof(struct os_info, version_major); + return csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ + os_info.crashkernel_addr = (u64)(unsigned long)base; + os_info.crashkernel_size = (u64)(unsigned long)size; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ + os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].size = size; + os_info.entry[nr].csum = csum_partial(ptr, size, 0); + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info struture and set lowcore pointer + */ +void __init os_info_init(void) +{ + void *ptr = &os_info; + + os_info.version_major = OS_INFO_VERSION_MAJOR; + os_info.version_minor = OS_INFO_VERSION_MINOR; + os_info.magic = OS_INFO_MAGIC; + os_info.csum = os_info_csum(&os_info); + copy_to_absolute_zero(&S390_lowcore.os_info, &ptr, sizeof(ptr)); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ + unsigned long addr, size = 0; + char *buf, *buf_align, *msg; + u32 csum; + + addr = os_info_old->entry[nr].addr; + if (!addr) { + msg = "not available"; + goto fail; + } + size = os_info_old->entry[nr].size; + buf = kmalloc(size + align - 1, GFP_KERNEL); + if (!buf) { + msg = "alloc failed"; + goto fail; + } + buf_align = 
PTR_ALIGN(buf, align); + if (copy_from_oldmem(buf_align, (void *) addr, size)) { + msg = "copy failed"; + goto fail_free; + } + csum = csum_partial(buf_align, size, 0); + if (csum != os_info_old->entry[nr].csum) { + msg = "checksum failed"; + goto fail_free; + } + os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; + msg = "copied"; + goto out; +fail_free: + kfree(buf); +fail: + os_info_old->entry[nr].addr = 0; +out: + pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", + nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ + static int os_info_init; + unsigned long addr; + + if (os_info_init) + return; + if (!OLDMEM_BASE) + goto fail; + if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + goto fail; + if (addr == 0 || addr % PAGE_SIZE) + goto fail; + os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); + if (!os_info_old) + goto fail; + if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + goto fail_free; + if (os_info_old->magic != OS_INFO_MAGIC) + goto fail_free; + if (os_info_old->csum != os_info_csum(os_info_old)) + goto fail_free; + if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) + goto fail_free; + os_info_old_alloc(OS_INFO_VMCOREINFO, 1); + os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1); + os_info_old_alloc(OS_INFO_INIT_FN, PAGE_SIZE); + pr_info("crashkernel: addr=0x%lx size=%lu\n", + (unsigned long) os_info_old->crashkernel_addr, + (unsigned long) os_info_old->crashkernel_size); + os_info_init = 1; + return; +fail_free: + kfree(os_info_old); +fail: + os_info_init = 1; + os_info_old = NULL; +} + +/* + * Return pointer to os infor entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ + os_info_old_init(); + + if (!os_info_old) + return NULL; + if (!os_info_old->entry[nr].addr) + return NULL; + *size = (unsigned long) os_info_old->entry[nr].size; + return (void *)(unsigned long)os_info_old->entry[nr].addr; +} 
+#endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9a3edb5f2c92..38e751278bf7 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "entry.h" long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | @@ -778,6 +779,7 @@ static void __init reserve_crashkernel(void) pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); + os_info_crashkernel_add(crash_base, crash_size); #endif } @@ -1057,6 +1059,7 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + os_info_init(); setup_ipl(); setup_memory_end(); setup_addressing_mode(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index d15b6c937088..3b9e5c9f4c0b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -826,6 +827,17 @@ void __noreturn cpu_die(void) #endif /* CONFIG_HOTPLUG_CPU */ +static void smp_call_os_info_init_fn(void) +{ + int (*init_fn)(void); + unsigned long size; + + init_fn = os_info_old_entry(OS_INFO_INIT_FN, &size); + if (!init_fn) + return; + init_fn(); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { /* request the 0x1201 emergency signal external interrupt */ @@ -834,6 +846,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1202 external call external interrupt */ if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1202"); + smp_call_os_info_init_fn(); smp_detect_cpus(); } -- cgit v1.2.3 From f5bfa159d20c9a25c44a7f848c70d4156f78ba64 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 11 Mar 2012 11:59:38 -0400 Subject: [S390] Ensure that vmcore_info pointer is never accessed directly Because the vmcore_info pointer is not 8 byte aligned it should never be accessed 
directly. The reason is that the compiler assumes that 64 bit pointers are always double word aligned. To ensure safe access, the vmcore_info type in struct lowcore is changed from u64 to a u8[8] array and a comment is added. Signed-off-by: Michael Holzheu Reported-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index a47c6e221a95..47853debb3b9 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -302,7 +302,12 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - __u64 vmcore_info; /* 0x0e0c */ + /* + * Because the vmcore_info pointer is not 8 byte aligned it should + * never be accessed directly. For accessing the pointer, first + * copy it to a local pointer variable. + */ + __u8 vmcore_info[8]; /* 0x0e0c */ __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */ __u64 os_info; /* 0x0e18 */ __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ -- cgit v1.2.3 From b03d541aa45b52e1b723890121a9fe3920eb438b Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Fri, 23 Mar 2012 11:13:05 +0100 Subject: [S390] oprofile: Allow multiple users of the measurement alert interrupt Prepare the measurement facility which is currently only used by oprofile for multiple users. To achieve that the measurement alert interrupt control bit needs to be protected. The measurement alert definitions are moved to a header file and an interrupt mask is added so that users can discard interrupts if they are for a different measurement subsystem. 
Reviewed-by: Hendrik Brueckner Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 12 ++++++++++ arch/s390/include/asm/irq.h | 2 ++ arch/s390/kernel/irq.c | 23 +++++++++++++++++++ arch/s390/oprofile/hwsampler.c | 52 ++++++++++++++++++------------------------ 4 files changed, 59 insertions(+), 30 deletions(-) create mode 100644 arch/s390/include/asm/cpu_mf.h (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h new file mode 100644 index 000000000000..d91dd38112de --- /dev/null +++ b/arch/s390/include/asm/cpu_mf.h @@ -0,0 +1,12 @@ +#ifndef _ASM_S390_CPU_MF_H +#define _ASM_S390_CPU_MF_H + +#define CPU_MF_INT_SF_MASK 0xffc00000 + +#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ +#define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */ +#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ +#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ +#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ + +#endif diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index acee1806f61e..5289cacd4861 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -45,5 +45,7 @@ int register_external_interrupt(u16 code, ext_int_handler_t handler); int unregister_external_interrupt(u16 code, ext_int_handler_t handler); void service_subclass_irq_register(void); void service_subclass_irq_unregister(void); +void measurement_alert_subclass_register(void); +void measurement_alert_subclass_unregister(void); #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 2429ecd68872..1c2cdd59ccd0 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -255,3 +255,26 @@ void service_subclass_irq_unregister(void) spin_unlock(&sc_irq_lock); } EXPORT_SYMBOL(service_subclass_irq_unregister); + +static DEFINE_SPINLOCK(ma_subclass_lock); +static int 
ma_subclass_refcount; + +void measurement_alert_subclass_register(void) +{ + spin_lock(&ma_subclass_lock); + if (!ma_subclass_refcount) + ctl_set_bit(0, 5); + ma_subclass_refcount++; + spin_unlock(&ma_subclass_lock); +} +EXPORT_SYMBOL(measurement_alert_subclass_register); + +void measurement_alert_subclass_unregister(void) +{ + spin_lock(&ma_subclass_lock); + ma_subclass_refcount--; + if (!ma_subclass_refcount) + ctl_clear_bit(0, 5); + spin_unlock(&ma_subclass_lock); +} +EXPORT_SYMBOL(measurement_alert_subclass_unregister); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 12bea05a0fc1..f097d516d8c5 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -17,8 +17,7 @@ #include #include #include - -#include +#include #include #include "hwsampler.h" @@ -30,12 +29,6 @@ #define ALERT_REQ_MASK 0x4000000000000000ul #define BUFFER_FULL_MASK 0x8000000000000000ul -#define EI_IEA (1 << 31) /* invalid entry address */ -#define EI_ISE (1 << 30) /* incorrect SDBT entry */ -#define EI_PRA (1 << 29) /* program request alert */ -#define EI_SACA (1 << 23) /* sampler authorization change alert */ -#define EI_LSDA (1 << 22) /* loss of sample data alert */ - DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); struct hws_execute_parms { @@ -232,9 +225,20 @@ static inline unsigned long *trailer_entry_ptr(unsigned long v) return (unsigned long *) ret; } -/* prototypes for external interrupt handler and worker */ static void hws_ext_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64); + unsigned int param32, unsigned long param64) +{ + struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); + + if (!(param32 & CPU_MF_INT_SF_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} static void worker(struct work_struct *work); @@ -673,18 +677,6 @@ 
int hwsampler_activate(unsigned int cpu) return rc; } -static void hws_ext_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct hws_cpu_buffer *cb; - - kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; - cb = &__get_cpu_var(sampler_cpu_buffer); - atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); - if (hws_wq) - queue_work(hws_wq, &cb->worker); -} - static int check_qsi_on_setup(void) { int rc; @@ -760,23 +752,23 @@ static int worker_check_error(unsigned int cpu, int ext_params) if (!sdbt || !*sdbt) return -EINVAL; - if (ext_params & EI_PRA) + if (ext_params & CPU_MF_INT_SF_PRA) cb->req_alert++; - if (ext_params & EI_LSDA) + if (ext_params & CPU_MF_INT_SF_LSDA) cb->loss_of_sample_data++; - if (ext_params & EI_IEA) { + if (ext_params & CPU_MF_INT_SF_IAE) { cb->invalid_entry_address++; rc = -EINVAL; } - if (ext_params & EI_ISE) { + if (ext_params & CPU_MF_INT_SF_ISE) { cb->incorrect_sdbt_entry++; rc = -EINVAL; } - if (ext_params & EI_SACA) { + if (ext_params & CPU_MF_INT_SF_SACA) { cb->sample_auth_change_alert++; rc = -EINVAL; } @@ -1009,7 +1001,7 @@ int hwsampler_deallocate(void) if (hws_state != HWS_STOPPED) goto deallocate_exit; - ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + measurement_alert_subclass_unregister(); deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1123,7 +1115,7 @@ int hwsampler_shutdown(void) mutex_lock(&hws_sem); if (hws_state == HWS_STOPPED) { - ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + measurement_alert_subclass_unregister(); deallocate_sdbt(); } if (hws_wq) { @@ -1198,7 +1190,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ - ctl_set_bit(0, 5); /* set CR0 bit 58 */ + measurement_alert_subclass_register(); return 0; } -- cgit v1.2.3 From 212188a596d17d519842ef2173150315735b54e1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 23 Mar 2012 11:13:06 +0100 Subject: [S390] perf: add support for s390x CPU 
counters Add a perf PMU to access the CPU-measurement counter facility CPUM CF. CPUM CF provides multiple counter sets for measuring generic, problem-state, and crypto activities. Also an extended counter set for the IBM System z10 and IBM z196 mainframes is available. Counters from the basic and problem-state counter set are mapped to generic perf hardware events. Other counters are accessible through raw events. For a list of available counter sets and counters, see: - The Load-Program-Parameter and the CPU-Measurement Facilities (SA23-2260) - The CPU-Measurement Facility Extended Counters Definition for z10 and z196 (SA23-2261) Reviewed-by: Jan Glauber Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 89 ++++- arch/s390/include/asm/perf_event.h | 12 +- arch/s390/kernel/Makefile | 1 + arch/s390/kernel/perf_cpum_cf.c | 690 +++++++++++++++++++++++++++++++++++++ arch/s390/kernel/perf_event.c | 125 +++++++ 5 files changed, 912 insertions(+), 5 deletions(-) create mode 100644 arch/s390/kernel/perf_cpum_cf.c create mode 100644 arch/s390/kernel/perf_event.c (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index d91dd38112de..e49db5d5d06f 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -1,12 +1,95 @@ +/* + * CPU-measurement facilities + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner + * Jan Glauber + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ #ifndef _ASM_S390_CPU_MF_H #define _ASM_S390_CPU_MF_H -#define CPU_MF_INT_SF_MASK 0xffc00000 - #define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ #define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */ #define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ #define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ #define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ +#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ +#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ + +#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) +#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ + CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ + CPU_MF_INT_SF_LSDA) + +/* CPU measurement facility support */ +static inline int cpum_cf_avail(void) +{ + return MACHINE_HAS_SPP && test_facility(67); +} + +static inline int cpum_sf_avail(void) +{ + return MACHINE_HAS_SPP && test_facility(68); +} + + +struct cpumf_ctr_info { + u16 cfvn; + u16 auth_ctl; + u16 enable_ctl; + u16 act_ctl; + u16 max_cpu; + u16 csvn; + u16 max_cg; + u16 reserved1; + u32 reserved2[12]; +} __packed; + +/* Query counter information */ +static inline int qctri(struct cpumf_ctr_info *info) +{ + int rc = -EINVAL; + + asm volatile ( + "0: .insn s,0xb28e0000,%1\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(1b, 2b) + : "+d" (rc), "=Q" (*info)); + return rc; +} + +/* Load CPU-counter-set controls */ +static inline int lcctl(u64 ctl) +{ + int cc; + + asm volatile ( + " .insn s,0xb2840000,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "m" (ctl) : "cc"); + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + register u64 content asm("4") = 0; + int cc; + + asm volatile ( + " .insn rre,0xb2e40000,%0,%2\n" + " ipm %1\n" + " srl %1,28\n" + : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + if (!cc) + *val = content; + return cc; +} -#endif +#endif /* _ASM_S390_CPU_MF_H */ 
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 4eb444edbe49..7941968e12b4 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -1,8 +1,16 @@ /* * Performance event support - s390 specific definitions. * - * Copyright 2009 Martin Schwidefsky, IBM Corporation. + * Copyright IBM Corp. 2009, 2012 + * Author(s): Martin Schwidefsky + * Hendrik Brueckner */ -/* Empty, just to avoid compiling error */ +#include +/* CPU-measurement counter facility */ +#define PERF_CPUM_CF_MAX_CTR 160 + +/* Per-CPU flags for PMU states */ +#define PMU_F_RESERVED 0x1000 +#define PMU_F_ENABLED 0x2000 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 16b0b433f1f4..884b18afc864 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c new file mode 100644 index 000000000000..8481ecf2ad71 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -0,0 +1,690 @@ +/* + * Performance event support for s390x - CPU-measurement Counter Facility + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#define KMSG_COMPONENT "cpum_cf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* CPU-measurement counter facility supports these CPU counter sets: + * For CPU counter sets: + * Basic counter set: 0-31 + * Problem-state counter set: 32-63 + * Crypto-activity counter set: 64-127 + * Extended counter set: 128-159 + */ +enum cpumf_ctr_set { + /* CPU counter sets */ + CPUMF_CTR_SET_BASIC = 0, + CPUMF_CTR_SET_USER = 1, + CPUMF_CTR_SET_CRYPTO = 2, + CPUMF_CTR_SET_EXT = 3, + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 +static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, +}; + +static void ctr_set_enable(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; +} +static void ctr_set_disable(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); +} +static void ctr_set_start(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; +} +static void ctr_set_stop(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +/* Local CPUMF event structure */ +struct cpu_hw_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state, tx_state; + unsigned int flags; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .ctr_set = { + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + }, + .state = 0, + .flags = 0, +}; + +static int get_counter_set(u64 event) +{ + int set = -1; + + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if 
(event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 160) + set = CPUMF_CTR_SET_EXT; + + return set; +} + +static int validate_event(const struct hw_perf_event *hwc) +{ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + /* check for reserved counters */ + if ((hwc->config >= 6 && hwc->config <= 31) || + (hwc->config >= 38 && hwc->config <= 63) || + (hwc->config >= 80 && hwc->config <= 127)) + return -EOPNOTSUPP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int validate_ctr_version(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check required version for counter sets */ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpuhw->info.cfvn < 1) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + if (cpuhw->info.csvn < 1) + err = -EOPNOTSUPP; + break; + } + + put_cpu_var(cpu_hw_events); + return err; +} + +static int validate_ctr_auth(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + u64 ctrs_state; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check authorization for cpu counter sets */ + ctrs_state = cpumf_state_ctl[hwc->config_base]; + if (!(ctrs_state & cpuhw->info.auth_ctl)) + err = -EPERM; + + put_cpu_var(cpu_hw_events); + return err; +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. 
+ */ +static void cpumf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + err = lcctl(cpuhw->state); + if (err) { + pr_err("Enabling the performance measuring unit " + "failed with rc=%lx\n", err); + return; + } + + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + u64 inactive; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + err = lcctl(inactive); + if (err) { + pr_err("Disabling the performance measuring unit " + "failed with rc=%lx\n", err); + return; + } + + cpuhw->flags &= ~PMU_F_ENABLED; +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; + cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. 
*/ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); +} + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + switch (*((int *) flags)) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); +} + +/* Initialize the CPU-measurement facility */ +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + measurement_alert_subclass_register(); + + return 0; +} + +/* Release the CPU-measurement facility */ +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + measurement_alert_subclass_unregister(); +} + +/* Release the PMU if event is the last perf event */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ +static const int cpumf_generic_events_basic[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; +/* CPUMF <-> perf event mappings for userspace (problem-state set) */ +static const int cpumf_generic_events_user[] = { + 
[PERF_COUNT_HW_CPU_CYCLES] = 32, + [PERF_COUNT_HW_INSTRUCTIONS] = 33, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int err; + u64 ev; + + switch (attr->type) { + case PERF_TYPE_RAW: + /* Raw events are used to access counters directly, + * hence do not permit excludes */ + if (attr->exclude_kernel || attr->exclude_user || + attr->exclude_hv) + return -EOPNOTSUPP; + ev = attr->config; + break; + + case PERF_TYPE_HARDWARE: + ev = attr->config; + /* Count user space (problem-state) only */ + if (!attr->exclude_user && attr->exclude_kernel) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + + /* No support for kernel space counters only */ + } else if (!attr->exclude_kernel && attr->exclude_user) { + return -EOPNOTSUPP; + + /* Count user and kernel space */ + } else { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } + break; + + default: + return -ENOENT; + } + + if (ev == -1) + return -ENOENT; + + if (ev >= PERF_CPUM_CF_MAX_CTR) + return -EINVAL; + + /* The CPU measurement counter facility does not have any interrupts + * to do sampling. Sampling must be provided by external means, + * for example, by timers. + */ + if (hwc->sample_period) + return -EINVAL; + + /* Use the hardware perf event structure to store the counter number + * in 'config' member and the counter set to which the counter belongs + * in the 'config_base'. The counter set (config_base) is then used + * to enable/disable the counters. + */ + hwc->config = ev; + hwc->config_base = get_counter_set(ev); + + /* Validate the counter that is assigned to this event. 
+ * Because the counter facility can use numerous counters at the + * same time without constraints, it is not necessary to explicitly + * validate event groups (event->group_leader != event). + */ + err = validate_event(hwc); + if (err) + return err; + + /* Initialize for using the CPU-measurement counter facility */ + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + /* Finally, validate version and authorization of the counter set */ + err = validate_ctr_auth(hwc); + if (!err) + err = validate_ctr_version(hwc); + + return err; +} + +static int cpumf_pmu_event_init(struct perf_event *event) +{ + int err; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + err = __hw_perf_event_init(event); + break; + default: + return -ENOENT; + } + + if (unlikely(err) && event->destroy) + event->destroy(event); + + return err; +} + +static int hw_perf_event_reset(struct perf_event *event) +{ + u64 prev, new; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) { + if (err != 3) + break; + /* The counter is not (yet) available. This + * might happen if the counter set to which + * this counter belongs is in the disabled + * state. + */ + new = 0; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + return err; +} + +static int hw_perf_event_update(struct perf_event *event) +{ + u64 prev, new, delta; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) + goto out; + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + delta = (prev <= new) ? 
new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +out: + return err; +} + +static void cpumf_pmu_read(struct perf_event *event) +{ + if (event->hw.state & PERF_HES_STOPPED) + return; + + hw_perf_event_update(event); +} + +static void cpumf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->config == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* (Re-)enable and activate the counter set */ + ctr_set_enable(&cpuhw->state, hwc->config_base); + ctr_set_start(&cpuhw->state, hwc->config_base); + + /* The counter set to which this counter belongs can be already active. + * Because all counters in a set are active, the event->hw.prev_count + * needs to be synchronized. At this point, the counter set can be in + * the inactive or disabled state. + */ + hw_perf_event_reset(event); + + /* increment refcount for this counter set */ + atomic_inc(&cpuhw->ctr_set[hwc->config_base]); +} + +static void cpumf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* Decrement reference count for this counter set and if this + * is the last used counter in the set, clear activation + * control and set the counter set state to inactive. 
+ */ + if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) + ctr_set_stop(&cpuhw->state, hwc->config_base); + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static int cpumf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Check authorization for the counter set to which this + * counter belongs. + * For group events transaction, the authorization check is + * done in cpumf_pmu_commit_txn(). + */ + if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (validate_ctr_auth(&event->hw)) + return -EPERM; + + ctr_set_enable(&cpuhw->state, event->hw.config_base); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cpumf_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + + return 0; +} + +static void cpumf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpumf_pmu_stop(event, PERF_EF_UPDATE); + + /* Check if any counter in the counter set is still used. If not used, + * change the counter set to the disabled state. This also clears the + * content of all counters in the set. + * + * When a new perf event has been added but not yet started, this can + * clear enable control and resets all counters in a set. Therefore, + * cpumf_pmu_start() always has to reenable a counter set. + */ + if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) + ctr_set_disable(&cpuhw->state, event->hw.config_base); + + perf_event_update_userpage(event); +} + +/* + * Start group events scheduling transaction. + * Set flags to perform a single test at commit time. 
+ */ +static void cpumf_pmu_start_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + perf_pmu_disable(pmu); + cpuhw->flags |= PERF_EVENT_TXN; + cpuhw->tx_state = cpuhw->state; +} + +/* + * Stop and cancel a group events scheduling transaction. + * Assumes cpumf_pmu_del() is called for each successfully added + * cpumf_pmu_add() during the transaction. + */ +static void cpumf_pmu_cancel_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + WARN_ON(cpuhw->tx_state != cpuhw->state); + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); +} + +/* + * Commit the group events scheduling transaction. On success, the + * transaction is closed. On error, the transaction is kept open + * until cpumf_pmu_cancel_txn() is called. + */ +static int cpumf_pmu_commit_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + u64 state; + + /* check if the updated state can be scheduled */ + state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + state >>= CPUMF_LCCTL_ENABLE_SHIFT; + if ((state & cpuhw->info.auth_ctl) != state) + return -EPERM; + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); + return 0; +} + +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, + .start_txn = cpumf_pmu_start_txn, + .commit_txn = cpumf_pmu_commit_txn, + .cancel_txn = cpumf_pmu_cancel_txn, +}; + +static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case 
CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init cpumf_pmu_init(void) +{ + int rc; + + if (!cpum_cf_avail()) + return -ENODEV; + + /* clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_interrupt(0x1407, cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts " + "failed with rc=%i\n", rc); + goto out; + } + + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + unregister_external_interrupt(0x1407, cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return rc; +} +early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c new file mode 100644 index 000000000000..609f985198cf --- /dev/null +++ b/arch/s390/kernel/perf_event.c @@ -0,0 +1,125 @@ +/* + * Performance event support for s390x + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */
+#define KMSG_COMPONENT	"perf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/*
+ * Return a descriptive name for the PMU: the CPU-measurement facilities
+ * string when cpum_cf_avail() or cpum_sf_avail() reports true, and the
+ * generic "pmu" otherwise.
+ */
+const char *perf_pmu_name(void)
+{
+	if (cpum_cf_avail() || cpum_sf_avail())
+		return "CPU-measurement facilities (CPUMF)";
+	return "pmu";
+}
+EXPORT_SYMBOL(perf_pmu_name);
+/*
+ * Return the number of counters: PERF_CPUM_CF_MAX_CTR when
+ * cpum_cf_avail() reports true, 0 otherwise.
+ */
+int perf_num_counters(void)
+{
+	int num = 0;
+
+	if (cpum_cf_avail())
+		num += PERF_CPUM_CF_MAX_CTR;
+
+	return num;
+}
+EXPORT_SYMBOL(perf_num_counters);
+/*
+ * Log the counter facility information of the current CPU.
+ *
+ * Does nothing unless cpum_cf_avail() reports true.  With local
+ * interrupts disabled, cf_info is zeroed and filled by qctri(); only
+ * when qctri() returns 0 are the version fields (cfvn.csvn) and the
+ * auth_ctl, enable_ctl and act_ctl fields printed, followed by a hex
+ * dump of the whole cf_info structure.
+ */
+void perf_event_print_debug(void)
+{
+	struct cpumf_ctr_info cf_info;
+	unsigned long flags;
+	int cpu;
+
+	if (!cpum_cf_avail())
+		return;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	memset(&cf_info, 0, sizeof(cf_info));
+	if (!qctri(&cf_info)) {
+		pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
+			cpu, cf_info.cfvn, cf_info.csvn,
+			cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+		print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET,
+				     &cf_info, sizeof(cf_info));
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Walk the stack frames starting at sp and record one callchain entry
+ * per visited frame via perf_callchain_store(), using gprs[8] of the
+ * frame masked with PSW_ADDR_INSN.
+ *
+ * Only addresses inside [low, high - sizeof(frame)] are followed; low is
+ * raised to the current frame as the walk proceeds, so each back chain
+ * must point to a strictly higher address.  The function returns the
+ * first stack pointer value that fails these checks, which lets the
+ * caller continue the walk with a different low/high range.
+ *
+ * See also arch/s390/kernel/traps.c
+ */
+static unsigned long __store_trace(struct perf_callchain_entry *entry,
+				   unsigned long sp,
+				   unsigned long low, unsigned long high)
+{
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+
+	while (1) {
+		sp = sp & PSW_ADDR_INSN;
+		if (sp < low || sp > high - sizeof(*sf))
+			return sp;
+		sf = (struct stack_frame *) sp;
+		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		/* Follow the backchain until it is zero or leaves the range. */
+		while (1) {
+			low = sp;
+			sp = sf->back_chain & PSW_ADDR_INSN;
+			if (!sp)
+				break;
+			if (sp <= low || sp > high - sizeof(*sf))
+				return sp;
+			sf = (struct stack_frame *) sp;
+			perf_callchain_store(entry,
+					     sf->gprs[8] & PSW_ADDR_INSN);
+		}
+		/*
+		 * Zero backchain detected, check for interrupt frame.
+		 * A struct pt_regs is looked for directly behind the last
+		 * stack frame (sf + 1); if it fits into the range, the walk
+		 * continues at the stack pointer saved in its gprs[15].
+		 *
+		 * NOTE(review): the store below reuses sf->gprs[8], which
+		 * has already been recorded for this frame above, instead
+		 * of a value read from regs -- confirm this is intended.
+		 */
+		sp = (unsigned long) (sf + 1);
+		if (sp <= low || sp > high - sizeof(*regs))
+			return sp;
+		regs = (struct pt_regs *) sp;
+		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		low = sp;
+		sp = regs->gprs[15];
+	}
+}
+/*
+ * Record the kernel callchain for regs into entry.
+ *
+ * Nothing is recorded when regs describes user mode, or when the stack
+ * frame at regs->gprs[15] is NULL or has a zero back chain.  The walk
+ * starts at that frame's back chain (the frame at gprs[15] itself is
+ * not stored): first within the range
+ * [async_stack - ASYNC_SIZE, async_stack], then, from the stack pointer
+ * returned by that pass, within [thread_info, thread_info + THREAD_SIZE].
+ */
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
+{
+	unsigned long head;
+	struct stack_frame *head_sf;
+
+	if (user_mode(regs))
+		return;
+
+	head = regs->gprs[15];
+	head_sf = (struct stack_frame *) head;
+
+	if (!head_sf || !head_sf->back_chain)
+		return;
+
+	head = head_sf->back_chain;
+	head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
+			     S390_lowcore.async_stack);
+
+	__store_trace(entry, head, S390_lowcore.thread_info,
+		      S390_lowcore.thread_info + THREAD_SIZE);
+}
-- cgit v1.2.3