From cd64647f043e3fd3569bcf068f47f030198ff93a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Sep 2013 16:43:58 +0800 Subject: hung_task: Change sysctl_hung_task_check_count to 'int' As 'sysctl_hung_task_check_count' is 'unsigned long' when this value is assigned to max_count in check_hung_uninterruptible_tasks(), it's truncated to 'int' type. This causes a minor artifact: if we write 2^32 to sysctl.hung_task_check_count, hung task detection will be effectively disabled. With this fix, it will still truncate the user input to 32 bits, but reading sysctl.hung_task_check_count reflects the actual truncated value. Signed-off-by: Li Zefan Acked-by: Ingo Molnar Link: http://lkml.kernel.org/r/523FFF4E.9050401@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index bf8086b2506e..9552afa733d8 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -2,8 +2,8 @@ #define _SCHED_SYSCTL_H #ifdef CONFIG_DETECT_HUNG_TASK +extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, -- cgit v1.2.3 From 6a716c90a51338009c3bc1f460829afaed8f922d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Oct 2013 18:18:28 +0200 Subject: hung_task debugging: Add tracepoint to report the hang Currently check_hung_task() prints a warning if it detects the problem, but it is not convenient to watch the system logs if user-space wants to be notified about the hang. Add the new trace_sched_process_hang() into check_hung_task(), this way a user-space monitor can easily wait for the hang and potentially resolve a problem. Signed-off-by: Oleg Nesterov Cc: Dave Sullivan Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20131019161828.GA7439@redhat.com Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 19 +++++++++++++++++++ kernel/hung_task.c | 4 ++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 2e7d9947a10d..2a652d124fbb 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio, __entry->oldprio, __entry->newprio) ); +#ifdef CONFIG_DETECT_HUNG_TASK +TRACE_EVENT(sched_process_hang, + TP_PROTO(struct task_struct *tsk), + TP_ARGS(tsk), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + ), + + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) +); +#endif /* CONFIG_DETECT_HUNG_TASK */ + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 042252383fd2..8807061ca004 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * The number of tasks checked: @@ -92,6 +93,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) t->last_switch_count = switch_count; return; } + + trace_sched_process_hang(t); + if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; -- cgit v1.2.3 From 827da44c61419f29ae3be198c342e2147f1a10cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:58 -0700 Subject: net: Explicitly initialize u64_stats_sync structures for lockdep In order to enable lockdep on seqcount/seqlock structures, we must explicitly initialize any locks. The u64_stats_sync structure, uses a seqcount, and thus we need to introduce a u64_stats_init() function and use it to initialize the structure. This unfortunately adds a lot of fairly trivial initialization code to a number of drivers. But the benefit of ensuring correctness makes this worth while. Because these changes are required for lockdep to be enabled, and the changes are quite trivial, I've not yet split this patch out into 30-some separate patches, as I figured it would be better to get the various maintainers thoughts on how to best merge this change along with the seqcount lockdep enablement. Feedback would be appreciated! Signed-off-by: John Stultz Acked-by: Julian Anastasov Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Jesse Gross Cc: Mathieu Desnoyers Cc: "Michael S. Tsirkin" Cc: Mirko Lindner Cc: Patrick McHardy Cc: Roger Luethi Cc: Rusty Russell Cc: Simon Horman Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Thomas Petazzoni Cc: Wensong Zhang Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/net/dummy.c | 6 ++++++ drivers/net/ethernet/emulex/benet/be_main.c | 4 ++++ drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ drivers/net/ethernet/marvell/mvneta.c | 3 +++ drivers/net/ethernet/marvell/sky2.c | 3 +++ drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 ++++ drivers/net/ethernet/nvidia/forcedeth.c | 2 ++ drivers/net/ethernet/realtek/8139too.c | 3 +++ drivers/net/ethernet/tile/tilepro.c | 2 ++ drivers/net/ethernet/via/via-rhine.c | 3 +++ drivers/net/ifb.c | 5 +++++ drivers/net/loopback.c | 6 ++++++ drivers/net/macvlan.c | 7 +++++++ drivers/net/nlmon.c | 8 ++++++++ drivers/net/team/team.c | 6 ++++++ drivers/net/team/team_mode_loadbalance.c | 9 ++++++++- drivers/net/veth.c | 8 ++++++++ drivers/net/virtio_net.c | 8 ++++++++ drivers/net/vxlan.c | 8 ++++++++ drivers/net/xen-netfront.c | 6 ++++++ include/linux/u64_stats_sync.h | 7 +++++++ net/8021q/vlan_dev.c | 9 ++++++++- net/bridge/br_device.c | 7 +++++++ net/ipv4/af_inet.c | 14 ++++++++++++++ net/ipv4/ip_tunnel.c | 8 +++++++- net/ipv6/addrconf.c | 14 ++++++++++++++ net/ipv6/af_inet6.c | 14 ++++++++++++++ net/ipv6/ip6_gre.c | 15 +++++++++++++++ net/ipv6/ip6_tunnel.c | 7 +++++++ net/ipv6/sit.c | 15 +++++++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 25 ++++++++++++++++++++++--- net/openvswitch/datapath.c | 6 ++++++ net/openvswitch/vport.c | 8 ++++++++ 34 files changed, 253 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index b710c6b2d659..bd8f84b0b894 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -88,10 +88,16 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) static int dummy_dev_init(struct net_device *dev) { + int i; dev->dstats = alloc_percpu(struct pcpu_dstats); if (!dev->dstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_dstats *dstats; + dstats = per_cpu_ptr(dev->dstats, i); + u64_stats_init(&dstats->syncp); + } return 0; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 2c38cc402119..edd75950855a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2047,6 +2047,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) if (status) return status; + u64_stats_init(&txo->stats.sync); + u64_stats_init(&txo->stats.sync_compl); + /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ @@ -2108,6 +2111,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter) if (rc) return rc; + u64_stats_init(&rxo->stats.sync); eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; rc = be_cmd_cq_create(adapter, cq, eq, false, 3); if (rc) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8cf44f2a8ccd..b6edb93a8fc1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1223,6 +1223,9 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; + u64_stats_init(&ring->tx_syncp); + u64_stats_init(&ring->tx_syncp2); + /* assign ring to adapter */ adapter->tx_ring[txr_idx] = ring; @@ -1256,6 +1259,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->rx_ring_count; ring->queue_index = rxr_idx; + u64_stats_init(&ring->rx_syncp); + /* assign ring to adapter */ adapter->rx_ring[rxr_idx] = ring; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0ade0cd5ef53..c1750364ddc0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4867,6 +4867,8 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) if (!tx_ring->tx_buffer_info) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); @@ -4949,6 +4951,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) if (!rx_ring->rx_buffer_info) goto err; + u64_stats_init(&rx_ring->syncp); + /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e35bac7cfdf1..cb4635c0cd73 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2792,6 +2792,9 @@ static int mvneta_probe(struct platform_device *pdev) pp = netdev_priv(dev); + u64_stats_init(&pp->tx_stats.syncp); + u64_stats_init(&pp->rx_stats.syncp); + pp->weight = MVNETA_RX_POLL_WEIGHT; pp->phy_node = phy_node; pp->phy_interface = phy_mode; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index e09a8c6f8536..339d841a538b 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4763,6 +4763,9 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, sky2->hw = hw; sky2->msg_enable = netif_msg_init(debug, default_msg); + u64_stats_init(&sky2->tx_stats.syncp); + u64_stats_init(&sky2->rx_stats.syncp); + /* Auto speed and flow control */ sky2->flags = SKY2_FLAG_AUTO_SPEED | SKY2_FLAG_AUTO_PAUSE; if (hw->chip_id != CHIP_ID_YUKON_XL) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5a20eaf903dd..44626ec1127b 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2072,6 +2072,10 @@ static int vxge_open_vpaths(struct vxgedev *vdev) vdev->config.tx_steering_type; vpath->fifo.ndev = vdev->ndev; vpath->fifo.pdev = vdev->pdev; + + u64_stats_init(&vpath->fifo.stats.syncp); + u64_stats_init(&vpath->ring.stats.syncp); + if (vdev->config.tx_steering_type) vpath->fifo.txq = netdev_get_tx_queue(vdev->ndev, i); diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 098b96dad66f..2d045be4b5cf 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5619,6 +5619,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) spin_lock_init(&np->lock); spin_lock_init(&np->hwstats_lock); SET_NETDEV_DEV(dev, &pci_dev->dev); + u64_stats_init(&np->swstats_rx_syncp); + u64_stats_init(&np->swstats_tx_syncp); init_timer(&np->oom_kick); np->oom_kick.data = (unsigned long) dev; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 3ccedeb8aba0..c40e984868ad 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -791,6 +791,9 @@ static struct net_device *rtl8139_init_board(struct pci_dev *pdev) pci_set_master (pdev); + u64_stats_init(&tp->rx_stats.syncp); + u64_stats_init(&tp->tx_stats.syncp); + retry: /* PIO bar register comes first. */ bar = !use_io; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 106be47716e7..edb2e12a0fe2 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -1008,6 +1008,8 @@ static void tile_net_register(void *dev_ptr) info->egress_timer.data = (long)info; info->egress_timer.function = tile_net_handle_egress_timer; + u64_stats_init(&info->stats.syncp); + priv->cpu[my_cpu] = info; /* diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index bdf697b184ae..dd99715e6645 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -987,6 +987,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rp->base = ioaddr; + u64_stats_init(&rp->tx_stats.syncp); + u64_stats_init(&rp->rx_stats.syncp); + /* Get chip registers into a sane state */ rhine_power_init(dev); rhine_hw_init(dev, pioaddr); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index a3bed28197d2..c14d39bf32d0 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -265,6 +265,7 @@ MODULE_PARM_DESC(numifbs, "Number of ifb devices"); static int __init ifb_init_one(int index) { struct net_device *dev_ifb; + struct ifb_private *dp; int err; dev_ifb = alloc_netdev(sizeof(struct ifb_private), @@ -273,6 +274,10 @@ static int __init ifb_init_one(int index) if (!dev_ifb) return -ENOMEM; + dp = netdev_priv(dev_ifb); + u64_stats_init(&dp->rsync); + u64_stats_init(&dp->tsync); + dev_ifb->rtnl_link_ops = &ifb_link_ops; err = register_netdevice(dev_ifb); if (err < 0) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a17d85a331f1..ac24c27b4b2d 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -137,10 +137,16 @@ static const struct ethtool_ops loopback_ethtool_ops = { static int loopback_dev_init(struct net_device *dev) { + int i; dev->lstats = alloc_percpu(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_lstats *lb_stats; + lb_stats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&lb_stats->syncp); + } return 0; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9bf46bd19b87..0924e51b9ee0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -501,6 +501,7 @@ static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); const struct net_device *lowerdev = vlan->lowerdev; + int i; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); @@ -516,6 +517,12 @@ static int macvlan_init(struct net_device *dev) if (!vlan->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct macvlan_pcpu_stats *mvlstats; + mvlstats = per_cpu_ptr(vlan->pcpu_stats, i); + u64_stats_init(&mvlstats->syncp); + } + return 0; } diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index b57ce5f48962..d2bb12bfabd5 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -47,8 +47,16 @@ static int nlmon_change_mtu(struct net_device *dev, int new_mtu) static int nlmon_dev_init(struct net_device *dev) { + int i; + dev->lstats = alloc_percpu(struct pcpu_lstats); + for_each_possible_cpu(i) { + struct pcpu_lstats *nlmstats; + nlmstats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&nlmstats->syncp); + } + return dev->lstats == NULL ? -ENOMEM : 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 50e43e64d51d..6574eb8766f9 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1540,6 +1540,12 @@ static int team_init(struct net_device *dev) if (!team->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct team_pcpu_stats *team_stats; + team_stats = per_cpu_ptr(team->pcpu_stats, i); + u64_stats_init(&team_stats->syncp); + } + for (i = 0; i < TEAM_PORT_HASHENTRIES; i++) INIT_HLIST_HEAD(&team->en_port_hlist[i]); INIT_LIST_HEAD(&team->port_list); diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 829a9cd2b4da..d671fc3ac5ac 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -570,7 +570,7 @@ static int lb_init(struct team *team) { struct lb_priv *lb_priv = get_lb_priv(team); lb_select_tx_port_func_t *func; - int err; + int i, err; /* set default tx port selector */ func = lb_select_tx_port_get_func("hash"); @@ -588,6 +588,13 @@ static int lb_init(struct team *team) goto err_alloc_pcpu_stats; } + for_each_possible_cpu(i) { + struct lb_pcpu_stats *team_lb_stats; + team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); + u64_stats_init(&team_lb_stats->syncp); + } + + INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index eee1f19ef1e9..46e83e3fe999 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -230,10 +230,18 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu) static int veth_dev_init(struct net_device *dev) { + int i; + dev->vstats = alloc_percpu(struct pcpu_vstats); if (!dev->vstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_vstats *veth_stats; + veth_stats = per_cpu_ptr(dev->vstats, i); + u64_stats_init(&veth_stats->syncp); + } + return 0; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd1e1a0..ee384f3d612b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1569,6 +1569,14 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->stats == NULL) goto free; + for_each_possible_cpu(i) { + struct virtnet_stats *virtnet_stats; + virtnet_stats = per_cpu_ptr(vi->stats, i); + u64_stats_init(&virtnet_stats->tx_syncp); + u64_stats_init(&virtnet_stats->rx_syncp); + } + + vi->vq_index = alloc_percpu(int); if (vi->vq_index == NULL) goto free_stats; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ef5b6219f3f..01ab64d5f9a4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1884,11 +1884,19 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_sock *vs; + int i; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *vxlan_stats; + vxlan_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&vxlan_stats->syncp); + } + + spin_lock(&vn->sock_lock); vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); if (vs) { diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 36808bf25677..54223ac6d8a6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1338,6 +1338,12 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) if (np->stats == NULL) goto exit; + for_each_possible_cpu(i) { + struct netfront_stats *xen_nf_stats; + xen_nf_stats = per_cpu_ptr(np->stats, i); + u64_stats_init(&xen_nf_stats->syncp); + } + /* Initialise tx_skbs as a free chain containing every entry. */ np->tx_skb_freelist = 0; for (i = 0; i < NET_TX_RING_SIZE; i++) { diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 8da8c4e87da3..7bfabd20204c 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -67,6 +67,13 @@ struct u64_stats_sync { #endif }; + +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +# define u64_stats_init(syncp) seqcount_init(syncp.seq) +#else +# define u64_stats_init(syncp) do { } while (0) +#endif + static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09bf1c38805b..4deff3ed1da1 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -558,7 +558,7 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; + int subclass = 0, i; netif_carrier_off(dev); @@ -612,6 +612,13 @@ static int vlan_dev_init(struct net_device *dev) if (!vlan_dev_priv(dev)->vlan_pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct vlan_pcpu_stats *vlan_stat; + vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + u64_stats_init(&vlan_stat->syncp); + } + + return 0; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163635da..7893d641775b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -88,11 +88,18 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + int i; br->stats = alloc_percpu(struct br_cpu_netstats); if (!br->stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct br_cpu_netstats *br_dev_stats; + br_dev_stats = per_cpu_ptr(br->stats, i); + u64_stats_init(&br_dev_stats->syncp); + } + return 0; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cfeb85cff4f0..5f4617e377b8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1518,6 +1518,7 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) return -ENOMEM; + #if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); if (!ptr[1]) { @@ -1561,6 +1562,8 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib), __alignof__(struct tcp_mib)) < 0) @@ -1569,6 +1572,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet_stats; + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); + u64_stats_init(&af_inet_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); + u64_stats_init(&af_inet_stats->syncp); +#endif + } + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib), __alignof__(struct linux_mib)) < 0) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 63a6d6d6b875..caf01176a5e4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -976,13 +976,19 @@ int ip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - int err; + int i, err; dev->destructor = ip_tunnel_dev_free; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipt_stats; + ipt_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipt_stats->syncp); + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { free_percpu(dev->tstats); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb301da38..d62d589bb622 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -281,10 +281,24 @@ static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { + int i; + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip; + + for_each_possible_cpu(i) { + struct ipstats_mib *addrconf_stats; + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + u64_stats_init(&addrconf_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); + u64_stats_init(&addrconf_stats->syncp); +#endif + } + + idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device), GFP_KERNEL); if (!idev->stats.icmpv6dev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100b021e..a8f8559b3dce 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -719,6 +719,8 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof(struct udp_mib), __alignof__(struct udp_mib)) < 0) @@ -731,6 +733,18 @@ static int __net_init ipv6_init_mibs(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet6_stats; + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + u64_stats_init(&af_inet6_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); + u64_stats_init(&af_inet6_stats->syncp); +#endif + } + + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bf4a9a084de5..8acb28621f9c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1252,6 +1252,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1269,6 +1270,13 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tunnel_stats; + ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tunnel_stats->syncp); + } + + return 0; } @@ -1449,6 +1457,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1462,6 +1471,12 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tap_stats; + ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tap_stats->syncp); + } + return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 583b77e2f69b..df1fa58528c6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1494,12 +1494,19 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int i; t->dev = dev; t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6_tnl_stats; + ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6_tnl_stats->syncp); + } return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 19269453a8ea..365dc5473eb2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1310,6 +1310,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1322,6 +1323,12 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_tunnel_stats; + ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_tunnel_stats->syncp); + } + return 0; } @@ -1331,6 +1338,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1344,6 +1352,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_fb_stats; + ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_fb_stats->syncp); + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f7..3825725907f6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; - unsigned int atype; + unsigned int atype, i; EnterFunction(2); @@ -869,6 +869,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, if (!dest->stats.cpustats) goto err_alloc; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_dest_stats; + ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); + u64_stats_init(&ip_vs_dest_stats->syncp); + } + dest->af = svc->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; @@ -1134,7 +1140,7 @@ static int ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { - int ret = 0; + int ret = 0, i; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; @@ -1184,6 +1190,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_stats; + ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); + u64_stats_init(&ip_vs_stats->syncp); + } + + /* I'm the first user of the service */ atomic_set(&svc->refcnt, 0); @@ -3780,7 +3793,7 @@ static struct notifier_block ip_vs_dst_notifier = { int __net_init ip_vs_control_net_init(struct net *net) { - int idx; + int i, idx; struct netns_ipvs *ipvs = net_ipvs(net); /* Initialize rs_table */ @@ -3799,6 +3812,12 @@ int __net_init ip_vs_control_net_init(struct net *net) if (!ipvs->tot_stats.cpustats) return -ENOMEM; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ipvs_tot_stats; + ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); + u64_stats_init(&ipvs_tot_stats->syncp); + } + spin_lock_init(&ipvs->tot_stats.lock); proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2aa13bd7f2b2..b92553c02279 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1698,6 +1698,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_table; } + for_each_possible_cpu(i) { + struct dp_stats_percpu *dpath_stats; + dpath_stats = per_cpu_ptr(dp->stats_percpu, i); + u64_stats_init(&dpath_stats->sync); + } + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6f65dbe13812..d830a95f03a4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -118,6 +118,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; + int i; alloc_size = sizeof(struct vport); if (priv_size) { @@ -141,6 +142,13 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, return ERR_PTR(-ENOMEM); } + for_each_possible_cpu(i) { + struct pcpu_tstats *vport_stats; + vport_stats = per_cpu_ptr(vport->percpu_stats, i); + u64_stats_init(&vport_stats->syncp); + } + + spin_lock_init(&vport->stats_lock); return vport; -- cgit v1.2.3 From 1ca7d67cf5d5a2aef26a8d9afd789006fa098347 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:59 -0700 Subject: seqcount: Add lockdep functionality to seqcount/seqlock structures Currently seqlocks and seqcounts don't support lockdep. After running across a seqcount related deadlock in the timekeeping code, I used a less-refined and more focused variant of this patch to narrow down the cause of the issue. This is a first-pass attempt to properly enable lockdep functionality on seqlocks and seqcounts. Since seqcounts are used in the vdso gettimeofday code, I've provided non-lockdep accessors for those needs. I've also handled one case where there were nested seqlock writers and there may be more edge cases. Comments and feedback would be appreciated! Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Cc: Eric Dumazet Cc: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/vdso/vclock_gettime.c | 8 ++--- fs/dcache.c | 4 +-- fs/fs_struct.c | 2 +- include/linux/init_task.h | 8 ++--- include/linux/lockdep.h | 8 +++-- include/linux/seqlock.h | 79 ++++++++++++++++++++++++++++++++++++++---- mm/filemap_xip.c | 2 +- 7 files changed, 90 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 72074d528400..2ada505067cc 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -178,7 +178,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->wall_time_snsec; @@ -198,7 +198,7 @@ notrace static int do_monotonic(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; ns = gtod->monotonic_time_snsec; @@ -214,7 +214,7 @@ notrace static int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -225,7 +225,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); diff --git a/fs/dcache.c b/fs/dcache.c index ae6ebb88ceff..f750be22c08c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2574,7 +2574,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&target->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ @@ -2706,7 +2706,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&anon->d_seq); + write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); dparent = dentry->d_parent; diff --git a/fs/fs_struct.c b/fs/fs_struct.c index d8ac61d0c932..7dca743b2ce1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -161,6 +161,6 @@ EXPORT_SYMBOL(current_umask); struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), - .seq = SEQCNT_ZERO, + .seq = SEQCNT_ZERO(init_fs.seq), .umask = 0022, }; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5cd0f0949927..b0ed422e4e4a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,10 +32,10 @@ extern struct fs_struct init_fs; #endif #ifdef CONFIG_CPUSETS -#define INIT_CPUSET_SEQ \ - .mems_allowed_seq = SEQCNT_ZERO, +#define INIT_CPUSET_SEQ(tsk) \ + .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), #else -#define INIT_CPUSET_SEQ +#define INIT_CPUSET_SEQ(tsk) #endif #define INIT_SIGNALS(sig) { \ @@ -220,7 +220,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ - INIT_CPUSET_SEQ \ + INIT_CPUSET_SEQ(tsk) \ INIT_VTIME(tsk) \ } diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index cfc2f119779a..92b1bfc5da60 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -497,6 +497,10 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) #define rwlock_release(l, n, i) lock_release(l, n, i) +#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define seqcount_release(l, n, i) lock_release(l, n, i) + #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define mutex_release(l, n, i) lock_release(l, n, i) @@ -504,11 +508,11 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) -# define rwsem_release(l, n, i) lock_release(l, n, i) +#define rwsem_release(l, n, i) lock_release(l, n, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) -# define lock_map_release(l) lock_release(l, 1, _THIS_IP_) +#define lock_map_release(l) lock_release(l, 1, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 21a209336e79..1e8a8b6e837d 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -34,6 +34,7 @@ #include #include +#include #include /* @@ -44,10 +45,50 @@ */ typedef struct seqcount { unsigned sequence; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } seqcount_t; -#define SEQCNT_ZERO { 0 } -#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0) +static inline void __seqcount_init(seqcount_t *s, const char *name, + struct lock_class_key *key) +{ + /* + * Make sure we are not reinitializing a held lock: + */ + lockdep_init_map(&s->dep_map, name, key, 0); + s->sequence = 0; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SEQCOUNT_DEP_MAP_INIT(lockname) \ + .dep_map = { .name = #lockname } \ + +# define seqcount_init(s) \ + do { \ + static struct lock_class_key __key; \ + __seqcount_init((s), #s, &__key); \ + } while (0) + +static inline void seqcount_lockdep_reader_access(const seqcount_t *s) +{ + seqcount_t *l = (seqcount_t *)s; + unsigned long flags; + + local_irq_save(flags); + seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); + seqcount_release(&l->dep_map, 1, _RET_IP_); + local_irq_restore(flags); +} + +#else +# define SEQCOUNT_DEP_MAP_INIT(lockname) +# define seqcount_init(s) __seqcount_init(s, NULL, NULL) +# define seqcount_lockdep_reader_access(x) +#endif + +#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} + /** * __read_seqcount_begin - begin a seq-read critical section (without barrier) @@ -75,6 +116,22 @@ repeat: return ret; } +/** + * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * read_seqcount_begin_no_lockdep opens a read critical section of the given + * seqcount, but without any lockdep checking. Validity of the critical + * section is tested by checking read_seqcount_retry function. + */ +static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s) +{ + unsigned ret = __read_seqcount_begin(s); + smp_rmb(); + return ret; +} + /** * read_seqcount_begin - begin a seq-read critical section * @s: pointer to seqcount_t @@ -86,9 +143,8 @@ repeat: */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { - unsigned ret = __read_seqcount_begin(s); - smp_rmb(); - return ret; + seqcount_lockdep_reader_access(s); + return read_seqcount_begin_no_lockdep(s); } /** @@ -108,6 +164,8 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = ACCESS_ONCE(s->sequence); + + seqcount_lockdep_reader_access(s); smp_rmb(); return ret & ~1; } @@ -152,14 +210,21 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) * Sequence counter only version assumes that callers are using their * own mutexing. */ -static inline void write_seqcount_begin(seqcount_t *s) +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { s->sequence++; smp_wmb(); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); } static inline void write_seqcount_end(seqcount_t *s) { + seqcount_release(&s->dep_map, 1, _RET_IP_); smp_wmb(); s->sequence++; } @@ -188,7 +253,7 @@ typedef struct { */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ - .seqcount = SEQCNT_ZERO, \ + .seqcount = SEQCNT_ZERO(lockname), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 28fe26b64f8a..d8d9fe3f685c 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -26,7 +26,7 @@ * of ZERO_PAGE(), such as /dev/zero */ static DEFINE_MUTEX(xip_sparse_mutex); -static seqcount_t xip_sparse_seq = SEQCNT_ZERO; +static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq); static struct page *__xip_sparse_page; /* called under xip_sparse_mutex */ -- cgit v1.2.3 From db751fe3ea6880ff5ac5abe60cb7b80deb5a4140 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:52:00 -0700 Subject: cpuset: Fix potential deadlock w/ set_mems_allowed After adding lockdep support to seqlock/seqcount structures, I started seeing the following warning: [ 1.070907] ====================================================== [ 1.072015] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] [ 1.073181] 3.11.0+ #67 Not tainted [ 1.073801] ------------------------------------------------------ [ 1.074882] kworker/u4:2/708 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 1.076088] (&p->mems_allowed_seq){+.+...}, at: [] new_slab+0x5f/0x280 [ 1.077572] [ 1.077572] and this task is already holding: [ 1.078593] (&(&q->__queue_lock)->rlock){..-...}, at: [] blk_execute_rq_nowait+0x53/0xf0 [ 1.080042] which would create a new lock dependency: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} -> (&p->mems_allowed_seq){+.+...} [ 1.080042] [ 1.080042] but this new dependency connects a SOFTIRQ-irq-safe lock: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} [ 1.080042] ... which became SOFTIRQ-irq-safe at: [ 1.080042] [] __lock_acquire+0x5b9/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] _raw_spin_lock+0x41/0x80 [ 1.080042] [] scsi_device_unbusy+0x7e/0xd0 [ 1.080042] [] scsi_finish_command+0x32/0xf0 [ 1.080042] [] scsi_softirq_done+0xa1/0x130 [ 1.080042] [] blk_done_softirq+0x73/0x90 [ 1.080042] [] __do_softirq+0x110/0x2f0 [ 1.080042] [] run_ksoftirqd+0x2d/0x60 [ 1.080042] [] smpboot_thread_fn+0x156/0x1e0 [ 1.080042] [] kthread+0xd6/0xe0 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] to a SOFTIRQ-irq-unsafe lock: [ 1.080042] (&p->mems_allowed_seq){+.+...} [ 1.080042] ... which became SOFTIRQ-irq-unsafe at: [ 1.080042] ... [] __lock_acquire+0x613/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] kthreadd+0x82/0x180 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] other info that might help us debug this: [ 1.080042] [ 1.080042] Possible interrupt unsafe locking scenario: [ 1.080042] [ 1.080042] CPU0 CPU1 [ 1.080042] ---- ---- [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] local_irq_disable(); [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] [ 1.080042] *** DEADLOCK *** The issue stems from the kthreadd() function calling set_mems_allowed with irqs enabled. While its possibly unlikely for the actual deadlock to trigger, a fix is fairly simple: disable irqs before taking the mems_allowed_seq lock. Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Acked-by: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index cc1b01cf2035..3fe661fe96d1 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -110,10 +110,14 @@ static inline bool put_mems_allowed(unsigned int seq) static inline void set_mems_allowed(nodemask_t nodemask) { + unsigned long flags; + task_lock(current); + local_irq_save(flags); write_seqcount_begin(¤t->mems_allowed_seq); current->mems_allowed = nodemask; write_seqcount_end(¤t->mems_allowed_seq); + local_irq_restore(flags); task_unlock(current); } -- cgit v1.2.3 From 67a6de49bf545c34eb8dee99abbb92d9ea268200 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Nov 2013 08:26:39 +0100 Subject: locking/doc: Update references to kernel/mutex.c Fix this docbook error: >> docproc: kernel/mutex.c: No such file or directory by updating the stale references to kernel/mutex.c. Reported-by: fengguang.wu@intel.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-34pikw1tlsskj65rrt5iusrq@git.kernel.org Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-locking.tmpl | 2 +- Documentation/mutex-design.txt | 10 +++++----- include/linux/mutex.h | 2 +- kernel/locking/mutex.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 09e884e5b9f5..19f2a5a5a5b4 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1958,7 +1958,7 @@ machines due to caching. Mutex API reference !Iinclude/linux/mutex.h -!Ekernel/mutex.c +!Ekernel/locking/mutex.c diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index 38c10fd7f411..1dfe62c3641d 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -116,11 +116,11 @@ using mutexes at the moment, please let me know if you find any. ] Implementation of mutexes ------------------------- -'struct mutex' is the new mutex type, defined in include/linux/mutex.h -and implemented in kernel/mutex.c. It is a counter-based mutex with a -spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", -0 for "locked" and negative numbers (usually -1) for "locked, potential -waiters queued". +'struct mutex' is the new mutex type, defined in include/linux/mutex.h and +implemented in kernel/locking/mutex.c. It is a counter-based mutex with a +spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for +"locked" and negative numbers (usually -1) for "locked, potential waiters +queued". the APIs of 'struct mutex' have been streamlined: diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bab49da8a0f0..d3181936c138 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -131,7 +131,7 @@ static inline int mutex_is_locked(struct mutex *lock) } /* - * See kernel/mutex.c for detailed documentation of these APIs. + * See kernel/locking/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d24105b1b794..4dd6e4c219de 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1,5 +1,5 @@ /* - * kernel/mutex.c + * kernel/locking/mutex.c * * Mutexes: blocking mutual exclusion locks * -- cgit v1.2.3