diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-06-25 00:04:12 +0200 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-06-25 00:04:12 +0200 |
commit | eb441289f940c86df47db95044820fa5cf90c21f (patch) | |
tree | fa66237b1244afb4ed0a28962a793e5103669882 | |
parent | bonding: do not assume skb mac_header is set (diff) | |
parent | igc: Work around HW bug causing missing timestamps (diff) | |
download | linux-eb441289f940c86df47db95044820fa5cf90c21f.tar.xz linux-eb441289f940c86df47db95044820fa5cf90c21f.zip |
Merge branch '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:
====================
igc: TX timestamping fixes
This is the fixes part of the series intended to add support for using
the 4 timestamp registers present in i225/i226.
Moving the timestamp handling to be inline with the interrupt handling
has the advantage of improving the TX timestamping retrieval latency,
here are some numbers using ntpperf:
Before:
$ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37
| responses | TX timestamp offset (ns)
rate clients | lost invalid basic xleave | min mean max stddev
1000 100 0.00% 0.00% 0.00% 100.00% -56 +9 +52 19
1500 150 0.00% 0.00% 0.00% 100.00% -40 +30 +75 22
2250 225 0.00% 0.00% 0.00% 100.00% -11 +29 +72 15
3375 337 0.00% 0.00% 0.00% 100.00% -18 +40 +88 22
5062 506 0.00% 0.00% 0.00% 100.00% -19 +23 +77 15
7593 759 0.00% 0.00% 0.00% 100.00% +7 +47 +5168 43
11389 1138 0.00% 0.00% 0.00% 100.00% -11 +41 +5240 39
17083 1708 0.00% 0.00% 0.00% 100.00% +19 +60 +5288 50
25624 2562 0.00% 0.00% 0.00% 100.00% +1 +56 +5368 58
38436 3843 0.00% 0.00% 0.00% 100.00% -84 +12 +8847 66
57654 5765 0.00% 0.00% 100.00% 0.00%
86481 8648 0.00% 0.00% 100.00% 0.00%
129721 12972 0.00% 0.00% 100.00% 0.00%
194581 16384 0.00% 0.00% 100.00% 0.00%
291871 16384 27.35% 0.00% 72.65% 0.00%
437806 16384 50.05% 0.00% 49.95% 0.00%
After:
$ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37
| responses | TX timestamp offset (ns)
rate clients | lost invalid basic xleave | min mean max stddev
1000 100 0.00% 0.00% 0.00% 100.00% -44 +0 +61 19
1500 150 0.00% 0.00% 0.00% 100.00% -6 +39 +81 16
2250 225 0.00% 0.00% 0.00% 100.00% -22 +25 +69 15
3375 337 0.00% 0.00% 0.00% 100.00% -28 +15 +56 14
5062 506 0.00% 0.00% 0.00% 100.00% +7 +78 +143 27
7593 759 0.00% 0.00% 0.00% 100.00% -54 +24 +144 47
11389 1138 0.00% 0.00% 0.00% 100.00% -90 -33 +28 21
17083 1708 0.00% 0.00% 0.00% 100.00% -50 -2 +35 14
25624 2562 0.00% 0.00% 0.00% 100.00% -62 +7 +66 23
38436 3843 0.00% 0.00% 0.00% 100.00% -33 +30 +5395 36
57654 5765 0.00% 0.00% 100.00% 0.00%
86481 8648 0.00% 0.00% 100.00% 0.00%
129721 12972 0.00% 0.00% 100.00% 0.00%
194581 16384 19.50% 0.00% 80.50% 0.00%
291871 16384 35.81% 0.00% 64.19% 0.00%
437806 16384 55.40% 0.00% 44.60% 0.00%
During this series, and to show that as is always the case, things are
never easy as they should be, a hardware issue was found, and it took
some time to find the workaround(s). The bug and workaround are better
explained in patch 4/4.
Note: the workaround has a simpler alternative, but it would involve
adding support for the other timestamp registers, and only using the
TXSTMP{H/L}_0 as a way to clear the interrupt. But I feel bad about
throwing this kind of resources away. Didn't test this extensively but
it should work.
Also, as Marc Kleine-Budde suggested, after some consensus is reached
on this series, most parts of it will be proposed for igb.
* '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
igc: Work around HW bug causing missing timestamps
igc: Retrieve TX timestamp during interrupt handling
igc: Check if hardware TX timestamping is enabled earlier
igc: Fix race condition in PTP tx code
====================
Link: https://lore.kernel.org/r/20230622165244.2202786-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | drivers/net/ethernet/intel/igc/igc.h | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/igc/igc_main.c | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/igc/igc_ptp.c | 142 |
3 files changed, 117 insertions, 47 deletions
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 34aebf00a512..a00738bf6b19 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -228,7 +228,10 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; - struct work_struct ptp_tx_work; + /* Access to ptp_tx_skb and ptp_tx_start are protected by the + * ptp_tx_lock. + */ + spinlock_t ptp_tx_lock; struct sk_buff *ptp_tx_skb; struct hwtstamp_config tstamp_config; unsigned long ptp_tx_start; @@ -401,7 +404,6 @@ enum igc_state_t { __IGC_TESTING, __IGC_RESETTING, __IGC_DOWN, - __IGC_PTP_TX_IN_PROGRESS, }; enum igc_tx_flags { @@ -578,6 +580,7 @@ enum igc_ring_flags_t { IGC_RING_FLAG_TX_CTX_IDX, IGC_RING_FLAG_TX_DETECT_HANG, IGC_RING_FLAG_AF_XDP_ZC, + IGC_RING_FLAG_TX_HWTSTAMP, }; #define ring_uses_large_buffer(ring) \ @@ -634,6 +637,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fa764190f270..eb4f0e562f60 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1585,14 +1585,16 @@ done: } } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { /* FIXME: add support for retrieving timestamps from * the other timer registers before skipping the * timestamping request. */ - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, - &adapter->state)) { + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + if (!adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; @@ -1601,6 +1603,8 @@ done: } else { adapter->tx_hwtstamp_skipped++; } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } if (skb_vlan_tag_present(skb)) { @@ -5219,7 +5223,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); + igc_ptp_tx_tstamp_event(adapter); ack |= IGC_TSICR_TXTS; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 4e10ced736db..32ef112f8291 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -536,9 +536,34 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) wr32(IGC_TSYNCRXCTL, val); } +static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) +{ + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); +} + static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; + + /* Clear the flags first to avoid new packets to be enqueued + * for TX timestamping. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + + /* Now we can clean the pending TX timestamp requests. */ + igc_ptp_clear_tx_tstamp(adapter); wr32(IGC_TSYNCTXCTL, 0); } @@ -546,12 +571,23 @@ static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG); /* Read TXSTMP registers to discard any timestamp previously stored. */ rd32(IGC_TXSTMPL); rd32(IGC_TXSTMPH); + + /* The hardware is ready to accept TX timestamp requests, + * notify the transmit path. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + } /** @@ -603,6 +639,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, return 0; } +/* Requires adapter->ptp_tx_lock held by caller. */ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -610,7 +647,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; adapter->tx_hwtstamp_timeouts++; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ rd32(IGC_TXSTMPH); netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); @@ -618,20 +654,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) void igc_ptp_tx_hang(struct igc_adapter *adapter) { - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT); + unsigned long flags; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - /* If we haven't received a timestamp within the timeout, it is - * reasonable to assume that it will never occur, so we can unlock the - * timestamp bit when this occurs. - */ - if (timeout) { - cancel_work_sync(&adapter->ptp_tx_work); - igc_ptp_tx_timeout(adapter); - } + if (!adapter->ptp_tx_skb) + goto unlock; + + if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT)) + goto unlock; + + igc_ptp_tx_timeout(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -641,20 +677,57 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. + * + * Context: Expects adapter->ptp_tx_lock to be held by caller. */ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { struct sk_buff *skb = adapter->ptp_tx_skb; struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; + u32 tsynctxctl; int adjust = 0; u64 regval; if (WARN_ON_ONCE(!skb)) return; - regval = rd32(IGC_TXSTMPL); - regval |= (u64)rd32(IGC_TXSTMPH) << 32; + tsynctxctl = rd32(IGC_TSYNCTXCTL); + tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0; + if (tsynctxctl) { + regval = rd32(IGC_TXSTMPL); + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } else { + /* There's a bug in the hardware that could cause + * missing interrupts for TX timestamping. The issue + * is that for new interrupts to be triggered, the + * IGC_TXSTMPH_0 register must be read. + * + * To avoid discarding a valid timestamp that just + * happened at the "wrong" time, we need to confirm + * that there was no timestamp captured, we do that by + * assuming that no two timestamps in sequence have + * the same nanosecond value. + * + * So, we read the "low" register, read the "high" + * register (to latch a new timestamp) and read the + * "low" register again, if "old" and "new" versions + * of the "low" register are different, a valid + * timestamp was captured, we can read the "high" + * register again. + */ + u32 txstmpl_old, txstmpl_new; + + txstmpl_old = rd32(IGC_TXSTMPL); + rd32(IGC_TXSTMPH); + txstmpl_new = rd32(IGC_TXSTMPL); + + if (txstmpl_old == txstmpl_new) + return; + + regval = txstmpl_new; + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval)) return; @@ -676,13 +749,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) shhwtstamps.hwtstamp = ktime_add_ns(shhwtstamps.hwtstamp, adjust); - /* Clear the lock early before calling skb_tstamp_tx so that - * applications are not woken up before the lock bit is clear. We use - * a copy of the skb pointer to ensure other threads can't change it - * while we're notifying the stack. - */ adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Notify the stack and free the skb after we've unlocked */ skb_tstamp_tx(skb, &shhwtstamps); @@ -690,27 +757,25 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) } /** - * igc_ptp_tx_work - * @work: pointer to work struct + * igc_ptp_tx_tstamp_event + * @adapter: board private structure * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. + * Called when a TX timestamp interrupt happens to retrieve the + * timestamp and send it up to the socket. */ -static void igc_ptp_tx_work(struct work_struct *work) +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter) { - struct igc_adapter *adapter = container_of(work, struct igc_adapter, - ptp_tx_work); - struct igc_hw *hw = &adapter->hw; - u32 tsynctxctl; + unsigned long flags; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - return; + if (!adapter->ptp_tx_skb) + goto unlock; igc_ptp_tx_hwtstamp(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -959,8 +1024,8 @@ void igc_ptp_init(struct igc_adapter *adapter) return; } + spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -1020,10 +1085,7 @@ void igc_ptp_suspend(struct igc_adapter *adapter) if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) return; - cancel_work_sync(&adapter->ptp_tx_work); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + igc_ptp_clear_tx_tstamp(adapter); if (pci_device_is_present(adapter->pdev)) { igc_ptp_time_save(adapter); |