diff options
author | David S. Miller <davem@davemloft.net> | 2018-01-25 22:05:15 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-01-25 22:05:15 +0100 |
commit | 2611df7a799e71ed258e9f0955a18c216b6139bd (patch) | |
tree | 7a904f8c8643c2e61235fbea0309ef4c1067f1a3 | |
parent | net: Move net:netns_ids destruction out of rtnl_lock() and document locking s... (diff) | |
parent | sfc: support Medford2 frequency adjustment format (diff) | |
download | linux-2611df7a799e71ed258e9f0955a18c216b6139bd.tar.xz linux-2611df7a799e71ed258e9f0955a18c216b6139bd.zip |
Merge branch 'sfc-support-PTP-on-8000-and-X2000-series-NICs'
Edward Cree says:
====================
sfc: support PTP on 8000 and X2000 series NICs
Starting from the 8000-series (Medford 1), SFC NICs can timestamp TX packets
sent through an ordinary DMA queue, rather than requiring a special control-plane
operation as in the 7000-series. Patches 2-8 implement support for this.
The X2000-series (Medford 2) changes the format of timestamps, from seconds +
(2^27)ths of a second to seconds + quarter nanoseconds, as well as changing the
shift of the frequency adjustment for increased precision. Patches 9-12
implement support for these changes.
Patch #1 is an unrelated fix for NAPI budget handling, needed in order for
TX completion changes in the later patches to apply cleanly.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/sfc/ef10.c | 158 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 11 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/farch.c | 26 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/net_driver.h | 21 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/nic.h | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ptp.c | 368 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/tx.c | 21 |
7 files changed, 489 insertions, 120 deletions
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 8ae467db9162..75fbf58e421c 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -322,6 +322,25 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) return 0; } +static void efx_ef10_read_licensed_features(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_LICENSING_V3_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_LICENSING_V3_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, LICENSING_V3_IN_OP, + MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_LICENSING_V3, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc || (outlen < MC_CMD_LICENSING_V3_OUT_LEN)) + return; + + nic_data->licensed_features = MCDI_QWORD(outbuf, + LICENSING_V3_OUT_LICENSED_FEATURES); +} + static int efx_ef10_get_sysclk_freq(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN); @@ -722,6 +741,8 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc < 0) goto fail5; + efx_ef10_read_licensed_features(efx); + /* We can have one VI for each vi_stride-byte region. * However, until we use TX option descriptors we need two TX queues * per channel. @@ -760,14 +781,7 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc && rc != -EPERM) goto fail5; - rc = efx_ptp_probe(efx, NULL); - /* Failure to probe PTP is not fatal. - * In the case of EPERM, efx_ptp_probe will print its own message (in - * efx_ptp_get_attributes()), so we don't need to. 
- */ - if (rc && rc != -EPERM) - netif_warn(efx, drv, efx->net_dev, - "Failed to probe PTP, rc=%d\n", rc); + efx_ptp_defer_probe_with_channel(efx); #ifdef CONFIG_SFC_SRIOV if ((efx->pci_dev->physfn) && (!efx->pci_dev->is_physfn)) { @@ -937,6 +951,11 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) /* Link a buffer to each TX queue */ efx_for_each_channel(channel, efx) { + /* Extra channels, even those with TXQs (PTP), do not require + * PIO resources. + */ + if (!channel->type->want_pio) + continue; efx_for_each_channel_tx_queue(tx_queue, channel) { /* We assign the PIO buffers to queues in * reverse order to allow for the following @@ -1284,7 +1303,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) void __iomem *membase; int rc; - channel_vis = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + channel_vis = max(efx->n_channels, + (efx->n_tx_channels + efx->n_extra_tx_channels) * + EFX_TXQ_TYPES); #ifdef EFX_USE_PIO /* Try to allocate PIO buffers if wanted and if the full @@ -2408,12 +2429,25 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) int i; BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + /* Only attempt to enable TX timestamping if we have the license for it, + * otherwise TXQ init will fail + */ + if (!(nic_data->licensed_features & + (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN))) { + tx_queue->timestamping = false; + /* Disable sync events on this channel. */ + if (efx->type->ptp_set_ts_sync_events) + efx->type->ptp_set_ts_sync_events(efx, false, false); + } + /* TSOv2 is a limited resource that can only be configured on a limited * number of queues. TSO without checksum offload is not really a thing, * so we only enable it for those queues. + * TSOv2 cannot be used with Hardware timestamping. 
*/ if (csum_offload && (nic_data->datapath_caps2 & - (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) { + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) && + !tx_queue->timestamping) { tso_v2 = true; netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", channel->channel); @@ -2439,14 +2473,16 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); do { - MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS, + MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, /* This flag was removed from mcdi_pcol.h for * the non-_EXT version of INIT_TXQ. However, * firmware still honours it. */ INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, - INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, + INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, + tx_queue->timestamping); rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, NULL, 0, NULL); @@ -2472,12 +2508,13 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION; tx_queue->insert_count = 1; txd = efx_tx_desc(tx_queue, 0); - EFX_POPULATE_QWORD_4(*txd, + EFX_POPULATE_QWORD_5(*txd, ESF_DZ_TX_DESC_IS_OPT, true, ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_CRC_CSUM, ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload, - ESF_DZ_TX_OPTION_IP_CSUM, csum_offload); + ESF_DZ_TX_OPTION_IP_CSUM, csum_offload, + ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping); tx_queue->write_count = 1; if (tso_v2) { @@ -3572,31 +3609,92 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel, return n_packets; } -static int +static u32 efx_ef10_extract_event_ts(efx_qword_t *event) +{ + u32 tstamp; + + tstamp = EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_HI); + tstamp <<= 16; + tstamp |= EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_LO); + + return tstamp; +} + +static void efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) { struct 
efx_nic *efx = channel->efx; struct efx_tx_queue *tx_queue; unsigned int tx_ev_desc_ptr; unsigned int tx_ev_q_label; - int tx_descs = 0; + unsigned int tx_ev_type; + u64 ts_part; if (unlikely(READ_ONCE(efx->reset_pending))) - return 0; + return; if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT))) - return 0; + return; - /* Transmit completion */ - tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX); + /* Get the transmit queue */ tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL); tx_queue = efx_channel_get_tx_queue(channel, tx_ev_q_label % EFX_TXQ_TYPES); - tx_descs = ((tx_ev_desc_ptr + 1 - tx_queue->read_count) & - tx_queue->ptr_mask); - efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); - return tx_descs; + if (!tx_queue->timestamping) { + /* Transmit completion */ + tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX); + efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); + return; + } + + /* Transmit timestamps are only available for 8XXX series. They result + * in three events per packet. These occur in order, and are: + * - the normal completion event + * - the low part of the timestamp + * - the high part of the timestamp + * + * Each part of the timestamp is itself split across two 16 bit + * fields in the event. + */ + tx_ev_type = EFX_QWORD_FIELD(*event, ESF_EZ_TX_SOFT1); + + switch (tx_ev_type) { + case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION: + /* In case of Queue flush or FLR, we might have received + * the previous TX completion event but not the Timestamp + * events. 
+ */ + if (tx_queue->completed_desc_ptr != tx_queue->ptr_mask) + efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr); + + tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, + ESF_DZ_TX_DESCR_INDX); + tx_queue->completed_desc_ptr = + tx_ev_desc_ptr & tx_queue->ptr_mask; + break; + + case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO: + ts_part = efx_ef10_extract_event_ts(event); + tx_queue->completed_timestamp_minor = ts_part; + break; + + case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI: + ts_part = efx_ef10_extract_event_ts(event); + tx_queue->completed_timestamp_major = ts_part; + + efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr); + tx_queue->completed_desc_ptr = tx_queue->ptr_mask; + break; + + default: + netif_err(efx, hw, efx->net_dev, + "channel %d unknown tx event type %d (data " + EFX_QWORD_FMT ")\n", + channel->channel, tx_ev_type, + EFX_QWORD_VAL(*event)); + break; + } } static void @@ -3658,7 +3756,6 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota) efx_qword_t event, *p_event; unsigned int read_ptr; int ev_code; - int tx_descs = 0; int spent = 0; if (quota <= 0) @@ -3698,13 +3795,7 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota) } break; case ESE_DZ_EV_CODE_TX_EV: - tx_descs += efx_ef10_handle_tx_event(channel, &event); - if (tx_descs > efx->txq_entries) { - spent = quota; - goto out; - } else if (++spent == quota) { - goto out; - } + efx_ef10_handle_tx_event(channel, &event); break; case ESE_DZ_EV_CODE_DRIVER_EV: efx_ef10_handle_driver_event(channel, &event); @@ -6179,7 +6270,8 @@ static int efx_ef10_ptp_set_ts_sync_events(struct efx_nic *efx, bool en, efx_ef10_rx_enable_timestamping : efx_ef10_rx_disable_timestamping; - efx_for_each_channel(channel, efx) { + channel = efx_ptp_channel(efx); + if (channel) { int rc = set(channel, temp); if (en && rc != 0) { efx_ef10_ptp_set_ts_sync_events(efx, false, temp); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 12f0abc30cb1..456866b05641 100644 --- 
a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -896,12 +896,20 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100)); } +bool efx_default_channel_want_txqs(struct efx_channel *channel) +{ + return channel->channel - channel->efx->tx_channel_offset < + channel->efx->n_tx_channels; +} + static const struct efx_channel_type efx_default_channel_type = { .pre_probe = efx_channel_dummy_op_int, .post_remove = efx_channel_dummy_op_void, .get_name = efx_get_channel_name, .copy = efx_copy_channel, + .want_txqs = efx_default_channel_want_txqs, .keep_eventq = false, + .want_pio = true, }; int efx_channel_dummy_op_int(struct efx_channel *channel) @@ -1501,6 +1509,7 @@ static int efx_probe_interrupts(struct efx_nic *efx) } /* Assign extra channels if possible */ + efx->n_extra_tx_channels = 0; j = efx->n_channels; for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { if (!efx->extra_channel_type[i]) @@ -1512,6 +1521,8 @@ static int efx_probe_interrupts(struct efx_nic *efx) --j; efx_get_channel(efx, j)->type = efx->extra_channel_type[i]; + if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) + efx->n_extra_tx_channels++; } } diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 5334dc83d926..266b9bee1f3a 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -818,17 +818,16 @@ static void efx_farch_magic_event(struct efx_channel *channel, u32 magic) * The NIC batches TX completion events; the message we receive is of * the form "complete all TX events up to this index". 
*/ -static int +static void efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) { unsigned int tx_ev_desc_ptr; unsigned int tx_ev_q_label; struct efx_tx_queue *tx_queue; struct efx_nic *efx = channel->efx; - int tx_packets = 0; if (unlikely(READ_ONCE(efx->reset_pending))) - return 0; + return; if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { /* Transmit completion */ @@ -836,8 +835,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); tx_queue = efx_channel_get_tx_queue( channel, tx_ev_q_label % EFX_TXQ_TYPES); - tx_packets = ((tx_ev_desc_ptr - tx_queue->read_count) & - tx_queue->ptr_mask); efx_xmit_done(tx_queue, tx_ev_desc_ptr); } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) { /* Rewrite the FIFO write pointer */ @@ -856,8 +853,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) EFX_QWORD_FMT"\n", channel->channel, EFX_QWORD_VAL(*event)); } - - return tx_packets; } /* Detect errors included in the rx_evt_pkt_ok bit. 
*/ @@ -1090,7 +1085,7 @@ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) int qid; qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); - if (qid < EFX_TXQ_TYPES * efx->n_tx_channels) { + if (qid < EFX_TXQ_TYPES * (efx->n_tx_channels + efx->n_extra_tx_channels)) { tx_queue = efx_get_tx_queue(efx, qid / EFX_TXQ_TYPES, qid % EFX_TXQ_TYPES); if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) { @@ -1270,7 +1265,6 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget) unsigned int read_ptr; efx_qword_t event, *p_event; int ev_code; - int tx_packets = 0; int spent = 0; if (budget <= 0) @@ -1304,12 +1298,7 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget) goto out; break; case FSE_AZ_EV_CODE_TX_EV: - tx_packets += efx_farch_handle_tx_event(channel, - &event); - if (tx_packets > efx->txq_entries) { - spent = budget; - goto out; - } + efx_farch_handle_tx_event(channel, &event); break; case FSE_AZ_EV_CODE_DRV_GEN_EV: efx_farch_handle_generated_event(channel, &event); @@ -1680,20 +1669,21 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx) */ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) { - unsigned vi_count, buftbl_min; + unsigned vi_count, buftbl_min, total_tx_channels; #ifdef CONFIG_SFC_SRIOV struct siena_nic_data *nic_data = efx->nic_data; #endif + total_tx_channels = efx->n_tx_channels + efx->n_extra_tx_channels; /* Account for the buffer table entries backing the datapath channels * and the descriptor caches for those channels. 
*/ buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE + - efx->n_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE + + total_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE + efx->n_channels * EFX_MAX_EVQ_SIZE) * sizeof(efx_qword_t) / EFX_BUF_SIZE); - vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + vi_count = max(efx->n_channels, total_tx_channels * EFX_TXQ_TYPES); #ifdef CONFIG_SFC_SRIOV if (efx->type->sriov_wanted) { diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 3dd42f3136fe..d20a8660ee48 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -191,6 +191,7 @@ struct efx_tx_buffer { * Size of the region is efx_piobuf_size. * @piobuf_offset: Buffer offset to be specified in PIO descriptors * @initialised: Has hardware queue been initialised? + * @timestamping: Is timestamping enabled for this channel? * @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and * may also map tx data, depending on the nature of the TSO implementation. * @read_count: Current read pointer. @@ -202,6 +203,10 @@ struct efx_tx_buffer { * avoid cache-line ping-pong between the xmit path and the * completion path. * @merge_events: Number of TX merged completion events + * @completed_desc_ptr: Most recent completed pointer - only used with + * timestamping. + * @completed_timestamp_major: Top part of the most recent tx timestamp. + * @completed_timestamp_minor: Low part of the most recent tx timestamp. * @insert_count: Current insert pointer * This is the number of buffers that have been added to the * software ring. @@ -247,6 +252,7 @@ struct efx_tx_queue { void __iomem *piobuf; unsigned int piobuf_offset; bool initialised; + bool timestamping; /* Function pointers used in the fast path. 
*/ int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *); @@ -257,6 +263,9 @@ struct efx_tx_queue { unsigned int merge_events; unsigned int bytes_compl; unsigned int pkts_compl; + unsigned int completed_desc_ptr; + u32 completed_timestamp_major; + u32 completed_timestamp_minor; /* Members used only on the xmit path */ unsigned int insert_count ____cacheline_aligned_in_smp; @@ -522,8 +531,12 @@ struct efx_msi_context { * @copy: Copy the channel state prior to reallocation. May be %NULL if * reallocation is not supported. * @receive_skb: Handle an skb ready to be passed to netif_receive_skb() + * @want_txqs: Determine whether this channel should have TX queues + * created. If %NULL, TX queues are not created. * @keep_eventq: Flag for whether event queue should be kept initialised * while the device is stopped + * @want_pio: Flag for whether PIO buffers should be linked to this + * channel's TX queues. */ struct efx_channel_type { void (*handle_no_channel)(struct efx_nic *); @@ -532,7 +545,9 @@ struct efx_channel_type { void (*get_name)(struct efx_channel *, char *buf, size_t len); struct efx_channel *(*copy)(const struct efx_channel *); bool (*receive_skb)(struct efx_channel *, struct sk_buff *); + bool (*want_txqs)(struct efx_channel *); bool keep_eventq; + bool want_pio; }; enum efx_led_mode { @@ -735,6 +750,7 @@ struct vfdi_status; * @n_channels: Number of channels in use * @n_rx_channels: Number of channels used for RX (= number of RX queues) * @n_tx_channels: Number of channels used for TX + * @n_extra_tx_channels: Number of extra channels with TX queues * @rx_ip_align: RX DMA address offset to have IP header aligned in * in accordance with NET_IP_ALIGN * @rx_dma_len: Current maximum RX DMA length @@ -881,6 +897,7 @@ struct efx_nic { unsigned rss_spread; unsigned tx_channel_offset; unsigned n_tx_channels; + unsigned n_extra_tx_channels; unsigned int rx_ip_align; unsigned int rx_dma_len; unsigned int rx_buffer_order; @@ -1363,8 +1380,8 @@ 
efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type) static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) { - return channel->channel - channel->efx->tx_channel_offset < - channel->efx->n_tx_channels; + return channel->type && channel->type->want_txqs && + channel->type->want_txqs(channel); } static inline struct efx_tx_queue * diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 763052214525..6549fc685a48 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -440,6 +440,7 @@ struct efx_ef10_nic_data { struct efx_udp_tunnel udp_tunnels[16]; bool udp_tunnels_dirty; struct mutex udp_tunnels_lock; + u64 licensed_features; }; int efx_init_sriov(void); @@ -448,6 +449,7 @@ void efx_fini_sriov(void); struct ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); +struct efx_channel *efx_ptp_channel(struct efx_nic *efx); void efx_ptp_remove(struct efx_nic *efx); int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr); int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr); @@ -471,6 +473,8 @@ static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel, } void efx_ptp_start_datapath(struct efx_nic *efx); void efx_ptp_stop_datapath(struct efx_nic *efx); +bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx); +ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue); extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 3b37d7ded3c4..433d29d6bc95 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -149,18 +149,14 @@ enum ptp_packet_state { /* Maximum parts-per-billion adjustment that is acceptable */ #define MAX_PPB 1000000 -/* Number of bits required to hold the above */ -#define 
MAX_PPB_BITS 20 - -/* Number of extra bits allowed when calculating fractional ns. - * EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS + MAX_PPB_BITS should - * be less than 63. - */ -#define PPB_EXTRA_BITS 2 - /* Precalculate scale word to avoid long long division at runtime */ -#define PPB_SCALE_WORD ((1LL << (PPB_EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS +\ - MAX_PPB_BITS)) / 1000000000LL) +/* This is equivalent to 2^66 / 10^9. */ +#define PPB_SCALE_WORD ((1LL << (57)) / 1953125LL) + +/* How much to shift down after scaling to convert to FP40 */ +#define PPB_SHIFT_FP40 26 +/* ... and FP44. */ +#define PPB_SHIFT_FP44 22 #define PTP_SYNC_ATTEMPTS 4 @@ -218,8 +214,8 @@ struct efx_ptp_timeset { * @channel: The PTP channel (Siena only) * @rx_ts_inline: Flag for whether RX timestamps are inline (else they are * separate events) - * @rxq: Receive queue (awaiting timestamps) - * @txq: Transmit queue + * @rxq: Receive SKB queue (awaiting timestamps) + * @txq: Transmit SKB queue * @evt_list: List of MC receive events awaiting packets * @evt_free_list: List of free events * @evt_lock: Lock for manipulating evt_list and evt_free_list @@ -233,19 +229,36 @@ struct efx_ptp_timeset { * @config: Current timestamp configuration * @enabled: PTP operation enabled * @mode: Mode in which PTP operating (PTP version) - * @time_format: Time format supported by this NIC * @ns_to_nic_time: Function to convert from scalar nanoseconds to NIC time * @nic_to_kernel_time: Function to convert from NIC to kernel time + * @nic_time.minor_max: Wrap point for NIC minor times + * @nic_time.sync_event_diff_min: Minimum acceptable difference between time + * in packet prefix and last MCDI time sync event i.e. how much earlier than + * the last sync event time a packet timestamp can be. + * @nic_time.sync_event_diff_max: Maximum acceptable difference between time + * in packet prefix and last MCDI time sync event i.e. how much later than + * the last sync event time a packet timestamp can be. 
+ * @nic_time.sync_event_minor_shift: Shift required to make minor time from + * field in MCDI time sync event. * @min_synchronisation_ns: Minimum acceptable corrected sync window - * @ts_corrections.tx: Required driver correction of transmit timestamps - * @ts_corrections.rx: Required driver correction of receive timestamps + * @capabilities: Capabilities flags from the NIC + * @ts_corrections.ptp_tx: Required driver correction of PTP packet transmit + * timestamps + * @ts_corrections.ptp_rx: Required driver correction of PTP packet receive + * timestamps * @ts_corrections.pps_out: PPS output error (information only) * @ts_corrections.pps_in: Required driver correction of PPS input timestamps + * @ts_corrections.general_tx: Required driver correction of general packet + * transmit timestamps + * @ts_corrections.general_rx: Required driver correction of general packet + * receive timestamps * @evt_frags: Partly assembled PTP events * @evt_frag_idx: Current fragment number * @evt_code: Last event code * @start: Address at which MC indicates ready for synchronisation * @host_time_pps: Host time at last PPS + * @adjfreq_ppb_shift: Shift required to convert scaled parts-per-billion + * frequency adjustment into a fixed point fractional nanosecond format. * @current_adjfreq: Current ppb adjustment. * @phc_clock: Pointer to registered phc device (if primary function) * @phc_clock_info: Registration structure for phc device @@ -264,6 +277,7 @@ struct efx_ptp_timeset { * @oversize_sync_windows: Number of corrected sync windows that are too large * @rx_no_timestamp: Number of packets received without a timestamp. * @timeset: Last set of synchronisation statistics. + * @xmit_skb: Transmit SKB function. 
*/ struct efx_ptp_data { struct efx_nic *efx; @@ -284,22 +298,31 @@ struct efx_ptp_data { struct hwtstamp_config config; bool enabled; unsigned int mode; - unsigned int time_format; void (*ns_to_nic_time)(s64 ns, u32 *nic_major, u32 *nic_minor); ktime_t (*nic_to_kernel_time)(u32 nic_major, u32 nic_minor, s32 correction); + struct { + u32 minor_max; + u32 sync_event_diff_min; + u32 sync_event_diff_max; + unsigned int sync_event_minor_shift; + } nic_time; unsigned int min_synchronisation_ns; + unsigned int capabilities; struct { - s32 tx; - s32 rx; + s32 ptp_tx; + s32 ptp_rx; s32 pps_out; s32 pps_in; + s32 general_tx; + s32 general_rx; } ts_corrections; efx_qword_t evt_frags[MAX_EVENT_FRAGS]; int evt_frag_idx; int evt_code; struct efx_buffer start; struct pps_event_time host_time_pps; + unsigned int adjfreq_ppb_shift; s64 current_adjfreq; struct ptp_clock *phc_clock; struct ptp_clock_info phc_clock_info; @@ -319,6 +342,7 @@ struct efx_ptp_data { unsigned int rx_no_timestamp; struct efx_ptp_timeset timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM]; + void (*xmit_skb)(struct efx_nic *efx, struct sk_buff *skb); }; static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta); @@ -329,6 +353,24 @@ static int efx_phc_settime(struct ptp_clock_info *ptp, static int efx_phc_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); +bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + return ((efx_nic_rev(efx) >= EFX_REV_HUNT_A0) && + (nic_data->datapath_caps2 & + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN) + )); +} + +/* PTP 'extra' channel is still a traffic channel, but we only create TX queues + * if PTP uses MAC TX timestamps, not if PTP uses the MC directly to transmit. 
+ */ +bool efx_ptp_want_txqs(struct efx_channel *channel) +{ + return efx_ptp_use_mac_tx_timestamps(channel->efx); +} + #define PTP_SW_STAT(ext_name, field_name) \ { #ext_name, 0, offsetof(struct efx_ptp_data, field_name) } #define PTP_MC_STAT(ext_name, mcdi_name) \ @@ -471,6 +513,89 @@ static ktime_t efx_ptp_s27_to_ktime_correction(u32 nic_major, u32 nic_minor, return efx_ptp_s27_to_ktime(nic_major, nic_minor); } +/* For Medford2 platforms the time is in seconds and quarter nanoseconds. */ +static void efx_ptp_ns_to_s_qns(s64 ns, u32 *nic_major, u32 *nic_minor) +{ + struct timespec64 ts = ns_to_timespec64(ns); + + *nic_major = (u32)ts.tv_sec; + *nic_minor = ts.tv_nsec * 4; +} + +static ktime_t efx_ptp_s_qns_to_ktime_correction(u32 nic_major, u32 nic_minor, + s32 correction) +{ + ktime_t kt; + + nic_minor = DIV_ROUND_CLOSEST(nic_minor, 4); + correction = DIV_ROUND_CLOSEST(correction, 4); + + kt = ktime_set(nic_major, nic_minor); + + if (correction >= 0) + kt = ktime_add_ns(kt, (u64)correction); + else + kt = ktime_sub_ns(kt, (u64)-correction); + return kt; +} + +struct efx_channel *efx_ptp_channel(struct efx_nic *efx) +{ + return efx->ptp_data ? efx->ptp_data->channel : NULL; +} + +static u32 last_sync_timestamp_major(struct efx_nic *efx) +{ + struct efx_channel *channel = efx_ptp_channel(efx); + u32 major = 0; + + if (channel) + major = channel->sync_timestamp_major; + return major; +} + +/* The 8000 series and later can provide the time from the MAC, which is only + * 48 bits long and provides meta-information in the top 2 bits. + */ +static ktime_t +efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx, + struct efx_ptp_data *ptp, + u32 nic_major, u32 nic_minor, + s32 correction) +{ + ktime_t kt = { 0 }; + + if (!(nic_major & 0x80000000)) { + WARN_ON_ONCE(nic_major >> 16); + /* Use the top bits from the latest sync event. 
*/ + nic_major &= 0xffff; + nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000); + + kt = ptp->nic_to_kernel_time(nic_major, nic_minor, + correction); + } + return kt; +} + +ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + ktime_t kt; + + if (efx_ptp_use_mac_tx_timestamps(efx)) + kt = efx_ptp_mac_nic_to_ktime_correction(efx, ptp, + tx_queue->completed_timestamp_major, + tx_queue->completed_timestamp_minor, + ptp->ts_corrections.general_tx); + else + kt = ptp->nic_to_kernel_time( + tx_queue->completed_timestamp_major, + tx_queue->completed_timestamp_minor, + ptp->ts_corrections.general_tx); + return kt; +} + /* Get PTP attributes and set up time conversions */ static int efx_ptp_get_attributes(struct efx_nic *efx) { @@ -502,31 +627,71 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) return rc; } - if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION) { + switch (fmt) { + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION: ptp->ns_to_nic_time = efx_ptp_ns_to_s27; ptp->nic_to_kernel_time = efx_ptp_s27_to_ktime_correction; - } else if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS) { + ptp->nic_time.minor_max = 1 << 27; + ptp->nic_time.sync_event_minor_shift = 19; + break; + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS: ptp->ns_to_nic_time = efx_ptp_ns_to_s_ns; ptp->nic_to_kernel_time = efx_ptp_s_ns_to_ktime_correction; - } else { + ptp->nic_time.minor_max = 1000000000; + ptp->nic_time.sync_event_minor_shift = 22; + break; + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_QTR_NANOSECONDS: + ptp->ns_to_nic_time = efx_ptp_ns_to_s_qns; + ptp->nic_to_kernel_time = efx_ptp_s_qns_to_ktime_correction; + ptp->nic_time.minor_max = 4000000000; + ptp->nic_time.sync_event_minor_shift = 24; + break; + default: return -ERANGE; } - ptp->time_format = fmt; - - /* MC_CMD_PTP_OP_GET_ATTRIBUTES is an extended version of an older - * operation 
MC_CMD_PTP_OP_GET_TIME_FORMAT that also returns a value - * to use for the minimum acceptable corrected synchronization window. + /* Precalculate acceptable difference between the minor time in the + * packet prefix and the last MCDI time sync event. We expect the + * packet prefix timestamp to be after of sync event by up to one + * sync event interval (0.25s) but we allow it to exceed this by a + * fuzz factor of (0.1s) + */ + ptp->nic_time.sync_event_diff_min = ptp->nic_time.minor_max + - (ptp->nic_time.minor_max / 10); + ptp->nic_time.sync_event_diff_max = (ptp->nic_time.minor_max / 4) + + (ptp->nic_time.minor_max / 10); + + /* MC_CMD_PTP_OP_GET_ATTRIBUTES has been extended twice from an older + * operation MC_CMD_PTP_OP_GET_TIME_FORMAT. The function now may return + * a value to use for the minimum acceptable corrected synchronization + * window and may return further capabilities. * If we have the extra information store it. For older firmware that * does not implement the extended command use the default value. */ - if (rc == 0 && out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN) + if (rc == 0 && + out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST) ptp->min_synchronisation_ns = MCDI_DWORD(outbuf, PTP_OUT_GET_ATTRIBUTES_SYNC_WINDOW_MIN); else ptp->min_synchronisation_ns = DEFAULT_MIN_SYNCHRONISATION_NS; + if (rc == 0 && + out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN) + ptp->capabilities = MCDI_DWORD(outbuf, + PTP_OUT_GET_ATTRIBUTES_CAPABILITIES); + else + ptp->capabilities = 0; + + /* Set up the shift for conversion between frequency + * adjustments in parts-per-billion and the fixed-point + * fractional ns format that the adapter uses. 
+ */ + if (ptp->capabilities & (1 << MC_CMD_PTP_OUT_GET_ATTRIBUTES_FP44_FREQ_ADJ_LBN)) + ptp->adjfreq_ppb_shift = PPB_SHIFT_FP44; + else + ptp->adjfreq_ppb_shift = PPB_SHIFT_FP40; + return 0; } @@ -534,8 +699,9 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_TIMESTAMP_CORRECTIONS_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN); int rc; + size_t out_len; /* Get the timestamp corrections from the NIC. If this operation is * not supported (older NICs) then no correction is required. @@ -545,21 +711,37 @@ static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), NULL); + outbuf, sizeof(outbuf), &out_len); if (rc == 0) { - efx->ptp_data->ts_corrections.tx = MCDI_DWORD(outbuf, + efx->ptp_data->ts_corrections.ptp_tx = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_TRANSMIT); - efx->ptp_data->ts_corrections.rx = MCDI_DWORD(outbuf, + efx->ptp_data->ts_corrections.ptp_rx = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_RECEIVE); efx->ptp_data->ts_corrections.pps_out = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_OUT); efx->ptp_data->ts_corrections.pps_in = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_IN); + + if (out_len >= MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN) { + efx->ptp_data->ts_corrections.general_tx = MCDI_DWORD( + outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_TX); + efx->ptp_data->ts_corrections.general_rx = MCDI_DWORD( + outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_RX); + } else { + efx->ptp_data->ts_corrections.general_tx = + efx->ptp_data->ts_corrections.ptp_tx; + efx->ptp_data->ts_corrections.general_rx = + 
efx->ptp_data->ts_corrections.ptp_rx; + } } else if (rc == -EINVAL) { - efx->ptp_data->ts_corrections.tx = 0; - efx->ptp_data->ts_corrections.rx = 0; + efx->ptp_data->ts_corrections.ptp_tx = 0; + efx->ptp_data->ts_corrections.ptp_rx = 0; efx->ptp_data->ts_corrections.pps_out = 0; efx->ptp_data->ts_corrections.pps_in = 0; + efx->ptp_data->ts_corrections.general_tx = 0; + efx->ptp_data->ts_corrections.general_rx = 0; } else { efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), outbuf, sizeof(outbuf), rc); @@ -873,8 +1055,24 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) return rc; } +/* Transmit a PTP packet via the dedicated hardware timestamped queue. */ +static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) +{ + struct efx_ptp_data *ptp_data = efx->ptp_data; + struct efx_tx_queue *tx_queue; + u8 type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0; + + tx_queue = &ptp_data->channel->tx_queue[type]; + if (tx_queue && tx_queue->timestamping) { + efx_enqueue_skb(tx_queue, skb); + } else { + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); + dev_kfree_skb_any(skb); + } +} + /* Transmit a PTP packet, via the MCDI interface, to the wire. 
*/ -static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb) +static void efx_ptp_xmit_skb_mc(struct efx_nic *efx, struct sk_buff *skb) { struct efx_ptp_data *ptp_data = efx->ptp_data; struct skb_shared_hwtstamps timestamps; @@ -910,16 +1108,16 @@ static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb) timestamps.hwtstamp = ptp_data->nic_to_kernel_time( MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MAJOR), MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MINOR), - ptp_data->ts_corrections.tx); + ptp_data->ts_corrections.ptp_tx); skb_tstamp_tx(skb, &timestamps); rc = 0; fail: - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); - return rc; + return; } static void efx_ptp_drop_time_expired_events(struct efx_nic *efx) @@ -1189,7 +1387,7 @@ static void efx_ptp_worker(struct work_struct *work) efx_ptp_process_events(efx, &tempq); while ((skb = skb_dequeue(&ptp_data->txq))) - efx_ptp_xmit_skb(efx, skb); + ptp_data->xmit_skb(efx, skb); while ((skb = __skb_dequeue(&tempq))) efx_ptp_process_rx(efx, skb); @@ -1239,6 +1437,14 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) goto fail2; } + if (efx_ptp_use_mac_tx_timestamps(efx)) { + ptp->xmit_skb = efx_ptp_xmit_skb_queue; + /* Request sync events on this channel. */ + channel->sync_events_state = SYNC_EVENTS_QUIESCENT; + } else { + ptp->xmit_skb = efx_ptp_xmit_skb_mc; + } + INIT_WORK(&ptp->work, efx_ptp_worker); ptp->config.flags = 0; ptp->config.tx_type = HWTSTAMP_TX_OFF; @@ -1303,11 +1509,21 @@ fail1: static int efx_ptp_probe_channel(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; + int rc; channel->irq_moderation_us = 0; channel->rx_queue.core_index = 0; - return efx_ptp_probe(efx, channel); + rc = efx_ptp_probe(efx, channel); + /* Failure to probe PTP is not fatal; this channel will just not be + * used for anything. + * In the case of EPERM, efx_ptp_probe will print its own message (in + * efx_ptp_get_attributes()), so we don't need to. 
+ */ + if (rc && rc != -EPERM) + netif_warn(efx, drv, efx->net_dev, + "Failed to probe PTP, rc=%d\n", rc); + return 0; } void efx_ptp_remove(struct efx_nic *efx) @@ -1332,6 +1548,7 @@ void efx_ptp_remove(struct efx_nic *efx) efx_nic_free_buffer(efx, &efx->ptp_data->start); kfree(efx->ptp_data); + efx->ptp_data = NULL; } static void efx_ptp_remove_channel(struct efx_channel *channel) @@ -1548,6 +1765,17 @@ void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info) ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE); + /* Check licensed features. If we don't have the license for TX + * timestamps, the NIC will not support them. + */ + if (efx_ptp_use_mac_tx_timestamps(efx)) { + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + if (!(nic_data->licensed_features & + (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN))) + ts_info->so_timestamping &= + ~SOF_TIMESTAMPING_TX_HARDWARE; + } if (primary && primary->ptp_data && primary->ptp_data->phc_clock) ts_info->phc_index = ptp_clock_index(primary->ptp_data->phc_clock); @@ -1627,7 +1855,7 @@ static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp) evt->hwtimestamp = efx->ptp_data->nic_to_kernel_time( EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA), EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA), - ptp->ts_corrections.rx); + ptp->ts_corrections.ptp_rx); evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); list_add_tail(&evt->link, &ptp->evt_list); @@ -1709,9 +1937,20 @@ void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev) { + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + + /* When extracting the sync timestamp minor value, we should discard + * the least significant two bits. 
These are not required in order + * to reconstruct full-range timestamps and they are optionally used + * to report status depending on the options supplied when subscribing + * for sync events. + */ channel->sync_timestamp_major = MCDI_EVENT_FIELD(*ev, PTP_TIME_MAJOR); channel->sync_timestamp_minor = - MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_26_19) << 19; + (MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_MS_8BITS) & 0xFC) + << ptp->nic_time.sync_event_minor_shift; + /* if sync events have been disabled then we want to silently ignore * this event, so throw away result. */ @@ -1719,15 +1958,6 @@ void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev) SYNC_EVENTS_VALID); } -/* make some assumptions about the time representation rather than abstract it, - * since we currently only support one type of inline timestamping and only on - * EF10. - */ -#define MINOR_TICKS_PER_SECOND 0x8000000 -/* Fuzz factor for sync events to be out of order with RX events */ -#define FUZZ (MINOR_TICKS_PER_SECOND / 10) -#define EXPECTED_SYNC_EVENTS_PER_SECOND 4 - static inline u32 efx_rx_buf_timestamp_minor(struct efx_nic *efx, const u8 *eh) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) @@ -1745,31 +1975,33 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel, struct sk_buff *skb) { struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; u32 pkt_timestamp_major, pkt_timestamp_minor; u32 diff, carry; struct skb_shared_hwtstamps *timestamps; - pkt_timestamp_minor = (efx_rx_buf_timestamp_minor(efx, - skb_mac_header(skb)) + - (u32) efx->ptp_data->ts_corrections.rx) & - (MINOR_TICKS_PER_SECOND - 1); + if (channel->sync_events_state != SYNC_EVENTS_VALID) + return; + + pkt_timestamp_minor = efx_rx_buf_timestamp_minor(efx, skb_mac_header(skb)); /* get the difference between the packet and sync timestamps, * modulo one second */ - diff = (pkt_timestamp_minor - channel->sync_timestamp_minor) & - (MINOR_TICKS_PER_SECOND - 1); + diff = 
pkt_timestamp_minor - channel->sync_timestamp_minor; + if (pkt_timestamp_minor < channel->sync_timestamp_minor) + diff += ptp->nic_time.minor_max; + /* do we roll over a second boundary and need to carry the one? */ - carry = channel->sync_timestamp_minor + diff > MINOR_TICKS_PER_SECOND ? + carry = (channel->sync_timestamp_minor >= ptp->nic_time.minor_max - diff) ? 1 : 0; - if (diff <= MINOR_TICKS_PER_SECOND / EXPECTED_SYNC_EVENTS_PER_SECOND + - FUZZ) { + if (diff <= ptp->nic_time.sync_event_diff_max) { /* packet is ahead of the sync event by a quarter of a second or * less (allowing for fuzz) */ pkt_timestamp_major = channel->sync_timestamp_major + carry; - } else if (diff >= MINOR_TICKS_PER_SECOND - FUZZ) { + } else if (diff >= ptp->nic_time.sync_event_diff_min) { /* packet is behind the sync event but within the fuzz factor. * This means the RX packet and sync event crossed as they were * placed on the event queue, which can sometimes happen. @@ -1791,7 +2023,9 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel, /* attach the timestamps to the skb */ timestamps = skb_hwtstamps(skb); timestamps->hwtstamp = - efx_ptp_s27_to_ktime(pkt_timestamp_major, pkt_timestamp_minor); + ptp->nic_to_kernel_time(pkt_timestamp_major, + pkt_timestamp_minor, + ptp->ts_corrections.general_rx); } static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) @@ -1809,9 +2043,10 @@ static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) else if (delta < -MAX_PPB) delta = -MAX_PPB; - /* Convert ppb to fixed point ns. */ - adjustment_ns = (((s64)delta * PPB_SCALE_WORD) >> - (PPB_EXTRA_BITS + MAX_PPB_BITS)); + /* Convert ppb to fixed point ns taking care to round correctly. 
*/ + adjustment_ns = ((s64)delta * PPB_SCALE_WORD + + (1 << (ptp_data->adjfreq_ppb_shift - 1))) >> + ptp_data->adjfreq_ppb_shift; MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); MCDI_SET_DWORD(inadj, PTP_IN_PERIPH_ID, 0); @@ -1911,13 +2146,14 @@ static int efx_phc_enable(struct ptp_clock_info *ptp, return 0; } -static const struct efx_channel_type efx_ptp_channel_type = { +const struct efx_channel_type efx_ptp_channel_type = { .handle_no_channel = efx_ptp_handle_no_channel, .pre_probe = efx_ptp_probe_channel, .post_remove = efx_ptp_remove_channel, .get_name = efx_ptp_get_channel_name, /* no copy operation; there is no need to reallocate this channel */ .receive_skb = efx_ptp_rx, + .want_txqs = efx_ptp_want_txqs, .keep_eventq = false, }; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 9937a2450e57..cece961f2e82 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -77,9 +77,23 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, } if (buffer->flags & EFX_TX_BUF_SKB) { + struct sk_buff *skb = (struct sk_buff *)buffer->skb; + EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); (*pkts_compl)++; - (*bytes_compl) += buffer->skb->len; + (*bytes_compl) += skb->len; + if (tx_queue->timestamping && + (tx_queue->completed_timestamp_major || + tx_queue->completed_timestamp_minor)) { + struct skb_shared_hwtstamps hwtstamp; + + hwtstamp.hwtstamp = + efx_ptp_nic_to_kernel_time(tx_queue); + skb_tstamp_tx(skb, &hwtstamp); + + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + } dev_consume_skb_any((struct sk_buff *)buffer->skb); netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, "TX queue %d transmission id %x complete\n", @@ -828,6 +842,11 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->old_read_count = 0; tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; tx_queue->xmit_more_available = false; + tx_queue->timestamping = 
(efx_ptp_use_mac_tx_timestamps(efx) && + tx_queue->channel == efx_ptp_channel(efx)); + tx_queue->completed_desc_ptr = tx_queue->ptr_mask; + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; /* Set up default function pointers. These may get replaced by * efx_nic_init_tx() based off NIC/queue capabilities. |