From cd44bc40a1f1eb4e259889579d599f30b1287828 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 28 Feb 2019 14:31:31 +0100 Subject: mt76: introduce q->stopped parameter Introduce mt76_queue stopped parameter in order to run ieee80211_wake_queue only when mac80211 queues have been previously stopped and avoid to disable interrupts when it is not necessary Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 5 ++++- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/tx.c | 5 ++++- drivers/net/wireless/mediatek/mt76/usb.c | 6 +++++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 6eedc0ec7661..99e341cb1f92 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -180,7 +180,10 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush) else mt76_dma_sync_idx(dev, q); - wake = wake && qid < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8; + wake = wake && q->stopped && + qid < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8; + if (wake) + q->stopped = false; if (!q->queued) wake_up(&dev->tx_wait); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5dfb0601f101..3152b8c73c4a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -126,6 +126,7 @@ struct mt76_queue { int ndesc; int queued; int buf_size; + bool stopped; u8 buf_offset; u8 hw_idx; diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 5a349fe3e576..4d38a54014e8 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -289,8 +289,11 @@ mt76_tx(struct mt76_dev *dev, struct ieee80211_sta *sta, dev->queue_ops->tx_queue_skb(dev, q, skb, wcid, sta); dev->queue_ops->kick(dev, q); - if (q->queued > q->ndesc - 8) + if (q->queued > q->ndesc - 8 && !q->stopped) { ieee80211_stop_queue(dev->hw, skb_get_queue_mapping(skb)); + q->stopped = true; + } + spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(mt76_tx); diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index ae6ada370597..4c1abd492405 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -655,7 +655,11 @@ static void mt76u_tx_tasklet(unsigned long data) spin_lock_bh(&q->lock); } mt76_txq_schedule(dev, q); - wake = i < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8; + + wake = q->stopped && q->queued < q->ndesc - 8; + if (wake) + q->stopped = false; + if (!q->queued) wake_up(&dev->tx_wait); -- cgit v1.2.3 From fc7801021733b9fbf213ae2bde5dc5e73896a9c7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Feb 2019 19:38:29 +0100 Subject: mt76: rewrite dma descriptor base and ring size on queue reset Useful in case the hardware reset clobbers these values Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 99e341cb1f92..76629b98c78d 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -130,6 +130,8 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx, static void mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q) { + iowrite32(q->desc_dma, &q->regs->desc_base); + iowrite32(q->ndesc, &q->regs->ring_size); q->head = ioread32(&q->regs->dma_idx); q->tail = q->head; iowrite32(q->head, &q->regs->cpu_idx); -- cgit v1.2.3 From de3c2af15fce23c42407ad0a868ac47df2e7279a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Feb 2019 19:40:27 +0100 Subject: mt76: mt76x02: when setting a key, use PN from mac80211 Preparation for full device restart support Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 91ff6598eccf..8109bac5aee6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -73,6 +73,7 @@ int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, enum mt76x02_cipher_type cipher; u8 key_data[32]; u8 iv_data[8]; + u64 pn; cipher = mt76x02_mac_get_key_info(key, key_data); if (cipher == MT_CIPHER_NONE && key) @@ -85,9 +86,22 @@ int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, if (key) { mt76_rmw_field(dev, MT_WCID_ATTR(idx), MT_WCID_ATTR_PAIRWISE, !!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)); + + pn = atomic64_read(&key->tx_pn); + iv_data[3] = key->keyidx << 6; - if (cipher >= MT_CIPHER_TKIP) + if (cipher >= MT_CIPHER_TKIP) { iv_data[3] |= 0x20; + put_unaligned_le32(pn >> 16, &iv_data[4]); + } + + if (cipher == MT_CIPHER_TKIP) { + iv_data[0] = (pn >> 8) & 0xff; + iv_data[1] = (iv_data[0] | 0x20) & 0x7f; + iv_data[2] = pn & 0xff; + } else if (cipher >= MT_CIPHER_AES_CCMP) { + put_unaligned_le16((pn & 0xffff), &iv_data[0]); + } } mt76_wr_copy(dev, MT_WCID_IV(idx), iv_data, sizeof(iv_data)); -- cgit v1.2.3 From 004960423fe17dfff93753017b7081dab36c7180 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Feb 2019 19:42:39 +0100 Subject: mt76: mt76x2: implement full device restart on watchdog reset Restart the firmware and re-initialize the MAC to be able to recover from more kinds of hang states Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 26 +++++++ drivers/net/wireless/mediatek/mt76/mt76x02_mac.h | 2 + drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 81 +++++++++++++++++++--- drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 4 ++ drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h | 1 + .../net/wireless/mediatek/mt76/mt76x2/pci_init.c | 2 +- .../net/wireless/mediatek/mt76/mt76x2/pci_mcu.c | 21 ++++++ drivers/net/wireless/mediatek/mt76/tx.c | 3 + 9 files changed, 132 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 3152b8c73c4a..cb50e96e736b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -144,6 +144,7 @@ struct mt76_mcu_ops { const struct mt76_reg_pair *rp, int len); int (*mcu_rd_rp)(struct mt76_dev *dev, u32 base, struct mt76_reg_pair *rp, int len); + int (*mcu_restart)(struct mt76_dev *dev); }; struct mt76_queue_ops { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 8109bac5aee6..e1e0c8da5a8c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -67,6 +67,32 @@ int mt76x02_mac_shared_key_setup(struct mt76x02_dev *dev, u8 vif_idx, } EXPORT_SYMBOL_GPL(mt76x02_mac_shared_key_setup); +void mt76x02_mac_wcid_sync_pn(struct mt76x02_dev *dev, u8 idx, + struct ieee80211_key_conf *key) +{ + enum mt76x02_cipher_type cipher; + u8 key_data[32]; + u32 iv, eiv; + u64 pn; + + cipher = mt76x02_mac_get_key_info(key, key_data); + iv = mt76_rr(dev, MT_WCID_IV(idx)); + eiv = mt76_rr(dev, MT_WCID_IV(idx) + 4); + + pn = (u64)eiv << 16; + if (cipher == MT_CIPHER_TKIP) { + pn |= (iv >> 16) & 0xff; + pn |= (iv & 0xff) << 8; + } else if (cipher >= MT_CIPHER_AES_CCMP) { + pn |= iv & 0xffff; + } else { + return; + } + + atomic64_set(&key->tx_pn, pn); +} + + int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, struct ieee80211_key_conf *key) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h index 6b1f25d2f64c..caeeef96c42f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h @@ -177,6 +177,8 @@ int mt76x02_mac_shared_key_setup(struct mt76x02_dev *dev, u8 vif_idx, u8 key_idx, struct ieee80211_key_conf *key); int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, struct ieee80211_key_conf *key); +void mt76x02_mac_wcid_sync_pn(struct mt76x02_dev *dev, u8 idx, + struct ieee80211_key_conf *key); void mt76x02_mac_wcid_setup(struct mt76x02_dev *dev, u8 idx, u8 vif_idx, u8 *mac); void mt76x02_mac_wcid_set_drop(struct mt76x02_dev *dev, u8 idx, bool drop); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 1229f19f2b02..9de015bcd4f9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -19,6 +19,7 @@ #include #include "mt76x02.h" +#include "mt76x02_mcu.h" #include "mt76x02_trace.h" struct beacon_bc_data { @@ -418,9 +419,65 @@ static bool mt76x02_tx_hang(struct mt76x02_dev *dev) return i < 4; } +static void mt76x02_key_sync(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, void *data) +{ + struct mt76x02_dev *dev = hw->priv; + struct mt76_wcid *wcid; + + if (!sta) + return; + + wcid = (struct mt76_wcid *) sta->drv_priv; + + if (wcid->hw_key_idx != key->keyidx || wcid->sw_iv) + return; + + mt76x02_mac_wcid_sync_pn(dev, wcid->idx, key); +} + +static void mt76x02_reset_state(struct mt76x02_dev *dev) +{ + int i; + + clear_bit(MT76_STATE_RUNNING, &dev->mt76.state); + + rcu_read_lock(); + + ieee80211_iter_keys_rcu(dev->mt76.hw, NULL, mt76x02_key_sync, NULL); + + for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid); i++) { + struct mt76_wcid *wcid = rcu_dereference(dev->mt76.wcid[i]); + struct mt76x02_sta *msta; + struct ieee80211_sta *sta; + struct ieee80211_vif *vif; + void *priv; + + if (!wcid) + continue; + + priv = msta = container_of(wcid, struct mt76x02_sta, wcid); + sta = container_of(priv, struct ieee80211_sta, drv_priv); + + priv = msta->vif; + vif = container_of(priv, struct ieee80211_vif, drv_priv); + + mt76_sta_state(dev->mt76.hw, vif, sta, + IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); + memset(msta, 0, sizeof(*msta)); + } + + rcu_read_unlock(); + + dev->vif_mask = 0; + dev->beacon_mask = 0; +} + static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) { u32 mask = dev->mt76.mmio.irqmask; + bool restart = dev->mt76.mcu_ops->mcu_restart; int i; ieee80211_stop_queues(dev->mt76.hw); @@ -432,6 +489,9 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) napi_disable(&dev->mt76.napi[i]); + if (restart) + mt76x02_reset_state(dev); + mutex_lock(&dev->mt76.mutex); if (dev->beacon_mask) @@ -452,20 +512,21 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) /* let fw reset DMA */ mt76_set(dev, 0x734, 0x3); + if (restart) + dev->mt76.mcu_ops->mcu_restart(&dev->mt76); + for (i = 0; i < ARRAY_SIZE(dev->mt76.q_tx); i++) mt76_queue_tx_cleanup(dev, i, true); for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++) mt76_queue_rx_reset(dev, i); - mt76_wr(dev, MT_MAC_SYS_CTRL, - MT_MAC_SYS_CTRL_ENABLE_TX | MT_MAC_SYS_CTRL_ENABLE_RX); - mt76_set(dev, MT_WPDMA_GLO_CFG, - MT_WPDMA_GLO_CFG_TX_DMA_EN | MT_WPDMA_GLO_CFG_RX_DMA_EN); + mt76x02_mac_start(dev); + if (dev->ed_monitor) mt76_set(dev, MT_TXOP_CTRL_CFG, MT_TXOP_ED_CCA_EN); - if (dev->beacon_mask) + if (dev->beacon_mask && !restart) mt76_set(dev, MT_BEACON_TIME_CFG, MT_BEACON_TIME_CFG_BEACON_TX | MT_BEACON_TIME_CFG_TBTT_EN); @@ -486,9 +547,13 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) napi_schedule(&dev->mt76.napi[i]); } - ieee80211_wake_queues(dev->mt76.hw); - - mt76_txq_schedule_all(&dev->mt76); + if (restart) { + mt76x02_mcu_function_select(dev, Q_SELECT, 1); + ieee80211_restart_hw(dev->mt76.hw); + } else { + ieee80211_wake_queues(dev->mt76.hw); + mt76_txq_schedule_all(&dev->mt76); + } } static void mt76x02_check_tx_hang(struct mt76x02_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index a48c261b0c63..28eccb3119d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -237,6 +237,8 @@ int mt76x02_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif, struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv; int idx = 0; + memset(msta, 0, sizeof(*msta)); + idx = mt76_wcid_alloc(dev->mt76.wcid_mask, ARRAY_SIZE(dev->mt76.wcid)); if (idx < 0) return -ENOSPC; @@ -274,6 +276,8 @@ mt76x02_vif_init(struct mt76x02_dev *dev, struct ieee80211_vif *vif, struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv; struct mt76_txq *mtxq; + memset(mvif, 0, sizeof(*mvif)); + mvif->idx = idx; mvif->group_wcid.idx = MT_VIF_WCID(idx); mvif->group_wcid.hw_key_idx = -1; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h index 6c619f1c65c9..d7abe3d73bad 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h @@ -71,6 +71,7 @@ int mt76x2_mcu_load_cr(struct mt76x02_dev *dev, u8 type, u8 temp_level, void mt76x2_cleanup(struct mt76x02_dev *dev); +int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard); void mt76x2_reset_wlan(struct mt76x02_dev *dev, bool enable); void mt76x2_init_txpower(struct mt76x02_dev *dev, struct ieee80211_supported_band *sband); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c index 984d9c4c2e1a..d3927a13e92e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c @@ -77,7 +77,7 @@ mt76x2_fixup_xtal(struct mt76x02_dev *dev) } } -static int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard) +int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard) { const u8 *macaddr = dev->mt76.macaddr; u32 val; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c index 03e24ae7f66c..605dc66ae83b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c @@ -165,9 +165,30 @@ error: return -ENOENT; } +static int +mt76pci_mcu_restart(struct mt76_dev *mdev) +{ + struct mt76x02_dev *dev; + int ret; + + dev = container_of(mdev, struct mt76x02_dev, mt76); + + mt76x02_mcu_cleanup(dev); + mt76x2_mac_reset(dev, true); + + ret = mt76pci_load_firmware(dev); + if (ret) + return ret; + + mt76_wr(dev, MT_WPDMA_RST_IDX, ~0); + + return 0; +} + int mt76x2_mcu_init(struct mt76x02_dev *dev) { static const struct mt76_mcu_ops mt76x2_mcu_ops = { + .mcu_restart = mt76pci_mcu_restart, .mcu_send_msg = mt76x02_mcu_msg_send, }; int ret; diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 4d38a54014e8..fc7dffe066be 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -580,6 +580,9 @@ void mt76_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq) struct mt76_txq *mtxq = (struct mt76_txq *) txq->drv_priv; struct mt76_queue *hwq = mtxq->hwq; + if (!test_bit(MT76_STATE_RUNNING, &dev->state)) + return; + spin_lock_bh(&hwq->lock); if (list_empty(&mtxq->list)) list_add_tail(&mtxq->list, &hwq->swq); -- cgit v1.2.3 From 7b25d3b8e485c7721cba9c71b44d1c286e61c8e7 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 28 Feb 2019 16:11:06 +0100 Subject: mt76x02: fix hdr pointer in write txwi for USB Since we add txwi at the begining of skb->data, it no longer point to ieee80211_hdr. This breaks settings TS bit for probe response and beacons. Acked-by: Lorenzo Bianconi Signed-off-by: Stanislaw Gruszka Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c index 43f07461c8d3..6fb52b596d42 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c @@ -85,8 +85,9 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, mt76x02_insert_hdr_pad(skb); - txwi = skb_push(skb, sizeof(struct mt76x02_txwi)); + txwi = (struct mt76x02_txwi *)(skb->data - sizeof(struct mt76x02_txwi)); mt76x02_mac_write_txwi(dev, txwi, skb, wcid, sta, len); + skb_push(skb, sizeof(struct mt76x02_txwi)); pid = mt76_tx_status_skb_add(mdev, wcid, skb); txwi->pktid = pid; -- cgit v1.2.3 From 3fd0824a2f800a2870569d385917ae1102647055 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 1 Mar 2019 16:51:03 +0100 Subject: mt76: mt76x02: only update the base mac address if necessary Also update the mask first before calculating the vif index. Fixes an issue where adding back the same interfaces in a different order fails because of duplicate vif index use Fixes: 06662264ce2ad ("mt76x02: use mask for vifs") Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 28eccb3119d1..cd072ac614f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -293,6 +293,12 @@ mt76x02_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) struct mt76x02_dev *dev = hw->priv; unsigned int idx = 0; + /* Allow to change address in HW if we create first interface. */ + if (!dev->vif_mask && + (((vif->addr[0] ^ dev->mt76.macaddr[0]) & ~GENMASK(4, 1)) || + memcmp(vif->addr + 1, dev->mt76.macaddr + 1, ETH_ALEN - 1))) + mt76x02_mac_setaddr(dev, vif->addr); + if (vif->addr[0] & BIT(1)) idx = 1 + (((dev->mt76.macaddr[0] ^ vif->addr[0]) >> 2) & 7); @@ -315,10 +321,6 @@ mt76x02_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) if (dev->vif_mask & BIT(idx)) return -EBUSY; - /* Allow to change address in HW if we create first interface. */ - if (!dev->vif_mask && !ether_addr_equal(dev->mt76.macaddr, vif->addr)) - mt76x02_mac_setaddr(dev, vif->addr); - dev->vif_mask |= BIT(idx); mt76x02_vif_init(dev, vif, idx); -- cgit v1.2.3 From a0ac806109277bd865b1048ec521f708b195670b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 2 Mar 2019 18:19:20 +0100 Subject: mt76: mt76x02: reduce false positives in ED/CCA tx blocking Full tx blocking (as opposed to CCA blocking) should only happen if there is a continuous non-802.11 signal above the energy detect threshold. Unfortunately the ED/CCA counter can't detect that, as it also counts 802.11 signals as busy. Similar to the vendor code, implement a learning mode that waits until the AGC gain has already been adjusted to the lowest value (due to false CCA events), and the number of false CCA events still remains high, and the blocking threshold is exceeded for more than 5 seconds. Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02.h | 3 +++ drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 25 ++++++++++++++++++++---- drivers/net/wireless/mediatek/mt76/mt76x02_phy.c | 2 ++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 6915cce5def9..42cc9da68f7b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -51,6 +51,7 @@ struct mt76x02_calibration { u16 false_cca; s8 avg_rssi_all; s8 agc_gain_adjust; + s8 agc_lowest_gain; s8 low_gain; s8 temp_vco; @@ -114,8 +115,10 @@ struct mt76x02_dev { struct mt76x02_dfs_pattern_detector dfs_pd; /* edcca monitor */ + unsigned long ed_trigger_timeout; bool ed_tx_blocked; bool ed_monitor; + u8 ed_monitor_learning; u8 ed_trigger; u8 ed_silent; ktime_t ed_time; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index e1e0c8da5a8c..9ed231abe916 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -960,6 +960,7 @@ void mt76x02_edcca_init(struct mt76x02_dev *dev, bool enable) } } mt76x02_edcca_tx_enable(dev, true); + dev->ed_monitor_learning = true; /* clear previous CCA timer value */ mt76_rr(dev, MT_ED_CCA_TIMER); @@ -969,6 +970,10 @@ EXPORT_SYMBOL_GPL(mt76x02_edcca_init); #define MT_EDCCA_TH 92 #define MT_EDCCA_BLOCK_TH 2 +#define MT_EDCCA_LEARN_TH 50 +#define MT_EDCCA_LEARN_CCA 180 +#define MT_EDCCA_LEARN_TIMEOUT (20 * HZ) + static void mt76x02_edcca_check(struct mt76x02_dev *dev) { ktime_t cur_time; @@ -991,11 +996,23 @@ static void mt76x02_edcca_check(struct mt76x02_dev *dev) dev->ed_trigger = 0; } - if (dev->ed_trigger > MT_EDCCA_BLOCK_TH && - !dev->ed_tx_blocked) + if (dev->cal.agc_lowest_gain && + dev->cal.false_cca > MT_EDCCA_LEARN_CCA && + dev->ed_trigger > MT_EDCCA_LEARN_TH) { + dev->ed_monitor_learning = false; + dev->ed_trigger_timeout = jiffies + 20 * HZ; + } else if (!dev->ed_monitor_learning && + time_is_after_jiffies(dev->ed_trigger_timeout)) { + dev->ed_monitor_learning = true; + mt76x02_edcca_tx_enable(dev, true); + } + + if (dev->ed_monitor_learning) + return; + + if (dev->ed_trigger > MT_EDCCA_BLOCK_TH && !dev->ed_tx_blocked) mt76x02_edcca_tx_enable(dev, false); - else if (dev->ed_silent > MT_EDCCA_BLOCK_TH && - dev->ed_tx_blocked) + else if (dev->ed_silent > MT_EDCCA_BLOCK_TH && dev->ed_tx_blocked) mt76x02_edcca_tx_enable(dev, true); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c index a020c757ba5c..a54b63a96eae 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c @@ -194,6 +194,8 @@ bool mt76x02_phy_adjust_vga_gain(struct mt76x02_dev *dev) ret = true; } + dev->cal.agc_lowest_gain = dev->cal.agc_gain_adjust >= limit; + return ret; } EXPORT_SYMBOL_GPL(mt76x02_phy_adjust_vga_gain); -- cgit v1.2.3 From 7635276989a183bdb424f9e930f836b6264d54dc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 11:06:19 +0100 Subject: mt76: mt7603: fix tx status HT rate validation Use the correct variable in the check. Fixes an uninitialized variable warning Reported-by: Gustavo A. R. Silva Fixes: c8846e1015022 ("mt76: add driver for MT7603E and MT7628/7688") Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 0a0115861b51..5e31d7da96fc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -1072,7 +1072,7 @@ out: case MT_PHY_TYPE_HT: final_rate_flags |= IEEE80211_TX_RC_MCS; final_rate &= GENMASK(5, 0); - if (i > 15) + if (final_rate > 15) return false; break; default: -- cgit v1.2.3 From 45a042e3026824a7e910db7a4dd38fef0540b902 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 15:10:00 +0100 Subject: mt76: mt76x2: fix external LNA gain settings Devices with external LNA need different values for AGC registers 8 and 9 Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/phy.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c index 1848e8ab2e21..c7e71f2ba2a7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c @@ -260,10 +260,15 @@ mt76x2_phy_set_gain_val(struct mt76x02_dev *dev) gain_val[0] = dev->cal.agc_gain_cur[0] - dev->cal.agc_gain_adjust; gain_val[1] = dev->cal.agc_gain_cur[1] - dev->cal.agc_gain_adjust; - if (dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40) + val = 0x1836 << 16; + if (!mt76x2_has_ext_lna(dev) && + dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40) val = 0x1e42 << 16; - else - val = 0x1836 << 16; + + if (mt76x2_has_ext_lna(dev) && + dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ && + dev->mt76.chandef.width < NL80211_CHAN_WIDTH_40) + val = 0x0f36 << 16; val |= 0xf8; -- cgit v1.2.3 From b8cfd87ac24273e36fbd3ecda631f3ba6566d493 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 15:12:14 +0100 Subject: mt76: mt76x2: fix 2.4 GHz channel gain settings AGC register 35, 37 override for the low gain setting should only be done on 5 GHz. Also, 2.4 GHz needs a different value for register 35 Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/phy.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c index c7e71f2ba2a7..769a9b972044 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c @@ -285,6 +285,7 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) { u8 *gain = dev->cal.agc_gain_init; u8 low_gain_delta, gain_delta; + u32 agc_35, agc_37; bool gain_change; int low_gain; u32 val; @@ -323,6 +324,16 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) else low_gain_delta = 14; + agc_37 = 0x2121262c; + if (dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ) + agc_35 = 0x11111516; + else if (low_gain == 2) + agc_35 = agc_37 = 0x08080808; + else if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80) + agc_35 = 0x10101014; + else + agc_35 = 0x11111116; + if (low_gain == 2) { mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a990); mt76_wr(dev, MT_BBP(AGC, 35), 0x08080808); @@ -331,15 +342,13 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) dev->cal.agc_gain_adjust = 0; } else { mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a991); - if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80) - mt76_wr(dev, MT_BBP(AGC, 35), 0x10101014); - else - mt76_wr(dev, MT_BBP(AGC, 35), 0x11111116); - mt76_wr(dev, MT_BBP(AGC, 37), 0x2121262C); gain_delta = 0; dev->cal.agc_gain_adjust = low_gain_delta; } + mt76_wr(dev, MT_BBP(AGC, 35), agc_35); + mt76_wr(dev, MT_BBP(AGC, 37), agc_37); + dev->cal.agc_gain_cur[0] = gain[0] - gain_delta; dev->cal.agc_gain_cur[1] = gain[1] - gain_delta; mt76x2_phy_set_gain_val(dev); -- cgit v1.2.3 From f25e813bf48dce541c29b12cfc19a2c25b6db915 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 18:39:08 +0100 Subject: mt76: mt7603: clear ps filtering mode before releasing buffered frames Fixes sending them, otherwise they loop back right into the buffer Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index b10775ed92e6..8da0b8707d24 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -399,6 +399,8 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw, __skb_queue_head_init(&list); + mt7603_wtbl_set_ps(dev, msta, false); + spin_lock_bh(&dev->ps_lock); skb_queue_walk_safe(&msta->psq, skb, tmp) { if (!nframes) -- cgit v1.2.3 From fca9615f1a436d1e9f64042cda08a34fe32ce668 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 18:40:18 +0100 Subject: mt76: mt7603: fix up hardware queue index for PS filtered packets Make the queue index match the hardware queue on which they get sent out Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index d69e82c66ab2..494a48e023ef 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -50,6 +50,10 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) val = le32_to_cpu(txd[0]); skb_set_queue_mapping(skb, FIELD_GET(MT_TXD0_Q_IDX, val)); + val &= ~(MT_TXD0_P_IDX | MT_TXD0_Q_IDX); + val |= FIELD_PREP(MT_TXD0_Q_IDX, MT_TX_HW_QUEUE_MGMT); + txd[0] = cpu_to_le32(val); + spin_lock_bh(&dev->ps_lock); __skb_queue_tail(&msta->psq, skb); if (skb_queue_len(&msta->psq) >= 64) { -- cgit v1.2.3 From e004b7006600258615b969f05c4d008c1d68973b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 18:51:35 +0100 Subject: mt76: mt7603: notify mac80211 about buffered frames in ps queue Also fix the size check for filtered powersave frames Fixes a corner case with waking up clients Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index 494a48e023ef..b3ae0aaea62a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -27,12 +27,16 @@ static void mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) { __le32 *txd = (__le32 *)skb->data; + struct ieee80211_hdr *hdr; + struct ieee80211_sta *sta; struct mt7603_sta *msta; struct mt76_wcid *wcid; + void *priv; int idx; u32 val; + u8 tid; - if (skb->len < sizeof(MT_TXD_SIZE) + sizeof(struct ieee80211_hdr)) + if (skb->len < MT_TXD_SIZE + sizeof(struct ieee80211_hdr)) goto free; val = le32_to_cpu(txd[1]); @@ -46,7 +50,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) if (!wcid) goto free; - msta = container_of(wcid, struct mt7603_sta, wcid); + priv = msta = container_of(wcid, struct mt7603_sta, wcid); val = le32_to_cpu(txd[0]); skb_set_queue_mapping(skb, FIELD_GET(MT_TXD0_Q_IDX, val)); @@ -54,6 +58,11 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) val |= FIELD_PREP(MT_TXD0_Q_IDX, MT_TX_HW_QUEUE_MGMT); txd[0] = cpu_to_le32(val); + sta = container_of(priv, struct ieee80211_sta, drv_priv); + hdr = (struct ieee80211_hdr *) &skb->data[MT_TXD_SIZE]; + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + ieee80211_sta_set_buffered(sta, tid, true); + spin_lock_bh(&dev->ps_lock); __skb_queue_tail(&msta->psq, skb); if (skb_queue_len(&msta->psq) >= 64) { -- cgit v1.2.3 From b7001f46085e06a74e4677b44ac55566f66e55aa Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 19:16:03 +0100 Subject: mt76: mt7603: clear the service period on releasing PS filtered packets These packets have no txwi entry in the ring, so tracking via tx status does not work. To prevent PS poll requests from being unanswered, end the service period right away Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 8da0b8707d24..ea25eff5e81c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -416,6 +416,9 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw, } spin_unlock_bh(&dev->ps_lock); + if (!skb_queue_empty(&list)) + ieee80211_sta_eosp(sta); + mt7603_ps_tx_list(dev, &list); if (nframes) -- cgit v1.2.3 From ffc9a7ff59a41df9a0a265b2dd04d97f2b35893a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 19:19:21 +0100 Subject: mt76: when releasing PS frames, end the service period if no frame was found Fixes a rare corner case if the txq dequeue attempt fails, but mac80211 still has PS buffered packets Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index fc7dffe066be..2585df512335 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -377,7 +377,10 @@ mt76_release_buffered_frames(struct ieee80211_hw *hw, struct ieee80211_sta *sta, if (last_skb) { mt76_queue_ps_skb(dev, sta, last_skb, true); dev->queue_ops->kick(dev, hwq); + } else { + ieee80211_sta_eosp(sta); } + spin_unlock_bh(&hwq->lock); } EXPORT_SYMBOL_GPL(mt76_release_buffered_frames); -- cgit v1.2.3 From 643749d4a82b150afd2f87b29ca3dda885f8e384 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 19:40:36 +0100 Subject: mt76: mt76x02: disable ED/CCA by default This feature has been reported to cause stability issues on several systems. Disable it until it has been fixed and verified. It can still be enabled through debugfs Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02.h | 1 + .../net/wireless/mediatek/mt76/mt76x02_debugfs.c | 27 ++++++++++++++++++++++ drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c | 3 ++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 42cc9da68f7b..b5a36d9fb2bd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -118,6 +118,7 @@ struct mt76x02_dev { unsigned long ed_trigger_timeout; bool ed_tx_blocked; bool ed_monitor; + u8 ed_monitor_enabled; u8 ed_monitor_learning; u8 ed_trigger; u8 ed_silent; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c index 7580c5c986ff..b1d6fd4861e3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c @@ -116,6 +116,32 @@ static int read_agc(struct seq_file *file, void *data) return 0; } +static int +mt76_edcca_set(void *data, u64 val) +{ + struct mt76x02_dev *dev = data; + enum nl80211_dfs_regions region = dev->dfs_pd.region; + + dev->ed_monitor_enabled = !!val; + dev->ed_monitor = dev->ed_monitor_enabled && + region == NL80211_DFS_ETSI; + mt76x02_edcca_init(dev, true); + + return 0; +} + +static int +mt76_edcca_get(void *data, u64 *val) +{ + struct mt76x02_dev *dev = data; + + *val = dev->ed_monitor_enabled; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_edcca, mt76_edcca_get, mt76_edcca_set, + "%lld\n"); + void mt76x02_init_debugfs(struct mt76x02_dev *dev) { struct dentry *dir; @@ -127,6 +153,7 @@ void mt76x02_init_debugfs(struct mt76x02_dev *dev) debugfs_create_u8("temperature", 0400, dir, &dev->cal.temp); debugfs_create_bool("tpc", 0600, dir, &dev->enable_tpc); + debugfs_create_file("edcca", 0400, dir, dev, &fops_edcca); debugfs_create_file("ampdu_stat", 0400, dir, dev, &fops_ampdu_stat); debugfs_create_file("dfs_stats", 0400, dir, dev, &fops_dfs_stat); debugfs_create_devm_seqfile(dev->mt76.dev, "txpower", dir, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c index e4649103efd4..17d12d212d1b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c @@ -885,7 +885,8 @@ mt76x02_dfs_set_domain(struct mt76x02_dev *dev, if (dfs_pd->region != region) { tasklet_disable(&dfs_pd->dfs_tasklet); - dev->ed_monitor = region == NL80211_DFS_ETSI; + dev->ed_monitor = dev->ed_monitor_enabled && + region == NL80211_DFS_ETSI; mt76x02_edcca_init(dev, true); dfs_pd->region = region; -- cgit v1.2.3 From b126c889743513543d007ac1c5c82b61ef003133 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 4 Mar 2019 08:36:11 +0100 Subject: mt76: mt7603: set moredata flag when queueing ps-filtered packets Clients should poll for more packets afterwards Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index ea25eff5e81c..cc0fe0933b2d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -5,6 +5,7 @@ #include #include #include "mt7603.h" +#include "mac.h" #include "eeprom.h" static int @@ -385,6 +386,15 @@ mt7603_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps) mt7603_ps_tx_list(dev, &list); } +static void +mt7603_ps_set_more_data(struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr; + + hdr = (struct ieee80211_hdr *) &skb->data[MT_TXD_SIZE]; + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); +} + static void mt7603_release_buffered_frames(struct ieee80211_hw *hw, struct ieee80211_sta *sta, @@ -411,6 +421,7 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw, skb_set_queue_mapping(skb, MT_TXQ_PSD); __skb_unlink(skb, &msta->psq); + mt7603_ps_set_more_data(skb); __skb_queue_tail(&list, skb); nframes--; } -- cgit v1.2.3 From 7c1b998d3483acf785c45035c0e14a62023f1edb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 4 Mar 2019 01:10:00 +0000 Subject: mt76: fix return value check in mt76_wmac_probe() In case of error, the function devm_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: c8846e101502 ("mt76: add driver for MT7603E and MT7628/7688") Signed-off-by: Wei Yongjun Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c index e13fea80d970..b920be1f5718 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c @@ -23,9 +23,9 @@ mt76_wmac_probe(struct platform_device *pdev) } mem_base = devm_ioremap_resource(&pdev->dev, res); - if (!mem_base) { + if (IS_ERR(mem_base)) { dev_err(&pdev->dev, "Failed to get memory resource\n"); - return -EINVAL; + return PTR_ERR(mem_base); } mdev = mt76_alloc_device(&pdev->dev, sizeof(*dev), &mt7603_ops, -- cgit v1.2.3 From 411e05f4e87794332f328ca3fa201b731f023db5 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 6 Mar 2019 10:51:12 +0100 Subject: mt76x2u: remove duplicated entry in mt76x2u_device_table Remove duplicated entry in mt76x2u_device_table since Alfa AWUS036ACM and Aukey USB-AC1200 have the same ids Fixes: 62a25dc56990a ("mt76x2u: Add support for Alfa AWUS036ACM") Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c index ddb6b2c48e01..7a5d539873ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c @@ -21,11 +21,10 @@ #include "mt76x2u.h" static const struct usb_device_id mt76x2u_device_table[] = { - { USB_DEVICE(0x0e8d, 0x7612) }, /* Alfa AWUS036ACM */ { USB_DEVICE(0x0b05, 0x1833) }, /* Asus USB-AC54 */ { USB_DEVICE(0x0b05, 0x17eb) }, /* Asus USB-AC55 */ { USB_DEVICE(0x0b05, 0x180b) }, /* Asus USB-N53 B1 */ - { USB_DEVICE(0x0e8d, 0x7612) }, /* Aukey USB-AC1200 */ + { USB_DEVICE(0x0e8d, 0x7612) }, /* Aukey USBAC1200 - Alfa AWUS036ACM */ { USB_DEVICE(0x057c, 0x8503) }, /* Avm FRITZ!WLAN AC860 */ { USB_DEVICE(0x7392, 0xb711) }, /* Edimax EW 7722 UAC */ { USB_DEVICE(0x0846, 0x9053) }, /* Netgear A6210 */ -- cgit v1.2.3 From 688cd8bd2c0fa9dc88e5ced55a73ddc79edf875d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 21:38:42 +0100 Subject: iwlwifi: fix 64-bit division do_div() expects unsigned operands and otherwise triggers a warning like: drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c:465:2: error: comparison of distinct pointer types ('typeof ((rtt_avg)) *' (aka 'long long *') and 'uint64_t *' (aka 'unsigned long long *')) [-Werror,-Wcompare-distinct-pointer-types] do_div(rtt_avg, 6666); ^~~~~~~~~~~~~~~~~~~~~ include/asm-generic/div64.h:222:28: note: expanded from macro 'do_div' (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ ~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~ 1 error generated. Change the do_div() to the simpler div_s64() that can handle negative inputs correctly. Fixes: 937b10c0de68 ("iwlwifi: mvm: add debug prints for FTM") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index e9822a3ec373..94132cfd1f56 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -460,9 +460,7 @@ static int iwl_mvm_ftm_range_resp_valid(struct iwl_mvm *mvm, u8 request_id, static void iwl_mvm_debug_range_resp(struct iwl_mvm *mvm, u8 index, struct cfg80211_pmsr_result *res) { - s64 rtt_avg = res->ftm.rtt_avg * 100; - - do_div(rtt_avg, 6666); + s64 rtt_avg = div_s64(res->ftm.rtt_avg * 100, 6666); IWL_DEBUG_INFO(mvm, "entry %d\n", index); IWL_DEBUG_INFO(mvm, "\tstatus: %d\n", res->status); -- cgit v1.2.3 From f38a1f0a5a5710b14c0e899628c815522c6111cf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Mar 2019 15:58:20 -0800 Subject: libbpf: handle BTF parsing and loading properly This patch splits and cleans up error handling logic for loading BTF data. Previously, if BTF data was parsed successfully, but failed to load into kernel, we'd report nonsensical error code, instead of error returned from btf__load(). Now btf__new() and btf__load() are handled separately with proper cleanup and warning reporting. Fixes: d29d87f7e612 ("btf: separate btf creation and loading") Reported-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d5b830d60601..5e977d2688da 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -835,12 +835,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.maps_shndx = idx; else if (strcmp(name, BTF_ELF_SEC) == 0) { obj->btf = btf__new(data->d_buf, data->d_size); - if (IS_ERR(obj->btf) || btf__load(obj->btf)) { + if (IS_ERR(obj->btf)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_ELF_SEC, PTR_ERR(obj->btf)); - if (!IS_ERR(obj->btf)) - btf__free(obj->btf); obj->btf = NULL; + continue; + } + err = btf__load(obj->btf); + if (err) { + pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n", + BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + err = 0; } } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; -- cgit v1.2.3 From d6f1837107c00fa7b2fdb606acf65b33b455dbfd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Mar 2019 22:21:09 -0700 Subject: selftests/bpf: fix segfault of test_progs when prog loading failed The test_progs subtests, test_spin_lock() and test_map_lock(), requires BTF present to run successfully. Currently, when BTF failed to load, test_progs will segfault, $ ./test_progs ... 12: (bf) r1 = r8 13: (85) call bpf_spin_lock#93 map 'hash_map' has to have BTF in order to use bpf_spin_lock libbpf: -- END LOG -- libbpf: failed to load program 'map_lock_demo' libbpf: failed to load object './test_map_lock.o' test_map_lock:bpf_prog_load errno 13 Segmentation fault The segfault is caused by uninitialized variable "obj", which is used in bpf_object__close(obj), when bpf prog failed to load. Initializing variable "obj" to NULL in two occasions fixed the problem. $ ./test_progs ... Summary: 219 PASSED, 2 FAILED Fixes: b4d4556c3266 ("selftests/bpf: add bpf_spin_lock verifier tests") Fixes: ba72a7b4badb ("selftests/bpf: test for BPF_F_LOCK") Reported-by: Daniel Borkmann Signed-off-by: Yonghong Song Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/prog_tests/map_lock.c | 2 +- tools/testing/selftests/bpf/prog_tests/spinlock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index 90f8a206340a..ee99368c595c 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -37,7 +37,7 @@ void test_map_lock(void) const char *file = "./test_map_lock.o"; int prog_fd, map_fd[2], vars[17] = {}; pthread_t thread_id[6]; - struct bpf_object *obj; + struct bpf_object *obj = NULL; int err = 0, key = 0, i; void *ret; diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 9a573a9675d7..114ebe6a438e 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -5,7 +5,7 @@ void test_spinlock(void) { const char *file = "./test_spin_lock.o"; pthread_t thread_id[4]; - struct bpf_object *obj; + struct bpf_object *obj = NULL; int prog_fd; int err = 0, i; void *ret; -- cgit v1.2.3 From 6bf21b54a596d60905cfc7e8af8e2fe16d9fe7e9 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 12 Mar 2019 09:59:45 +0100 Subject: libbpf: fix to reject unknown flags in xsk_socket__create() In xsk_socket__create(), the libbpf_flags field was not checked for setting currently unused/unknown flags. This patch fixes that by returning -EINVAL if the user has set any flag that is not in use at this point in time. Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Signed-off-by: Magnus Karlsson Reviewed-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/lib/bpf/xsk.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index f98ac82c9aea..8d0078b65486 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -126,8 +126,8 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_headroom = usr_cfg->frame_headroom; } -static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, - const struct xsk_socket_config *usr_cfg) +static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) { if (!usr_cfg) { cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; @@ -135,14 +135,19 @@ static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, cfg->libbpf_flags = 0; cfg->xdp_flags = 0; cfg->bind_flags = 0; - return; + return 0; } + if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) + return -EINVAL; + cfg->rx_size = usr_cfg->rx_size; cfg->tx_size = usr_cfg->tx_size; cfg->libbpf_flags = usr_cfg->libbpf_flags; cfg->xdp_flags = usr_cfg->xdp_flags; cfg->bind_flags = usr_cfg->bind_flags; + + return 0; } int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, @@ -557,7 +562,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } strncpy(xsk->ifname, ifname, IFNAMSIZ); - xsk_set_xdp_socket_config(&xsk->config, usr_config); + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_socket; if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, -- cgit v1.2.3 From 2795e8c251614ac0784c9d41008551109f665716 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 02:25:17 -0500 Subject: net: ieee802154: fix a potential NULL pointer dereference In case alloc_ordered_workqueue fails, the fix releases sources and returns -ENOMEM to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Acked-by: Michael Hennerich Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index cd1d8faccca5..cd6b95e673a5 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1268,6 +1268,10 @@ static int adf7242_probe(struct spi_device *spi) INIT_DELAYED_WORK(&lp->work, adf7242_rx_cal_work); lp->wqueue = alloc_ordered_workqueue(dev_name(&spi->dev), WQ_MEM_RECLAIM); + if (unlikely(!lp->wqueue)) { + ret = -ENOMEM; + goto err_hw_init; + } ret = adf7242_hw_init(lp); if (ret) -- cgit v1.2.3 From 19b39a25388e71390e059906c979f87be4ef0c71 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Feb 2019 13:10:29 +0800 Subject: ieee802154: hwsim: propagate genlmsg_reply return code genlmsg_reply can fail, so propagate its return code Signed-off-by: Li RongQing Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index b6743f03dce0..3b88846de31b 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -324,7 +324,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) goto out_err; } - genlmsg_reply(skb, info); + res = genlmsg_reply(skb, info); break; } -- cgit v1.2.3 From 1b986589680a2a5b6fc1ac196ea69925a93d9dd9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:02 -0700 Subject: bpf: Fix bpf_tcp_sock and bpf_sk_fullsock issue related to bpf_sk_release Lorenz Bauer [thanks!] reported that a ptr returned by bpf_tcp_sock(sk) can still be accessed after bpf_sk_release(sk). Both bpf_tcp_sock() and bpf_sk_fullsock() have the same issue. This patch addresses them together. A simple reproducer looks like this: sk = bpf_sk_lookup_tcp(); /* if (!sk) ... */ tp = bpf_tcp_sock(sk); /* if (!tp) ... */ bpf_sk_release(sk); snd_cwnd = tp->snd_cwnd; /* oops! The verifier does not complain. */ The problem is the verifier did not scrub the register's states of the tcp_sock ptr (tp) after bpf_sk_release(sk). [ Note that when calling bpf_tcp_sock(sk), the sk is not always refcount-acquired. e.g. bpf_tcp_sock(skb->sk). The verifier works fine for this case. ] Currently, the verifier does not track if a helper's return ptr (in REG_0) is "carry"-ing one of its argument's refcount status. To carry this info, the reg1->id needs to be stored in reg0. One approach was tried, like "reg0->id = reg1->id", when calling "bpf_tcp_sock()". The main idea was to avoid adding another "ref_obj_id" for the same reg. However, overlapping the NULL marking and ref tracking purpose in one "id" does not work well: ref_sk = bpf_sk_lookup_tcp(); fullsock = bpf_sk_fullsock(ref_sk); tp = bpf_tcp_sock(ref_sk); if (!fullsock) { bpf_sk_release(ref_sk); return 0; } /* fullsock_reg->id is marked for NOT-NULL. * Same for tp_reg->id because they have the same id. */ /* oops. verifier did not complain about the missing !tp check */ snd_cwnd = tp->snd_cwnd; Hence, a new "ref_obj_id" is needed in "struct bpf_reg_state". With a new ref_obj_id, when bpf_sk_release(sk) is called, the verifier can scrub all reg states which has a ref_obj_id match. It is done with the changes in release_reg_references() in this patch. While fixing it, sk_to_full_sk() is removed from bpf_tcp_sock() and bpf_sk_fullsock() to avoid these helpers from returning another ptr. It will make bpf_sk_release(tp) possible: sk = bpf_sk_lookup_tcp(); /* if (!sk) ... */ tp = bpf_tcp_sock(sk); /* if (!tp) ... */ bpf_sk_release(tp); A separate helper "bpf_get_listener_sock()" will be added in a later patch to do sk_to_full_sk(). Misc change notes: - To allow bpf_sk_release(tp), the arg of bpf_sk_release() is changed from ARG_PTR_TO_SOCKET to ARG_PTR_TO_SOCK_COMMON. ARG_PTR_TO_SOCKET is removed from bpf.h since no helper is using it. - arg_type_is_refcounted() is renamed to arg_type_may_be_refcounted() because ARG_PTR_TO_SOCK_COMMON is the only one and skb->sk is not refcounted. All bpf_sk_release(), bpf_sk_fullsock() and bpf_tcp_sock() take ARG_PTR_TO_SOCK_COMMON. - check_refcount_ok() ensures is_acquire_function() cannot take arg_type_may_be_refcounted() as its argument. - The check_func_arg() can only allow one refcount-ed arg. It is guaranteed by check_refcount_ok() which ensures at most one arg can be refcounted. Hence, it is a verifier internal error if >1 refcount arg found in check_func_arg(). - In release_reference(), release_reference_state() is called first to ensure a match on "reg->ref_obj_id" can be found before scrubbing the reg states with release_reg_references(). - reg_is_refcounted() is no longer needed. 1. In mark_ptr_or_null_regs(), its usage is replaced by "ref_obj_id && ref_obj_id == id" because, when is_null == true, release_reference_state() should only be called on the ref_obj_id obtained by a acquire helper (i.e. is_acquire_function() == true). Otherwise, the following would happen: sk = bpf_sk_lookup_tcp(); /* if (!sk) { ... } */ fullsock = bpf_sk_fullsock(sk); if (!fullsock) { /* * release_reference_state(fullsock_reg->ref_obj_id) * where fullsock_reg->ref_obj_id == sk_reg->ref_obj_id. * * Hence, the following bpf_sk_release(sk) will fail * because the ref state has already been released in the * earlier release_reference_state(fullsock_reg->ref_obj_id). */ bpf_sk_release(sk); } 2. In release_reg_references(), the current reg_is_refcounted() call is unnecessary because the id check is enough. - The type_is_refcounted() and type_is_refcounted_or_null() are no longer needed also because reg_is_refcounted() is removed. Fixes: 655a51e536c0 ("bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock") Reported-by: Lorenz Bauer Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - include/linux/bpf_verifier.h | 40 +++++++++++++ kernel/bpf/verifier.c | 131 ++++++++++++++++++++++++------------------- net/core/filter.c | 6 +- 4 files changed, 115 insertions(+), 63 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a2132e09dc1c..f02367faa58d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -193,7 +193,6 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ - ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 69f7a3449eda..7d8228d1c898 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -66,6 +66,46 @@ struct bpf_reg_state { * same reference to the socket, to determine proper reference freeing. */ u32 id; + /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned + * from a pointer-cast helper, bpf_sk_fullsock() and + * bpf_tcp_sock(). + * + * Consider the following where "sk" is a reference counted + * pointer returned from "sk = bpf_sk_lookup_tcp();": + * + * 1: sk = bpf_sk_lookup_tcp(); + * 2: if (!sk) { return 0; } + * 3: fullsock = bpf_sk_fullsock(sk); + * 4: if (!fullsock) { bpf_sk_release(sk); return 0; } + * 5: tp = bpf_tcp_sock(fullsock); + * 6: if (!tp) { bpf_sk_release(sk); return 0; } + * 7: bpf_sk_release(sk); + * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain + * + * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and + * "tp" ptr should be invalidated also. In order to do that, + * the reg holding "fullsock" and "sk" need to remember + * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id + * such that the verifier can reset all regs which have + * ref_obj_id matching the sk_reg->id. + * + * sk_reg->ref_obj_id is set to sk_reg->id at line 1. + * sk_reg->id will stay as NULL-marking purpose only. + * After NULL-marking is done, sk_reg->id can be reset to 0. + * + * After "fullsock = bpf_sk_fullsock(sk);" at line 3, + * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id. + * + * After "tp = bpf_tcp_sock(fullsock);" at line 5, + * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id + * which is the same as sk_reg->ref_obj_id. + * + * From the verifier perspective, if sk, fullsock and tp + * are not NULL, they are the same ptr with different + * reg->type. In particular, bpf_sk_release(tp) is also + * allowed and has the same effect as bpf_sk_release(sk). + */ + u32 ref_obj_id; /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ce166a002d16..86f9cd5d1c4e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -212,7 +212,7 @@ struct bpf_call_arg_meta { int access_size; s64 msize_smax_value; u64 msize_umax_value; - int ptr_id; + int ref_obj_id; int func_id; }; @@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type) type == PTR_TO_TCP_SOCK_OR_NULL; } -static bool type_is_refcounted(enum bpf_reg_type type) -{ - return type == PTR_TO_SOCKET; -} - -static bool type_is_refcounted_or_null(enum bpf_reg_type type) -{ - return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL; -} - -static bool reg_is_refcounted(const struct bpf_reg_state *reg) -{ - return type_is_refcounted(reg->type); -} - static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return reg->type == PTR_TO_MAP_VALUE && map_value_has_spin_lock(reg->map_ptr); } -static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) +static bool arg_type_may_be_refcounted(enum bpf_arg_type type) { - return type_is_refcounted_or_null(reg->type); -} - -static bool arg_type_is_refcounted(enum bpf_arg_type type) -{ - return type == ARG_PTR_TO_SOCKET; + return type == ARG_PTR_TO_SOCK_COMMON; } /* Determine whether the function releases some resources allocated by another @@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id) func_id == BPF_FUNC_sk_lookup_udp; } +static bool is_ptr_cast_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_tcp_sock || + func_id == BPF_FUNC_sk_fullsock; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (t == PTR_TO_STACK) verbose(env, ",call_%d", func(env, reg)->callsite); } else { - verbose(env, "(id=%d", reg->id); + verbose(env, "(id=%d ref_obj_id=%d", reg->id, + reg->ref_obj_id); if (t != SCALAR_VALUE) verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) @@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ if (!type_is_sk_pointer(type)) goto err_type; - } else if (arg_type == ARG_PTR_TO_SOCKET) { - expected_type = PTR_TO_SOCKET; - if (type != expected_type) - goto err_type; - if (meta->ptr_id || !reg->id) { - verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n", - meta->ptr_id, reg->id); - return -EFAULT; + if (reg->ref_obj_id) { + if (meta->ref_obj_id) { + verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, + meta->ref_obj_id); + return -EFAULT; + } + meta->ref_obj_id = reg->ref_obj_id; } - meta->ptr_id = reg->id; } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) @@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) return true; } -static bool check_refcount_ok(const struct bpf_func_proto *fn) +static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) { int count = 0; - if (arg_type_is_refcounted(fn->arg1_type)) + if (arg_type_may_be_refcounted(fn->arg1_type)) count++; - if (arg_type_is_refcounted(fn->arg2_type)) + if (arg_type_may_be_refcounted(fn->arg2_type)) count++; - if (arg_type_is_refcounted(fn->arg3_type)) + if (arg_type_may_be_refcounted(fn->arg3_type)) count++; - if (arg_type_is_refcounted(fn->arg4_type)) + if (arg_type_may_be_refcounted(fn->arg4_type)) count++; - if (arg_type_is_refcounted(fn->arg5_type)) + if (arg_type_may_be_refcounted(fn->arg5_type)) count++; + /* A reference acquiring function cannot acquire + * another refcounted ptr. + */ + if (is_acquire_function(func_id) && count) + return false; + /* We only support one arg being unreferenced at the moment, * which is sufficient for the helper functions we have right now. */ return count <= 1; } -static int check_func_proto(const struct bpf_func_proto *fn) +static int check_func_proto(const struct bpf_func_proto *fn, int func_id) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && - check_refcount_ok(fn) ? 0 : -EINVAL; + check_refcount_ok(fn, func_id) ? 0 : -EINVAL; } /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] @@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) } static void release_reg_references(struct bpf_verifier_env *env, - struct bpf_func_state *state, int id) + struct bpf_func_state *state, + int ref_obj_id) { struct bpf_reg_state *regs = state->regs, *reg; int i; for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].id == id) + if (regs[i].ref_obj_id == ref_obj_id) mark_reg_unknown(env, regs, i); bpf_for_each_spilled_reg(i, state, reg) { if (!reg) continue; - if (reg_is_refcounted(reg) && reg->id == id) + if (reg->ref_obj_id == ref_obj_id) __mark_reg_unknown(reg); } } @@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env, * resources. Identify all copies of the same pointer and clear the reference. */ static int release_reference(struct bpf_verifier_env *env, - struct bpf_call_arg_meta *meta) + int ref_obj_id) { struct bpf_verifier_state *vstate = env->cur_state; + int err; int i; + err = release_reference_state(cur_func(env), ref_obj_id); + if (err) + return err; + for (i = 0; i <= vstate->curframe; i++) - release_reg_references(env, vstate->frame[i], meta->ptr_id); + release_reg_references(env, vstate->frame[i], ref_obj_id); - return release_reference_state(cur_func(env), meta->ptr_id); + return 0; } static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, @@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; - err = check_func_proto(fn); + err = check_func_proto(fn, func_id); if (err) { verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id); @@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return err; } } else if (is_release_function(func_id)) { - err = release_reference(env, &meta); + err = release_reference(env, meta.ref_obj_id); if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id); @@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn if (id < 0) return id; - /* For release_reference() */ + /* For mark_ptr_or_null_reg() */ regs[BPF_REG_0].id = id; + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = id; } else { /* For mark_ptr_or_null_reg() */ regs[BPF_REG_0].id = ++env->id_gen; @@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + if (is_ptr_cast_function(func_id)) + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + do_refine_retval_range(regs, fn->ret_type, func_id, &meta); err = check_map_func_compatibility(env, meta.map_ptr, func_id); @@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { reg->type = PTR_TO_TCP_SOCK; } - if (is_null || !(reg_is_refcounted(reg) || - reg_may_point_to_spin_lock(reg))) { - /* We don't need id from this point onwards anymore, - * thus we should better reset it, so that state - * pruning has chances to take effect. + if (is_null) { + /* We don't need id and ref_obj_id from this point + * onwards anymore, thus we should better reset it, + * so that state pruning has chances to take effect. + */ + reg->id = 0; + reg->ref_obj_id = 0; + } else if (!reg_may_point_to_spin_lock(reg)) { + /* For not-NULL ptr, reg->ref_obj_id will be reset + * in release_reg_references(). + * + * reg->id is still used by spin_lock ptr. Other + * than spin_lock ptr type, reg->id can be reset. */ reg->id = 0; } @@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, { struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *reg, *regs = state->regs; + u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; int i, j; - if (reg_is_refcounted_or_null(®s[regno]) && is_null) - release_reference_state(state, id); + if (ref_obj_id && ref_obj_id == id && is_null) + /* regs[regno] is in the " == NULL" branch. + * No one could have freed the reference state before + * doing the NULL check. + */ + WARN_ON_ONCE(release_reference_state(state, id)); for (i = 0; i < MAX_BPF_REG; i++) mark_ptr_or_null_reg(state, ®s[i], id, is_null); diff --git a/net/core/filter.c b/net/core/filter.c index f274620945ff..36b6afacf83c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) { - sk = sk_to_full_sk(sk); - return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; } @@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .func = bpf_sk_release, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCKET, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, @@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { - sk = sk_to_full_sk(sk); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; -- cgit v1.2.3 From dbafd7ddd62369b2f3926ab847cbf8fc40e800b7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:04 -0700 Subject: bpf: Add bpf_get_listener_sock(struct bpf_sock *sk) helper Add a new helper "struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)" which returns a bpf_sock in TCP_LISTEN state. It will trace back to the listener sk from a request_sock if possible. It returns NULL for all other cases. No reference is taken because the helper ensures the sk is in SOCK_RCU_FREE (where the TCP_LISTEN sock should be in). Hence, bpf_sk_release() is unnecessary and the verifier does not allow bpf_sk_release(listen_sk) to be called either. The following is also allowed because the bpf_prog is run under rcu_read_lock(): sk = bpf_sk_lookup_tcp(); /* if (!sk) { ... } */ listen_sk = bpf_get_listener_sock(sk); /* if (!listen_sk) { ... } */ bpf_sk_release(sk); src_port = listen_sk->src_port; /* Allowed */ Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 11 ++++++++++- net/core/filter.c | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c38ac9a92a7..983b25cb608d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2366,6 +2366,14 @@ union bpf_attr { * current value is ect (ECN capable). Works with IPv6 and IPv4. * Return * 1 if set, 0 if not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in TCP_LISTEN state. + * bpf_sk_release() is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2465,7 +2473,8 @@ union bpf_attr { FN(spin_unlock), \ FN(sk_fullsock), \ FN(tcp_sock), \ - FN(skb_ecn_set_ce), + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index 36b6afacf83c..647c63a7b25b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5418,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = { .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; +BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_get_listener_sock_proto = { + .func = bpf_get_listener_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; @@ -5603,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_INET case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; + case BPF_FUNC_get_listener_sock: + return &bpf_get_listener_sock_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; #endif @@ -5698,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_release_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; + case BPF_FUNC_get_listener_sock: + return &bpf_get_listener_sock_proto; #endif default: return bpf_base_func_proto(func_id); -- cgit v1.2.3 From ef776a272b093fb52612e6d8f4b3e77f9bb49554 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:06 -0700 Subject: bpf: Sync bpf.h to tools/ This patch sync the uapi bpf.h to tools/. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3c38ac9a92a7..983b25cb608d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2366,6 +2366,14 @@ union bpf_attr { * current value is ect (ECN capable). Works with IPv6 and IPv4. * Return * 1 if set, 0 if not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in TCP_LISTEN state. + * bpf_sk_release() is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2465,7 +2473,8 @@ union bpf_attr { FN(spin_unlock), \ FN(sk_fullsock), \ FN(tcp_sock), \ - FN(skb_ecn_set_ce), + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From b55aa7b04bb42274b4a894020b5b2fa059c3527e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:09 -0700 Subject: bpf: Test ref release issue in bpf_tcp_sock and bpf_sk_fullsock Adding verifier tests to ensure the ptr returned from bpf_tcp_sock() and bpf_sk_fullsock() cannot be accessed after bpf_sk_release() is called. A few of the tests are derived from a reproducer test by Lorenz Bauer. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/verifier/ref_tracking.c | 168 +++++++++++++++++++++ tools/testing/selftests/bpf/verifier/sock.c | 4 +- 2 files changed, 170 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 3ed3593bd8b6..923f2110072d 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -605,3 +605,171 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, +{ + "reference tracking: use ptr from bpf_tcp_sock() after release", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_sk_fullsock() after release", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_sk_fullsock(tp) after release", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use sk after bpf_sk_release(tp)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_listener_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: bpf_sk_release(listen_sk)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_listener_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "reference has not been acquired before", +}, +{ + /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */ + "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 0ddfdf76aba5..416436231fab 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -342,7 +342,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "type=sock_common expected=sock", + .errstr = "reference has not been acquired before", }, { "bpf_sk_release(bpf_sk_fullsock(skb->sk))", @@ -380,5 +380,5 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "type=tcp_sock expected=sock", + .errstr = "reference has not been acquired before", }, -- cgit v1.2.3 From 7681e7b2fbe2a78806423810c0d84dd230b96f94 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:11 -0700 Subject: bpf: Add an example for bpf_get_listener_sock This patch adds an example in using the new helper bpf_get_listener_sock(). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_helpers.h | 2 + .../selftests/bpf/progs/test_sock_fields_kern.c | 88 +++++++++++--- tools/testing/selftests/bpf/test_sock_fields.c | 134 ++++++++++++++++----- 3 files changed, 180 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index c9433a496d54..c81fc350f7ad 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -180,6 +180,8 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) BPF_FUNC_sk_fullsock; static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) BPF_FUNC_tcp_sock; +static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = + (void *) BPF_FUNC_get_listener_sock; static int (*bpf_skb_ecn_set_ce)(void *ctx) = (void *) BPF_FUNC_skb_ecn_set_ce; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index de1a43e8f610..37328f148538 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -8,38 +8,51 @@ #include "bpf_helpers.h" #include "bpf_endian.h" -enum bpf_array_idx { - SRV_IDX, - CLI_IDX, - __NR_BPF_ARRAY_IDX, +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, }; struct bpf_map_def SEC("maps") addr_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct sockaddr_in6), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_ADDR_ARRAY_IDX, }; struct bpf_map_def SEC("maps") sock_result_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct bpf_sock), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_RESULT_ARRAY_IDX, }; struct bpf_map_def SEC("maps") tcp_sock_result_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct bpf_tcp_sock), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_RESULT_ARRAY_IDX, }; struct bpf_map_def SEC("maps") linum_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u32), - .max_entries = 1, + .max_entries = __NR_BPF_LINUM_ARRAY_IDX, }; static bool is_loopback6(__u32 *a6) @@ -100,18 +113,20 @@ static void tpcpy(struct bpf_tcp_sock *dst, #define RETURN { \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ + bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ return 1; \ } SEC("cgroup_skb/egress") -int read_sock_fields(struct __sk_buff *skb) +int egress_read_sock_fields(struct __sk_buff *skb) { - __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; + __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; struct sockaddr_in6 *srv_sa6, *cli_sa6; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; - __u32 linum, idx0 = 0; + __u32 linum, linum_idx; + + linum_idx = EGRESS_LINUM_IDX; sk = skb->sk; if (!sk || sk->state == 10) @@ -132,14 +147,55 @@ int read_sock_fields(struct __sk_buff *skb) RETURN; if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) - idx = srv_idx; + result_idx = EGRESS_SRV_IDX; else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) - idx = cli_idx; + result_idx = EGRESS_CLI_IDX; else RETURN; - sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + RETURN; +} + +SEC("cgroup_skb/ingress") +int ingress_read_sock_fields(struct __sk_buff *skb) +{ + __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + struct sockaddr_in6 *srv_sa6; + __u32 linum, linum_idx; + + linum_idx = INGRESS_LINUM_IDX; + + sk = skb->sk; + if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) + RETURN; + + if (sk->state != 10 && sk->state != 12) + RETURN; + + sk = bpf_get_listener_sock(sk); + if (!sk) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); if (!sk_ret || !tp_ret) RETURN; diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c index bc8943938bf5..dcae7f664dce 100644 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -16,10 +16,23 @@ #include "cgroup_helpers.h" #include "bpf_rlimit.h" -enum bpf_array_idx { - SRV_IDX, - CLI_IDX, - __NR_BPF_ARRAY_IDX, +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, }; #define CHECK(condition, tag, format...) ({ \ @@ -41,8 +54,16 @@ static int linum_map_fd; static int addr_map_fd; static int tp_map_fd; static int sk_map_fd; -static __u32 srv_idx = SRV_IDX; -static __u32 cli_idx = CLI_IDX; + +static __u32 addr_srv_idx = ADDR_SRV_IDX; +static __u32 addr_cli_idx = ADDR_CLI_IDX; + +static __u32 egress_srv_idx = EGRESS_SRV_IDX; +static __u32 egress_cli_idx = EGRESS_CLI_IDX; +static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; + +static __u32 egress_linum_idx = EGRESS_LINUM_IDX; +static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; static void init_loopback6(struct sockaddr_in6 *sa6) { @@ -93,29 +114,46 @@ static void print_tp(const struct bpf_tcp_sock *tp) static void check_result(void) { - struct bpf_tcp_sock srv_tp, cli_tp; - struct bpf_sock srv_sk, cli_sk; - __u32 linum, idx0 = 0; + struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; + struct bpf_sock srv_sk, cli_sk, listen_sk; + __u32 ingress_linum, egress_linum; int err; - err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); + err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, + &egress_linum); CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", + err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, + &ingress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", + err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", + err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", + err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", "err:%d errno:%d", err, errno); + printf("listen_sk: "); + print_sk(&listen_sk); + printf("\n"); + printf("srv_sk: "); print_sk(&srv_sk); printf("\n"); @@ -124,6 +162,10 @@ static void check_result(void) print_sk(&cli_sk); printf("\n"); + printf("listen_tp: "); + print_tp(&listen_tp); + printf("\n"); + printf("srv_tp: "); print_tp(&srv_tp); printf("\n"); @@ -132,6 +174,19 @@ static void check_result(void) print_tp(&cli_tp); printf("\n"); + CHECK(listen_sk.state != 10 || + listen_sk.family != AF_INET6 || + listen_sk.protocol != IPPROTO_TCP || + memcmp(listen_sk.src_ip6, &in6addr_loopback, + sizeof(listen_sk.src_ip6)) || + listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || + listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || + listen_sk.src_port != ntohs(srv_sa6.sin6_port) || + listen_sk.dst_port, + "Unexpected listen_sk", + "Check listen_sk output. ingress_linum:%u", + ingress_linum); + CHECK(srv_sk.state == 10 || !srv_sk.state || srv_sk.family != AF_INET6 || @@ -142,7 +197,8 @@ static void check_result(void) sizeof(srv_sk.dst_ip6)) || srv_sk.src_port != ntohs(srv_sa6.sin6_port) || srv_sk.dst_port != cli_sa6.sin6_port, - "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); + "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", + egress_linum); CHECK(cli_sk.state == 10 || !cli_sk.state || @@ -154,21 +210,31 @@ static void check_result(void) sizeof(cli_sk.dst_ip6)) || cli_sk.src_port != ntohs(cli_sa6.sin6_port) || cli_sk.dst_port != srv_sa6.sin6_port, - "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); + "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", + egress_linum); + + CHECK(listen_tp.data_segs_out || + listen_tp.data_segs_in || + listen_tp.total_retrans || + listen_tp.bytes_acked, + "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", + ingress_linum); CHECK(srv_tp.data_segs_out != 1 || srv_tp.data_segs_in || srv_tp.snd_cwnd != 10 || srv_tp.total_retrans || srv_tp.bytes_acked != DATA_LEN, - "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum); + "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", + egress_linum); CHECK(cli_tp.data_segs_out || cli_tp.data_segs_in != 1 || cli_tp.snd_cwnd != 10 || cli_tp.total_retrans || cli_tp.bytes_received != DATA_LEN, - "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum); + "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", + egress_linum); } static void test(void) @@ -211,10 +277,10 @@ static void test(void) err, errno); /* Update addr_map with srv_sa6 and cli_sa6 */ - err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); + err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); CHECK(err, "map_update", "err:%d errno:%d", err, errno); - err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); + err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); CHECK(err, "map_update", "err:%d errno:%d", err, errno); /* Connect from cli_sa6 to srv_sa6 */ @@ -273,9 +339,9 @@ int main(int argc, char **argv) struct bpf_prog_load_attr attr = { .file = "test_sock_fields_kern.o", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .expected_attach_type = BPF_CGROUP_INET_EGRESS, }; - int cgroup_fd, prog_fd, err; + int cgroup_fd, egress_fd, ingress_fd, err; + struct bpf_program *ingress_prog; struct bpf_object *obj; struct bpf_map *map; @@ -293,12 +359,24 @@ int main(int argc, char **argv) err = join_cgroup(TEST_CGROUP); CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); - err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); - err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); + ingress_prog = bpf_object__find_program_by_title(obj, + "cgroup_skb/ingress"); + CHECK(!ingress_prog, + "bpf_object__find_program_by_title(cgroup_skb/ingress)", + "not found"); + ingress_fd = bpf_program__fd(ingress_prog); + + err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", "err:%d errno%d", err, errno); + + err = bpf_prog_attach(ingress_fd, cgroup_fd, + BPF_CGROUP_INET_INGRESS, 0); + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", + "err:%d errno%d", err, errno); close(cgroup_fd); map = bpf_object__find_map_by_name(obj, "addr_map"); -- cgit v1.2.3 From 9768095ba97ce946838e8210f0b44f2fd36ec31d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 10 Mar 2019 17:44:09 -0700 Subject: btf: resolve enum fwds in btf_dedup GCC and clang support enum forward declarations as an extension. Such forward-declared enums will be represented as normal BTF_KIND_ENUM types with vlen=0. This patch adds ability to resolve such enums to their corresponding fully defined enums. This helps to avoid duplicated BTF type graphs which only differ by some types referencing forward-declared enum vs full enum. One such example in kernel is enum irqchip_irq_state, defined in include/linux/interrupt.h and forward-declared in include/linux/irq.h. This causes entire struct task_struct and all referenced types to be duplicated in btf_dedup output. This patch eliminates such duplication cases. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 51 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1b8d8cdd3575..87e3020ac1bc 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1602,16 +1602,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) /* Calculate type signature hash of ENUM. */ static __u32 btf_hash_enum(struct btf_type *t) { - struct btf_enum *member = (struct btf_enum *)(t + 1); - __u32 vlen = BTF_INFO_VLEN(t->info); - __u32 h = btf_hash_common(t); - int i; + __u32 h; - for (i = 0; i < vlen; i++) { - h = hash_combine(h, member->name_off); - h = hash_combine(h, member->val); - member++; - } + /* don't hash vlen and enum members to support enum fwd resolving */ + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info & ~0xffff); + h = hash_combine(h, t->size); return h; } @@ -1637,6 +1633,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } +static inline bool btf_is_enum_fwd(struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM && + BTF_INFO_VLEN(t->info) == 0; +} + +static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) + return btf_equal_enum(t1, t2); + /* ignore vlen when comparing */ + return t1->name_off == t2->name_off && + (t1->info & ~0xffff) == (t2->info & ~0xffff) && + t1->size == t2->size; +} + /* * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, * as referenced type IDs equivalence is established separately during type @@ -1860,6 +1872,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) new_id = cand_node->type_id; break; } + if (d->opts.dont_resolve_fwds) + continue; + if (btf_compat_enum(t, cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ + new_id = cand_node->type_id; + break; + } + /* resolve canonical enum fwd to full enum */ + d->map[cand_node->type_id] = type_id; + } } break; @@ -2084,15 +2107,15 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return fwd_kind == real_kind; } - if (cand_type->info != canon_type->info) - return 0; - switch (cand_kind) { case BTF_KIND_INT: return btf_equal_int(cand_type, canon_type); case BTF_KIND_ENUM: - return btf_equal_enum(cand_type, canon_type); + if (d->opts.dont_resolve_fwds) + return btf_equal_enum(cand_type, canon_type); + else + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: return btf_equal_common(cand_type, canon_type); @@ -2103,6 +2126,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + if (cand_type->info != canon_type->info) + return 0; return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); case BTF_KIND_ARRAY: { -- cgit v1.2.3 From 8fd7a61aa556ad518e897f58264a2e67f9c527f5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 10 Mar 2019 17:44:10 -0700 Subject: selftests/bpf: add fwd enum resolution test for btf_dedup This patch adds test verifying new btf_dedup logic of resolving forward-declared enums. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 38797aa627a7..23e3b314ca60 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5874,6 +5874,50 @@ const struct btf_dedup_test dedup_tests[] = { .dont_resolve_fwds = false, }, }, +{ + .descr = "dedup: enum fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [2] full enum 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [3] full enum 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [4] fwd enum 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [5] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [6] incompatible full enum with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [2] full enum 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [3] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [4] incompatible full enum with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, }; -- cgit v1.2.3 From 62369db2df8d1edfa040878203b446e023a16802 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:39 +0000 Subject: bpf: fix documentation for eBPF helpers Another round of minor fixes for the documentation of the BPF helpers located in the UAPI bpf.h header file. Changes include: - Moving around description of some helpers, to keep the descriptions in the same order as helpers are declared (bpf_map_push_elem(), leftover from commit 90b1023f68c7 ("bpf: fix documentation for eBPF helpers"), bpf_rc_keydown(), and bpf_skb_ancestor_cgroup_id()). - Fixing typos ("contex" -> "context"). - Harmonising return types ("void* " -> "void *", "uint64_t" -> "u64"). - Addition of the "bpf_" prefix to bpf_get_storage(). - Light additions of RST markup on some keywords. - Empty line deletion between description and return value for bpf_tcp_sock(). - Edit for the description for bpf_skb_ecn_set_ce() (capital letters, acronym expansion, no effect if ECT not set, more details on return value). Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 128 ++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 983b25cb608d..4465d00d3493 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -502,16 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* in *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is removed to - * make room for this. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1435,14 +1425,14 @@ union bpf_attr { * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return * A 8-byte long non-decreasing number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. * @@ -2098,52 +2088,52 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * int bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode*, - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). - * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * int bpf_rc_repeat(void *ctx) + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for previously generated - * key down event. + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * - * Some IR protocols like NEC have a special IR message for - * repeating last button, for when a button is held down. + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) * Description * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () @@ -2159,30 +2149,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then return value will be same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *skb*. - * - * The format of returned id and helper limitations are same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. - * * u64 bpf_get_current_cgroup_id(void) * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. * - * void* get_local_storage(void *map, u64 flags) + * void *bpf_get_local_storage(void *map, u64 flags) * Description * Get the pointer to the local storage area. * The type and the size of the local storage is defined @@ -2209,6 +2181,24 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child @@ -2289,6 +2279,16 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. @@ -2346,33 +2346,35 @@ union bpf_attr { * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such - * that all the fields in bpf_sock can be accessed. + * that all the fields in this **bpf_sock** can be accessed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. * * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. - * * Return - * A **struct bpf_tcp_sock** pointer on success, or NULL in + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * * int bpf_skb_ecn_set_ce(struct sk_buf *skb) - * Description - * Sets ECN of IP header to ce (congestion encountered) if - * current value is ect (ECN capable). Works with IPv6 and IPv4. - * Return - * 1 if set, 0 if not set. + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. * * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) * Description - * Return a **struct bpf_sock** pointer in TCP_LISTEN state. - * bpf_sk_release() is unnecessary and not allowed. + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ -- cgit v1.2.3 From 0eb0978528d47699edd091dc2c337952ad8da436 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:40 +0000 Subject: bpf: add documentation for helpers bpf_spin_lock(), bpf_spin_unlock() Add documentation for the BPF spinlock-related helpers to the doc in bpf.h. I added the constraints and restrictions coming with the use of spinlocks for BPF: not all of it is directly related to the use of the helper, but I thought it would be nice for users to find them in the man page. This list of restrictions is nearly a verbatim copy of the list in Alexei's commit log for those helpers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4465d00d3493..929c8e537a14 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2343,6 +2343,61 @@ union bpf_attr { * Return * 0 * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * Return + * 0 + * * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such -- cgit v1.2.3 From ea6eced00e4b28821804eee24e80d9528f48972a Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:41 +0000 Subject: tools: bpf: synchronise BPF UAPI header with tools Synchronise the bpf.h header under tools, to report the latest fixes and additions to the documentation for the BPF helpers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 183 +++++++++++++++++++++++++++-------------- 1 file changed, 120 insertions(+), 63 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 983b25cb608d..929c8e537a14 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -502,16 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* in *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is removed to - * make room for this. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1435,14 +1425,14 @@ union bpf_attr { * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return * A 8-byte long non-decreasing number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. * @@ -2098,52 +2088,52 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * int bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode*, - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). - * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * int bpf_rc_repeat(void *ctx) + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for previously generated - * key down event. + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * - * Some IR protocols like NEC have a special IR message for - * repeating last button, for when a button is held down. + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) * Description * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () @@ -2159,30 +2149,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then return value will be same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *skb*. - * - * The format of returned id and helper limitations are same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. - * * u64 bpf_get_current_cgroup_id(void) * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. * - * void* get_local_storage(void *map, u64 flags) + * void *bpf_get_local_storage(void *map, u64 flags) * Description * Get the pointer to the local storage area. * The type and the size of the local storage is defined @@ -2209,6 +2181,24 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child @@ -2289,6 +2279,16 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. @@ -2343,36 +2343,93 @@ union bpf_attr { * Return * 0 * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * Return + * 0 + * * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such - * that all the fields in bpf_sock can be accessed. + * that all the fields in this **bpf_sock** can be accessed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. * * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. - * * Return - * A **struct bpf_tcp_sock** pointer on success, or NULL in + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * * int bpf_skb_ecn_set_ce(struct sk_buf *skb) - * Description - * Sets ECN of IP header to ce (congestion encountered) if - * current value is ect (ECN capable). Works with IPv6 and IPv4. - * Return - * 1 if set, 0 if not set. + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. * * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) * Description - * Return a **struct bpf_sock** pointer in TCP_LISTEN state. - * bpf_sk_release() is unnecessary and not allowed. + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ -- cgit v1.2.3 From 9804501fa1228048857910a6bf23e085aade37cc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 14 Mar 2019 13:47:59 +0800 Subject: appletalk: Fix potential NULL pointer dereference in unregister_snap_client register_snap_client may return NULL, all the callers check it, but only print a warning. This will result in NULL pointer dereference in unregister_snap_client and other places. It has always been used like this since v2.6 Reported-by: Dan Carpenter Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/linux/atalk.h | 2 +- net/appletalk/aarp.c | 15 ++++++++++++--- net/appletalk/ddp.c | 20 ++++++++++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index d5cfc0b15b76..f6034ba774be 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -108,7 +108,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) #define AARP_RESOLVE_TIME (10 * HZ) extern struct datalink_proto *ddp_dl, *aarp_dl; -extern void aarp_proto_init(void); +extern int aarp_proto_init(void); /* Inter module exports */ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 49a16cee2aae..420a98bf79b5 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = { static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 }; -void __init aarp_proto_init(void) +int __init aarp_proto_init(void) { + int rc; + aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); - if (!aarp_dl) + if (!aarp_dl) { printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); + return -ENOMEM; + } timer_setup(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); - register_netdevice_notifier(&aarp_notifier); + rc = register_netdevice_notifier(&aarp_notifier); + if (rc) { + del_timer_sync(&aarp_timer); + unregister_snap_client(aarp_dl); + } + return rc; } /* Remove the AARP entries associated with a device. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 795fbc6c06aa..709d2542f729 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1904,9 +1904,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B }; EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static const char atalk_err_snap[] __initconst = - KERN_CRIT "Unable to register DDP with SNAP.\n"; - /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { @@ -1921,17 +1918,22 @@ static int __init atalk_init(void) goto out_proto; ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); - if (!ddp_dl) - printk(atalk_err_snap); + if (!ddp_dl) { + pr_crit("Unable to register DDP with SNAP.\n"); + goto out_sock; + } dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); rc = register_netdevice_notifier(&ddp_notifier); if (rc) - goto out_sock; + goto out_snap; + + rc = aarp_proto_init(); + if (rc) + goto out_dev; - aarp_proto_init(); rc = atalk_proc_init(); if (rc) goto out_aarp; @@ -1945,11 +1947,13 @@ out_proc: atalk_proc_exit(); out_aarp: aarp_cleanup_module(); +out_dev: unregister_netdevice_notifier(&ddp_notifier); -out_sock: +out_snap: dev_remove_pack(&ppptalk_packet_type); dev_remove_pack(<alk_packet_type); unregister_snap_client(ddp_dl); +out_sock: sock_unregister(PF_APPLETALK); out_proto: proto_unregister(&ddp_proto); -- cgit v1.2.3 From 80acbed9f8fca1db3fbe915540b756f048aa0fd7 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 14 Mar 2019 21:43:19 +0200 Subject: net: stmmac: don't set own bit too early for jumbo frames Commit 0e80bdc9a72d ("stmmac: first frame prep at the end of xmit routine") overlooked jumbo frames when re-ordering the code, and as a result the own bit was not getting set anymore for the first jumbo frame descriptor. Commit 487e2e22ab79 ("net: stmmac: Set OWN bit for jumbo frames") tried to fix this, but now the bit is getting set too early and the DMA may start while we are still setting up the remaining descriptors. And with the chain mode the own bit remains still unset. Fix by setting the own bit at the end of xmit also with jumbo frames. Fixes: 0e80bdc9a72d ("stmmac: first frame prep at the end of xmit routine") Fixes: 487e2e22ab79 ("net: stmmac: Set OWN bit for jumbo frames") Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index d8c5bc412219..bc83ced94e1b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -59,7 +59,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, - STMMAC_RING_MODE, 1, false, skb->len); + STMMAC_RING_MODE, 0, false, skb->len); tx_q->tx_skbuff[entry] = NULL; entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); @@ -91,7 +91,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum, - STMMAC_RING_MODE, 1, true, skb->len); + STMMAC_RING_MODE, 0, true, skb->len); } tx_q->cur_tx = entry; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 97c5e1aad88f..6a2e1031a62a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3216,14 +3216,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, csum_insertion, priv->mode, 1, last_segment, skb->len); - - /* The own bit must be the latest setting done when prepare the - * descriptor and then barrier is needed to make sure that - * all is coherent before granting the DMA engine. - */ - wmb(); + } else { + stmmac_set_tx_owner(priv, first); } + /* The own bit must be the latest setting done when prepare the + * descriptor and then barrier is needed to make sure that + * all is coherent before granting the DMA engine. + */ + wmb(); + netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); stmmac_enable_dma_transmission(priv, priv->ioaddr); -- cgit v1.2.3 From 58f2ce6f61615dfd8dd3cc01c9e5bb54ed35637e Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 14 Mar 2019 21:43:20 +0200 Subject: net: stmmac: fix jumbo frame sending with non-linear skbs When sending non-linear skbs with jumbo frames, we set up the non-paged data and mark that as a last segment, although the paged fragments are also prepared. This will stall the TX queue and trigger a watchdog warning (a simple reproducer is to run an iperf client mode TCP test with a large MTU - networking fails instantly). Fix by checking if the skb is non-linear. Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index bc83ced94e1b..f936166d8910 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -79,7 +79,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 0, len, csum, - STMMAC_RING_MODE, 1, true, skb->len); + STMMAC_RING_MODE, 1, !skb_is_nonlinear(skb), + skb->len); } else { des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); @@ -91,7 +92,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum, - STMMAC_RING_MODE, 0, true, skb->len); + STMMAC_RING_MODE, 0, !skb_is_nonlinear(skb), + skb->len); } tx_q->cur_tx = entry; -- cgit v1.2.3 From 5bf7295fe34a5251b1d241b9736af4697b590670 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Thu, 14 Mar 2019 15:31:40 -0500 Subject: qlcnic: Avoid potential NULL pointer dereference netdev_alloc_skb can fail and return a NULL pointer which is dereferenced without a check. The patch avoids such a scenario. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 3b0adda7cc9c..a4cd6f2cfb86 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1048,6 +1048,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) { skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE); + if (!skb) + break; qlcnic_create_loopback_buff(skb->data, adapter->mac_addr); skb_put(skb, QLCNIC_ILB_PKT_SIZE); adapter->ahw->diag_cnt = 0; -- cgit v1.2.3 From eab2fc822af38f31fd5f4e731b5d10b94904d919 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 14 Mar 2019 23:08:22 +0100 Subject: sch_cake: Interpret fwmark parameter as a bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We initially interpreted the fwmark parameter as a flag that simply turned on the feature, using the whole skb->mark field as the index into the CAKE tin_order array. However, it is quite common for different applications to use different parts of the mask field for their own purposes, each using a different mask. Support this use of subsets of the mark by interpreting the TCA_CAKE_FWMARK parameter as a bitmask to apply to the fwmark field when reading it. The result will be right-shifted by the number of unset lower bits of the mask before looking up the tin. In the original commit message we also failed to credit Felix Resch with originally suggesting the fwmark feature back in 2017; so the Suggested-By in this commit covers the whole fwmark feature. Fixes: 0b5c7efdfc6e ("sch_cake: Permit use of connmarks as tin classifiers") Suggested-by: Felix Resch Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 1d2a12132abc..acc9b9da985f 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -211,6 +211,9 @@ struct cake_sched_data { u8 ack_filter; u8 atm_mode; + u32 fwmark_mask; + u16 fwmark_shft; + /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ u16 rate_shft; ktime_t time_next_packet; @@ -258,8 +261,7 @@ enum { CAKE_FLAG_AUTORATE_INGRESS = BIT(1), CAKE_FLAG_INGRESS = BIT(2), CAKE_FLAG_WASH = BIT(3), - CAKE_FLAG_SPLIT_GSO = BIT(4), - CAKE_FLAG_FWMARK = BIT(5) + CAKE_FLAG_SPLIT_GSO = BIT(4) }; /* COBALT operates the Codel and BLUE algorithms in parallel, in order to @@ -1543,7 +1545,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, struct sk_buff *skb) { struct cake_sched_data *q = qdisc_priv(sch); - u32 tin; + u32 tin, mark; u8 dscp; /* Tin selection: Default to diffserv-based selection, allow overriding @@ -1551,14 +1553,13 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, */ dscp = cake_handle_diffserv(skb, q->rate_flags & CAKE_FLAG_WASH); + mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) tin = 0; - else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */ - skb->mark && - skb->mark <= q->tin_cnt) - tin = q->tin_order[skb->mark - 1]; + else if (mark && mark <= q->tin_cnt) + tin = q->tin_order[mark - 1]; else if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && @@ -2172,6 +2173,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = { [TCA_CAKE_MPU] = { .type = NLA_U32 }, [TCA_CAKE_INGRESS] = { .type = NLA_U32 }, [TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 }, + [TCA_CAKE_FWMARK] = { .type = NLA_U32 }, }; static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, @@ -2619,10 +2621,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_CAKE_FWMARK]) { - if (!!nla_get_u32(tb[TCA_CAKE_FWMARK])) - q->rate_flags |= CAKE_FLAG_FWMARK; - else - q->rate_flags &= ~CAKE_FLAG_FWMARK; + q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]); + q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0; } if (q->tins) { @@ -2784,8 +2784,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_FWMARK, - !!(q->rate_flags & CAKE_FLAG_FWMARK))) + if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask)) goto nla_put_failure; return nla_nest_end(skb, opts); -- cgit v1.2.3 From 3d4c3cec0909dc9c40db82a74aae0cdf3f5ad138 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Mar 2019 23:47:13 +0000 Subject: drivers: net: atp: fix various indentation issues There is a statement that is indented incorrectly; replace spaces with a tab. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/atp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index cfb67b746595..58e0ca9093d3 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -482,7 +482,7 @@ static void hardware_init(struct net_device *dev) write_reg_high(ioaddr, IMR, ISRh_RxErr); lp->tx_unit_busy = 0; - lp->pac_cnt_in_tx_buf = 0; + lp->pac_cnt_in_tx_buf = 0; lp->saved_tx_size = 0; } -- cgit v1.2.3 From 68cfe9a286f3ee2371de00ab666b4949ff285196 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Mar 2019 23:56:35 +0000 Subject: net: sis900: fix indentation issues, remove some spaces There are several statements that contain extra spacing in the indentation; clean this up by removing spaces. Also add { } braces on if statement to keep to kernel coding style. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 6073387511f8..67f9bb6e941b 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -730,10 +730,10 @@ static u16 sis900_default_phy(struct net_device * net_dev) status = mdio_read(net_dev, phy->phy_addr, MII_STATUS); /* Link ON & Not select default PHY & not ghost PHY */ - if ((status & MII_STAT_LINK) && !default_phy && - (phy->phy_types != UNKNOWN)) - default_phy = phy; - else { + if ((status & MII_STAT_LINK) && !default_phy && + (phy->phy_types != UNKNOWN)) { + default_phy = phy; + } else { status = mdio_read(net_dev, phy->phy_addr, MII_CONTROL); mdio_write(net_dev, phy->phy_addr, MII_CONTROL, status | MII_CNTL_AUTO | MII_CNTL_ISOLATE); @@ -741,7 +741,7 @@ static u16 sis900_default_phy(struct net_device * net_dev) phy_home = phy; else if(phy->phy_types == LAN) phy_lan = phy; - } + } } if (!default_phy && phy_home) -- cgit v1.2.3 From 228cd2dba27cee9956c1af97e6445be056881e41 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 23:12:06 -0500 Subject: net: strparser: fix a missing check for create_singlethread_workqueue In case create_singlethread_workqueue fails, the check returns an error to callers to avoid potential NULL pointer dereferences. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/strparser/strparser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index da1a676860ca..860dcfb95ee4 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -550,6 +550,8 @@ EXPORT_SYMBOL_GPL(strp_check_rcv); static int __init strp_mod_init(void) { strp_wq = create_singlethread_workqueue("kstrp"); + if (unlikely(!strp_wq)) + return -ENOMEM; return 0; } -- cgit v1.2.3 From 8a3c245c031944f2176118270e7bc5d4fd4a1075 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 14 Mar 2019 10:45:23 -0300 Subject: net: add documentation to socket.c Adds missing sphinx documentation to the socket.c's functions. Also fixes some whitespaces. I also changed the style of older documentation as an effort to have an uniform documentation style. Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller --- include/linux/net.h | 6 ++ include/linux/socket.h | 12 +-- net/socket.c | 277 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 271 insertions(+), 24 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 651fca72286c..c606c72311d0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -83,6 +83,12 @@ enum sock_type { #endif /* ARCH_HAS_SOCKET_TYPES */ +/** + * enum sock_shutdown_cmd - Shutdown types + * @SHUT_RD: shutdown receptions + * @SHUT_WR: shutdown transmissions + * @SHUT_RDWR: shutdown receptions/transmissions + */ enum sock_shutdown_cmd { SHUT_RD, SHUT_WR, diff --git a/include/linux/socket.h b/include/linux/socket.h index 6016daeecee4..b57cd8bf96e2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,7 +26,7 @@ typedef __kernel_sa_family_t sa_family_t; /* * 1003.1g requires sa_family_t and that sa_data is char. */ - + struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ char sa_data[14]; /* 14 bytes of protocol address */ @@ -44,7 +44,7 @@ struct linger { * system, not 4.3. Thus msg_accrights(len) are now missing. They * belong in an obscure libc emulation or the bin. */ - + struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -54,7 +54,7 @@ struct msghdr { unsigned int msg_flags; /* flags on received message */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; - + struct user_msghdr { void __user *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -122,7 +122,7 @@ struct cmsghdr { * inside range, given by msg->msg_controllen before using * ancillary object DATA. --ANK (980731) */ - + static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, struct cmsghdr *__cmsg) { @@ -264,10 +264,10 @@ struct ucred { /* Maximum queue length specifiable by listen. */ #define SOMAXCONN 128 -/* Flags we can use with send/ and recv. +/* Flags we can use with send/ and recv. Added those for 1003.1g not all are supported yet */ - + #define MSG_OOB 1 #define MSG_PEEK 2 #define MSG_DONTROUTE 4 diff --git a/net/socket.c b/net/socket.c index 3c176a12fe48..8255f5bda0aa 100644 --- a/net/socket.c +++ b/net/socket.c @@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ +/** + * sock_alloc_file - Bind a &socket to a &file + * @sock: socket + * @flags: file status flags + * @dname: protocol name + * + * Returns the &file bound with @sock, implicitly storing it + * in sock->file. If dname is %NULL, sets to "". + * On failure the return is a ERR pointer (see linux/err.h). + * This function uses GFP_KERNEL internally. + */ + struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { struct file *file; @@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags) return PTR_ERR(newfile); } +/** + * sock_from_file - Return the &socket bounded to @file. + * @file: file + * @err: pointer to an error code return + * + * On failure returns %NULL and assigns -ENOTSOCK to @err. + */ + struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) @@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = { }; /** - * sock_alloc - allocate a socket + * sock_alloc - allocate a socket * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes - * NULL is returned. + * NULL is returned. This functions uses GFP_KERNEL internally. */ struct socket *sock_alloc(void) @@ -561,7 +581,7 @@ struct socket *sock_alloc(void) EXPORT_SYMBOL(sock_alloc); /** - * sock_release - close a socket + * sock_release - close a socket * @sock: socket to close * * The socket is released from the protocol stack if it has a release @@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); +/** + * sock_sendmsg - send a message through @sock + * @sock: socket + * @msg: message to send + * + * Sends @msg through @sock, passing through LSM. + * Returns the number of bytes sent, or an error code. + */ + static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); @@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) } EXPORT_SYMBOL(sock_sendmsg); +/** + * kernel_sendmsg - send a message through @sock (kernel-space) + * @sock: socket + * @msg: message header + * @vec: kernel vec + * @num: vec array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + */ + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +/** + * kernel_sendmsg_locked - send a message through @sock (kernel-space) + * @sk: sock + * @msg: message header + * @vec: output s/g array + * @num: output s/g array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + * Caller must hold @sk. + */ + int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +/** + * sock_recvmsg - receive a message from @sock + * @sock: socket + * @msg: message to receive + * @flags: message flags + * + * Receives @msg from @sock, passing through LSM. Returns the total number + * of bytes received, or an error. + */ + static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { @@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) EXPORT_SYMBOL(sock_recvmsg); /** - * kernel_recvmsg - Receive a message from a socket (kernel space) - * @sock: The socket to receive the message from - * @msg: Received message - * @vec: Input s/g array for message data - * @num: Size of input s/g array - * @size: Number of bytes to read - * @flags: Message flags (MSG_DONTWAIT, etc...) + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) * - * On return the msg structure contains the scatter/gather array passed in the - * vec argument. The array is modified so that it consists of the unfilled - * portion of the original array. + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. * - * The returned value is the total number of bytes received, or an error. + * The returned value is the total number of bytes received, or an error. */ + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ +/** + * get_net_ns - increment the refcount of the network namespace + * @ns: common namespace (net) + * + * Returns the net's common namespace. + */ + struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; @@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) return err; } +/** + * sock_create_lite - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * Creates a new socket and assigns it to @res, passing through LSM. + * The new socket initialization is not complete, see kernel_accept(). + * Returns 0 or an error. On failure @res is set to %NULL. + * This function internally uses GFP_KERNEL. + */ + int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; @@ -1224,6 +1309,21 @@ call_kill: } EXPORT_SYMBOL(sock_wake_async); +/** + * __sock_create - creates a socket + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * @kern: boolean for kernel space sockets + * + * Creates a new socket and assigns it to @res, passing through LSM. + * Returns 0 or an error. On failure @res is set to %NULL. @kern must + * be set to true if the socket resides in kernel space. + * This function internally uses GFP_KERNEL. + */ + int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { @@ -1333,12 +1433,35 @@ out_release: } EXPORT_SYMBOL(__sock_create); +/** + * sock_create - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } EXPORT_SYMBOL(sock_create); +/** + * sock_create_kern - creates a socket (kernel space) + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { return __sock_create(net, family, type, protocol, res, 1); @@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, } #endif +/** + * kernel_bind - bind an address to a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: length of address + * + * Returns 0 or an error. + */ + int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } EXPORT_SYMBOL(kernel_bind); +/** + * kernel_listen - move socket to listening state (kernel space) + * @sock: socket + * @backlog: pending connections queue size + * + * Returns 0 or an error. + */ + int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } EXPORT_SYMBOL(kernel_listen); +/** + * kernel_accept - accept a connection (kernel space) + * @sock: listening socket + * @newsock: new connected socket + * @flags: flags + * + * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0. + * If it fails, @newsock is guaranteed to be %NULL. + * Returns 0 or an error. + */ + int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; @@ -3359,6 +3510,19 @@ done: } EXPORT_SYMBOL(kernel_accept); +/** + * kernel_connect - connect a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: address length + * @flags: flags (O_NONBLOCK, ...) + * + * For datagram sockets, @addr is the addres to which datagrams are sent + * by default, and the only address from which datagrams are received. + * For stream sockets, attempts to connect to @addr. + * Returns 0 or an error code. + */ + int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { @@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, } EXPORT_SYMBOL(kernel_connect); +/** + * kernel_getsockname - get the address which the socket is bound (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is bound. + * Returns 0 or an error code. + */ + int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); +/** + * kernel_peername - get the address which the socket is connected (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is connected. + * Returns 0 or an error code. + */ + int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); +/** + * kernel_getsockopt - get a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Assigns the option length to @optlen. + * Returns 0 or an error. + */ + int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { @@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_getsockopt); +/** + * kernel_setsockopt - set a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Returns 0 or an error. + */ + int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) { @@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_setsockopt); +/** + * kernel_sendpage - send a &page through a socket (kernel space) + * @sock: socket + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + */ + int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { @@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage); +/** + * kernel_sendpage_locked - send a &page through the locked sock (kernel space) + * @sk: sock + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + * Caller must hold @sk. + */ + int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); +/** + * kernel_shutdown - shut down part of a full-duplex connection (kernel space) + * @sock: socket + * @how: connection part + * + * Returns 0 or an error. + */ + int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); -/* This routine returns the IP overhead imposed by a socket i.e. - * the length of the underlying IP header, depending on whether - * this is an IPv4 or IPv6 socket and the length from IP options turned - * on at the socket. Assumes that the caller has a lock on the socket. +/** + * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket + * @sk: socket + * + * This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. Assumes that the caller has a lock on the socket. */ + u32 kernel_sock_ip_overhead(struct sock *sk) { struct inet_sock *inet; -- cgit v1.2.3 From daa5c4d0167a308306525fd5ab9a5e18e21f4f74 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 14 Mar 2019 14:49:45 +0100 Subject: net: phy: meson-gxl: fix interrupt support If an interrupt is already pending when the interrupt is enabled on the GXL phy, no IRQ will ever be triggered. The fix is simply to make sure pending IRQs are cleared before setting up the irq mask. Fixes: cf127ff20af1 ("net: phy: meson-gxl: add interrupt support") Signed-off-by: Jerome Brunet Signed-off-by: David S. Miller --- drivers/net/phy/meson-gxl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index a238388eb1a5..0eec2913c289 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -201,6 +201,7 @@ static int meson_gxl_ack_interrupt(struct phy_device *phydev) static int meson_gxl_config_intr(struct phy_device *phydev) { u16 val; + int ret; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { val = INTSRC_ANEG_PR @@ -213,6 +214,11 @@ static int meson_gxl_config_intr(struct phy_device *phydev) val = 0; } + /* Ack any pending IRQ */ + ret = meson_gxl_ack_interrupt(phydev); + if (ret) + return ret; + return phy_write(phydev, INTSRC_MASK, val); } -- cgit v1.2.3 From 4477138fa0ae4e1b699786ef0600863ea6e6c61c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2019 20:19:47 -0700 Subject: tun: properly test for IFF_UP Same reasons than the ones explained in commit 4179cb5a4c92 ("vxlan: test dev->flags & IFF_UP before calling netif_rx()") netif_rx_ni() or napi_gro_frags() must be called under a strict contract. At device dismantle phase, core networking clears IFF_UP and flush_all_backlogs() is called after rcu grace period to make sure no incoming packet might be in a cpu backlog and still referencing the device. A similar protocol is used for gro layer. Most drivers call netif_rx() from their interrupt handler, and since the interrupts are disabled at device dismantle, netif_rx() does not have to check dev->flags & IFF_UP Virtual drivers do not have this guarantee, and must therefore make the check themselves. Fixes: 1bd4978a88ac ("tun: honor IFF_UP in tun_get_user()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/tun.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1d68921723dc..0d343359f647 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1763,9 +1763,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tfile); - if (!(tun->dev->flags & IFF_UP)) - return -EIO; - if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) return -EINVAL; @@ -1867,6 +1864,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, err = skb_copy_datagram_from_iter(skb, 0, from, len); if (err) { + err = -EFAULT; +drop: this_cpu_inc(tun->pcpu_stats->rx_dropped); kfree_skb(skb); if (frags) { @@ -1874,7 +1873,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, mutex_unlock(&tfile->napi_mutex); } - return -EFAULT; + return err; } } @@ -1958,6 +1957,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); + rcu_read_lock(); + if (unlikely(!(tun->dev->flags & IFF_UP))) { + err = -EIO; + goto drop; + } + if (frags) { /* Exercise flow dissector code path. */ u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); @@ -1965,6 +1970,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (unlikely(headlen > skb_headlen(skb))) { this_cpu_inc(tun->pcpu_stats->rx_dropped); napi_free_frags(&tfile->napi); + rcu_read_unlock(); mutex_unlock(&tfile->napi_mutex); WARN_ON(1); return -ENOMEM; @@ -1992,6 +1998,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } else { netif_rx_ni(skb); } + rcu_read_unlock(); stats = get_cpu_ptr(tun->pcpu_stats); u64_stats_update_begin(&stats->syncp); -- cgit v1.2.3 From 044175a06706d516aa42874bb44dbbfc3c4d20eb Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 13 Mar 2019 15:15:49 +0100 Subject: xsk: fix umem memory leak on cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the umem is cleaned up, the task that created it might already be gone. If the task was gone, the xdp_umem_release function did not free the pages member of struct xdp_umem. It turned out that the task lookup was not needed at all; The code was a left-over when we moved from task accounting to user accounting [1]. This patch fixes the memory leak by removing the task lookup logic completely. [1] https://lore.kernel.org/netdev/20180131135356.19134-3-bjorn.topel@gmail.com/ Link: https://lore.kernel.org/netdev/c1cb2ca8-6a14-3980-8672-f3de0bb38dfd@suse.cz/ Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt") Reported-by: Jiri Slaby Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 1 - net/xdp/xdp_umem.c | 19 +------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 61cf7dbb6782..d074b6d60f8a 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -36,7 +36,6 @@ struct xdp_umem { u32 headroom; u32 chunk_size_nohr; struct user_struct *user; - struct pid *pid; unsigned long address; refcount_t users; struct work_struct work; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 77520eacee8f..989e52386c35 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem) { - struct task_struct *task; - struct mm_struct *mm; - xdp_umem_clear_dev(umem); ida_simple_remove(&umem_ida, umem->id); @@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem) xdp_umem_unpin_pages(umem); - task = get_pid_task(umem->pid, PIDTYPE_PID); - put_pid(umem->pid); - if (!task) - goto out; - mm = get_task_mm(task); - put_task_struct(task); - if (!mm) - goto out; - - mmput(mm); kfree(umem->pages); umem->pages = NULL; xdp_umem_unaccount_pages(umem); -out: kfree(umem); } @@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (size_chk < 0) return -EINVAL; - umem->pid = get_task_pid(current, PIDTYPE_PID); umem->address = (unsigned long)addr; umem->chunk_mask = ~((u64)chunk_size - 1); umem->size = size; @@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) err = xdp_umem_account_pages(umem); if (err) - goto out; + return err; err = xdp_umem_pin_pages(umem); if (err) @@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) out_account: xdp_umem_unaccount_pages(umem); -out: - put_pid(umem->pid); return err; } -- cgit v1.2.3 From 86be36f6502c52ddb4b85938145324fd07332da1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 15 Mar 2019 20:21:19 +0530 Subject: powerpc: bpf: Fix generation of load/store DW instructions Yauheni Kaliuta pointed out that PTR_TO_STACK store/load verifier test was failing on powerpc64 BE, and rightfully indicated that the PPC_LD() macro is not masking away the last two bits of the offset per the ISA, resulting in the generation of 'lwa' instruction instead of the intended 'ld' instruction. Segher also pointed out that we can't simply mask away the last two bits as that will result in loading/storing from/to a memory location that was not intended. This patch addresses this by using ldx/stdx if the offset is not word-aligned. We load the offset into a temporary register (TMP_REG_2) and use that as the index register in a subsequent ldx/stdx. We fix PPC_LD() macro to mask off the last two bits, but enhance PPC_BPF_LL() and PPC_BPF_STL() to factor in the offset value and generate the proper instruction sequence. We also convert all existing users of PPC_LD() and PPC_STD() to use these macros. All existing uses of these macros have been audited to ensure that TMP_REG_2 can be clobbered. Fixes: 156d0e290e96 ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Cc: stable@vger.kernel.org # v4.9+ Reported-by: Yauheni Kaliuta Signed-off-by: Naveen N. Rao Signed-off-by: Daniel Borkmann --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/net/bpf_jit.h | 17 +++++------------ arch/powerpc/net/bpf_jit32.h | 4 ++++ arch/powerpc/net/bpf_jit64.h | 20 ++++++++++++++++++++ arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++------ 5 files changed, 37 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index f9513ad38fa6..e6c9ad088f45 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -302,6 +302,7 @@ /* Misc instructions for BPF compiler */ #define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 +#define PPC_INST_LDX 0x7c00002a #define PPC_INST_LHZ 0xa0000000 #define PPC_INST_LWZ 0x80000000 #define PPC_INST_LHBRX 0x7c00062c @@ -309,6 +310,7 @@ #define PPC_INST_STB 0x98000000 #define PPC_INST_STH 0xb0000000 #define PPC_INST_STD 0xf8000000 +#define PPC_INST_STDX 0x7c00012a #define PPC_INST_STDU 0xf8000001 #define PPC_INST_STW 0x90000000 #define PPC_INST_STWU 0x94000000 diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 549e9490ff2a..dcac37745b05 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -51,6 +51,8 @@ #define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) #define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \ + ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) #define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ @@ -65,7 +67,9 @@ #define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) + ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \ + ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \ @@ -85,17 +89,6 @@ ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ ___PPC_RA(a) | ___PPC_RB(b)) - -#ifdef CONFIG_PPC64 -#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0) -#else -#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) -#endif - #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 6f4daacad296..ade04547703f 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -123,6 +123,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) #endif +#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) +#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) +#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) + #define SEEN_DATAREF 0x10000 /* might call external helpers */ #define SEEN_XREG 0x20000 /* X reg is used */ #define SEEN_MEM 0x40000 /* SEEN_MEM+(1< MAX_TAIL_CALL_CNT) * goto out; */ - PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); PPC_BCC(COND_GT, out); @@ -265,7 +265,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 /* prog = array->ptrs[index]; */ PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); - PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); /* * if (prog == NULL) @@ -275,7 +275,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); #ifdef PPC64_ELF_ABI_v1 /* skip past the function descriptor */ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], @@ -606,7 +606,7 @@ bpf_alu32_trunc: * the instructions generated will remain the * same across all passes */ - PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx)); + PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); break; @@ -662,7 +662,7 @@ emit_clear: PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_STD(src_reg, dst_reg, off); + PPC_BPF_STL(src_reg, dst_reg, off); break; /* @@ -709,7 +709,7 @@ emit_clear: break; /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: - PPC_LD(dst_reg, src_reg, off); + PPC_BPF_LL(dst_reg, src_reg, off); break; /* -- cgit v1.2.3 From 6f19893b644a9454d85e593b5e90914e7a72b7dd Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 23:20:16 -0500 Subject: net: openvswitch: fix a NULL pointer dereference upcall is dereferenced even when genlmsg_put fails. The fix goto out to avoid the NULL pointer dereference in this case. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6679e96ab1dc..45d1469308b0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -448,6 +448,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd); + if (!upcall) { + err = -EINVAL; + goto out; + } upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); -- cgit v1.2.3 From 0fff9bd47e1341b5c4db862cc39fc68ce45f165d Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 01:11:22 -0500 Subject: net: openvswitch: fix missing checks for nla_nest_start nla_nest_start may fail and thus deserves a check. The fix returns -EMSGSIZE when it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45d1469308b0..9dd158ab51b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,6 +464,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); BUG_ON(err); @@ -472,6 +476,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->actions_len) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_actions(upcall_info->actions, upcall_info->actions_len, user_skb); -- cgit v1.2.3 From 07660ca679da3007d3231938e2dfb415d3440716 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 01:14:33 -0500 Subject: net: ncsi: fix a missing check for nla_nest_start nla_nest_start may fail and thus deserves a check. The fix returns -EMSGSIZE in case it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/ncsi/ncsi-netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 5d782445d2fc..bad17bba8ba7 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -251,6 +251,10 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, } attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + if (!attr) { + rc = -EMSGSIZE; + goto err; + } rc = ncsi_write_package_info(skb, ndp, package->id); if (rc) { nla_nest_cancel(skb, attr); -- cgit v1.2.3 From 4589e28db46ee4961edfd794c5bb43887d38c8e5 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 12:11:59 -0500 Subject: net: tipc: fix a missing check of nla_nest_start nla_nest_start could fail and requires a check. The fix returns -EMSGSIZE if it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/group.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tipc/group.c b/net/tipc/group.c index 06fee142f09f..63f39201e41e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -919,6 +919,9 @@ int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) { struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + if (!group) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID, grp->type) || nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE, -- cgit v1.2.3 From 9180bb4f046064dfa4541488102703b402bb04e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 16 Mar 2019 13:09:53 -0700 Subject: tun: add a missing rcu_read_unlock() in error path In my latest patch I missed one rcu_read_unlock(), in case device is down. Fixes: 4477138fa0ae ("tun: properly test for IFF_UP") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0d343359f647..e9ca1c088d0b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1960,6 +1960,7 @@ drop: rcu_read_lock(); if (unlikely(!(tun->dev->flags & IFF_UP))) { err = -EIO; + rcu_read_unlock(); goto drop; } -- cgit v1.2.3 From 517ccc2aa50dbd7767a9eb8e1d9987a3ed7ced3e Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sat, 16 Mar 2019 16:46:05 -0500 Subject: net: tipc: fix a missing check for nla_nest_start nla_nest_start may fail. The fix check its status and returns -EMSGSIZE in case it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3274ef625dba..d6b26862b34e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3255,6 +3255,8 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) peer_port = tsk_peer_port(tsk); nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + if (!nest) + return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) goto msg_full; -- cgit v1.2.3 From 65e9a6d25deb752d97b88335068dc0e7accbc9c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 17 Mar 2019 17:17:45 -0700 Subject: networking: fix snmp_counter.rst Doc. Warnings Fix documentation markup warnings in snmp_counter.rst: Documentation/networking/snmp_counter.rst:416: WARNING: Title underline too short. Documentation/networking/snmp_counter.rst:684: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:693: WARNING: Title underline too short. Documentation/networking/snmp_counter.rst:707: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:712: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:722: WARNING: Title underline too short. Documentation/networking/snmp_counter.rst:733: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:736: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:739: WARNING: Bullet list ends without a blank line; unexpected unindent. Fixes: 80cc49507ba48 ("net: Add part of TCP counts explanations in snmp_counters.rst") Fixes: 8e2ea53a83dfb ("add snmp counters document") Fixes: a6c7c7aac2de6 ("net: add document for several snmp counters") Signed-off-by: Randy Dunlap Cc: yupeng --- Documentation/networking/snmp_counter.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst index 52b026be028f..38a4edc4522b 100644 --- a/Documentation/networking/snmp_counter.rst +++ b/Documentation/networking/snmp_counter.rst @@ -413,7 +413,7 @@ algorithm. .. _F-RTO: https://tools.ietf.org/html/rfc5682 TCP Fast Path -============ +============= When kernel receives a TCP packet, it has two paths to handler the packet, one is fast path, another is slow path. The comment in kernel code provides a good explanation of them, I pasted them below:: @@ -681,6 +681,7 @@ The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender. * TcpExtTCPDSACKRecv + The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received. @@ -690,7 +691,7 @@ The TCP stack receives a DSACK, which indicate an out of order duplicate packet is received. invalid SACK and DSACK -==================== +====================== When a SACK (or DSACK) block is invalid, a corresponding counter would be updated. The validation method is base on the start/end sequence number of the SACK block. For more details, please refer the comment @@ -704,11 +705,13 @@ explaination: .. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32 * TcpExtTCPSACKDiscard + This counter indicates how many SACK blocks are invalid. If the invalid SACK block is caused by ACK recording, the TCP stack will only ignore it and won't update this counter. * TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo + When a DSACK block is invalid, one of these two counters would be updated. Which counter will be updated depends on the undo_marker flag of the TCP socket. If the undo_marker is not set, the TCP stack isn't @@ -719,7 +722,7 @@ will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld will be updated. As implied in its name, it might be an old packet. SACK shift -========= +========== The linux networking stack stores data in sk_buff struct (skb for short). If a SACK block acrosses multiple skb, the TCP stack will try to re-arrange data in these skb. E.g. if a SACK block acknowledges seq @@ -730,12 +733,15 @@ seq 14 to 20. All data in skb2 will be moved to skb1, and skb2 will be discard, this operation is 'merge'. * TcpExtTCPSackShifted + A skb is shifted * TcpExtTCPSackMerged + A skb is merged * TcpExtTCPSackShiftFallback + A skb should be shifted or merged, but the TCP stack doesn't do it for some reasons. -- cgit v1.2.3 From ea239314fe42ace880bdd834256834679346c80e Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Sun, 17 Mar 2019 18:46:42 +0100 Subject: tipc: allow service ranges to be connect()'ed on RDM/DGRAM We move the check that prevents connecting service ranges to after the RDM/DGRAM check, and move address sanity control to a separate function that also validates the service range. Fixes: 23998835be98 ("tipc: improve address sanity check in tipc_connect()") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d6b26862b34e..b542f14ed444 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2349,6 +2349,16 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) return 0; } +static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) +{ + if (addr->family != AF_TIPC) + return false; + if (addr->addrtype == TIPC_SERVICE_RANGE) + return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); + return (addr->addrtype == TIPC_SERVICE_ADDR || + addr->addrtype == TIPC_SOCKET_ADDR); +} + /** * tipc_connect - establish a connection to another TIPC port * @sock: socket structure @@ -2384,18 +2394,18 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (!tipc_sk_type_connectionless(sk)) res = -EINVAL; goto exit; - } else if (dst->family != AF_TIPC) { - res = -EINVAL; } - if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME) + if (!tipc_sockaddr_is_sane(dst)) { res = -EINVAL; - if (res) goto exit; - + } /* DGRAM/RDM connect(), just save the destaddr */ if (tipc_sk_type_connectionless(sk)) { memcpy(&tsk->peer, dest, destlen); goto exit; + } else if (dst->addrtype == TIPC_SERVICE_RANGE) { + res = -EINVAL; + goto exit; } previous = sk->sk_state; -- cgit v1.2.3 From 29b0b5d56589d66bd5793f1e09211ce7d7d3cd36 Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Mon, 11 Mar 2019 17:18:42 +0100 Subject: netfilter: nf_conntrack_sip: remove direct dependency on IPv6 Previous implementation was not usable with CONFIG_IPV6=m. Fixes: a3419ce3356c ("netfilter: nf_conntrack_sip: add sip_external_media logic") Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index f067c6b50857..39fcc1ed18f3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -20,9 +20,9 @@ #include #include #include +#include +#include -#include -#include #include #include #include @@ -871,38 +871,33 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, } else if (sip_external_media) { struct net_device *dev = skb_dst(skb)->dev; struct net *net = dev_net(dev); - struct rtable *rt; - struct flowi4 fl4 = {}; -#if IS_ENABLED(CONFIG_IPV6) - struct flowi6 fl6 = {}; -#endif + struct flowi fl; struct dst_entry *dst = NULL; + memset(&fl, 0, sizeof(fl)); + switch (nf_ct_l3num(ct)) { case NFPROTO_IPV4: - fl4.daddr = daddr->ip; - rt = ip_route_output_key(net, &fl4); - if (!IS_ERR(rt)) - dst = &rt->dst; + fl.u.ip4.daddr = daddr->ip; + nf_ip_route(net, &dst, &fl, false); break; -#if IS_ENABLED(CONFIG_IPV6) case NFPROTO_IPV6: - fl6.daddr = daddr->in6; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - dst_release(dst); - dst = NULL; - } + fl.u.ip6.daddr = daddr->in6; + nf_ip6_route(net, &dst, &fl, false); break; -#endif } /* Don't predict any conntracks when media endpoint is reachable * through the same interface as the signalling peer. */ - if (dst && dst->dev == dev) - return NF_ACCEPT; + if (dst) { + bool external_media = (dst->dev == dev); + + dst_release(dst); + if (external_media) + return NF_ACCEPT; + } } /* We need to check whether the registration exists before attempting -- cgit v1.2.3 From 05b7639da55f5555b9866a1f4b7e8995232a6323 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 12 Mar 2019 12:10:59 +0100 Subject: netfilter: nft_set_rbtree: check for inactive element after flag mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise, we hit bogus ENOENT when removing elements. Fixes: e701001e7cbe ("netfilter: nft_rbtree: allow adjacent intervals with dynamic updates") Reported-by: Václav Zindulka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fa61208371f8..321a0036fdf5 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -308,10 +308,6 @@ static void *nft_rbtree_deactivate(const struct net *net, else if (d > 0) parent = parent->rb_right; else { - if (!nft_set_elem_active(&rbe->ext, genmask)) { - parent = parent->rb_left; - continue; - } if (nft_rbtree_interval_end(rbe) && !nft_rbtree_interval_end(this)) { parent = parent->rb_left; @@ -320,6 +316,9 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; } nft_rbtree_flush(net, set, rbe); return rbe; -- cgit v1.2.3 From e166e4fdaced850bee3d5ee12a5740258fb30587 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 13 Mar 2019 16:33:29 +0800 Subject: netfilter: bridge: set skb transport_header before entering NF_INET_PRE_ROUTING Since Commit 21d1196a35f5 ("ipv4: set transport header earlier"), skb->transport_header has been always set before entering INET netfilter. This patch is to set skb->transport_header for bridge before entering INET netfilter by bridge-nf-call-iptables. It also fixes an issue that sctp_error() couldn't compute a right csum due to unset skb->transport_header. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Reported-by: Li Shuang Suggested-by: Pablo Neira Ayuso Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 1 + net/bridge/br_netfilter_ipv6.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 9d34de68571b..22afa566cbce 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -502,6 +502,7 @@ static unsigned int br_nf_pre_routing(void *priv, nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); + skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 564710f88f93..e88d6641647b 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -235,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); -- cgit v1.2.3 From d1fa381033eb718df5c602f64b6e88676138dfc6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2019 22:15:59 +0100 Subject: netfilter: fix NETFILTER_XT_TARGET_TEE dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With NETFILTER_XT_TARGET_TEE=y and IP6_NF_IPTABLES=m, we get a link error when referencing the NF_DUP_IPV6 module: net/netfilter/xt_TEE.o: In function `tee_tg6': xt_TEE.c:(.text+0x14): undefined reference to `nf_dup_ipv6' The problem here is the 'select NF_DUP_IPV6 if IP6_NF_IPTABLES' that forces NF_DUP_IPV6 to be =m as well rather than setting it to =y as was intended here. Adding a soft dependency on IP6_NF_IPTABLES avoids that broken configuration. Fixes: 5d400a4933e8 ("netfilter: Kconfig: Change select IPv6 dependencies") Cc: Máté Eckl Cc: Taehee Yoo Link: https://patchwork.ozlabs.org/patch/999498/ Link: https://lore.kernel.org/patchwork/patch/960062/ Reported-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d43ffb09939b..6548271209a0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1007,6 +1007,7 @@ config NETFILTER_XT_TARGET_TEE depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK + depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES select NF_DUP_IPV4 select NF_DUP_IPV6 if IP6_NF_IPTABLES ---help--- -- cgit v1.2.3 From 6d65561f3d5ec933151939c543d006b79044e7a6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 02:58:18 -0500 Subject: netfilter: ip6t_srh: fix NULL pointer dereferences skb_header_pointer may return NULL. The current code dereference its return values without a NULL check. The fix inserts the checks to avoid NULL pointer dereferences. Fixes: 202a8ff545cc ("netfilter: add IPv6 segment routing header 'srh' match") Signed-off-by: Kangjie Lu Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_srh.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c index 1059894a6f4c..4cb83fb69844 100644 --- a/net/ipv6/netfilter/ip6t_srh.c +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) psidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left + 1) * sizeof(struct in6_addr)); psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid); + if (!psid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID, ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk, &srhinfo->psid_addr))) @@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left - 1) * sizeof(struct in6_addr)); nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid); + if (!nsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID, ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk, &srhinfo->nsid_addr))) @@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (srhinfo->mt_flags & IP6T_SRH_LSID) { lsidoff = srhoff + sizeof(struct ipv6_sr_hdr); lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid); + if (!lsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID, ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk, &srhinfo->lsid_addr))) -- cgit v1.2.3 From 8ffcd32f64633926163cdd07a7d295c500a947d1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2019 10:50:20 +0100 Subject: netfilter: nf_tables: bogus EBUSY in helper removal from transaction Proper use counter updates when activating and deactivating the object, otherwise, this hits bogus EBUSY error. Fixes: cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") Reported-by: Laura Garcia Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_objref.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 457a9ceb46af..8dfa798ea683 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -65,21 +65,34 @@ nla_put_failure: return -1; } -static void nft_objref_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_objref_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_object *obj = nft_objref_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + obj->use--; } +static void nft_objref_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->use++; +} + static struct nft_expr_type nft_objref_type; static const struct nft_expr_ops nft_objref_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), .eval = nft_objref_eval, .init = nft_objref_init, - .destroy = nft_objref_destroy, + .activate = nft_objref_activate, + .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, }; -- cgit v1.2.3 From 74710e05906c37da6b436386dc13c44dbe5d8308 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 15 Mar 2019 17:21:01 +0100 Subject: netfilter: nft_redir: fix module autoload with ip4 AF_INET4 does not exist. Fixes: c78efc99c750 ("netfilter: nf_tables: nat: merge nft_redir protocol specific modules)" Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_redir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index f8092926f704..a340cd8a751b 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -233,5 +233,5 @@ module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET4, "redir"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); -- cgit v1.2.3 From f01a7dbe98ae4265023fa5d3af0f076f0b18a647 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Mon, 18 Mar 2019 16:10:26 +0100 Subject: bpf: Try harder when allocating memory for large maps It has been observed that sometimes a higher order memory allocation for BPF maps fails when there is no obvious memory pressure in a system. E.g. the map (BPF_MAP_TYPE_LRU_HASH, key=38, value=56, max_elems=524288) could not be created due to vmalloc unable to allocate 75497472B, when the system's memory consumption (in MB) was the following: Total: 3942 Used: 837 (21.24%) Free: 138 Buffers: 239 Cached: 2727 Later analysis [1] by Michal Hocko showed that the vmalloc was not trying to reclaim memory from the page cache and was failing prematurely due to __GFP_NORETRY. Considering dcda9b0471 ("mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more useful semantic") and [1], we can replace __GFP_NORETRY with __GFP_RETRY_MAYFAIL, as it won't invoke OOM killer and will try harder to fulfil allocation requests. Unfortunately, replacing the body of the BPF map memory allocation function with the kvmalloc_node helper function is not an option at this point in time, given 1) kmalloc is non-optional for higher order allocations, and 2) passing __GFP_RETRY_MAYFAIL to the kmalloc would stress the slab allocator too much for large requests. The change has been tested with the workloads mentioned above and by observing oom_kill value from /proc/vmstat. [1]: https://lore.kernel.org/bpf/20190310071318.GW5232@dhcp22.suse.cz/ Signed-off-by: Martynas Pumputis Acked-by: Yonghong Song Cc: Michal Hocko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20190318153940.GL8924@dhcp22.suse.cz/ --- kernel/bpf/syscall.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 62f6bced3a3c..afca36f53c49 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -136,21 +136,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) void *bpf_map_area_alloc(size_t size, int numa_node) { - /* We definitely need __GFP_NORETRY, so OOM killer doesn't - * trigger under memory pressure as we really just want to - * fail instead. + /* We really just want to fail instead of triggering OOM killer + * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, + * which is used for lower order allocation requests. + * + * It has been observed that higher order allocation requests done by + * vmalloc with __GFP_NORETRY being set might fail due to not trying + * to reclaim memory from the page cache, thus we set + * __GFP_RETRY_MAYFAIL to avoid such situations. */ - const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; + + const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; void *area; if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { - area = kmalloc_node(size, GFP_USER | flags, numa_node); + area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, + numa_node); if (area != NULL) return area; } - return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags, - __builtin_return_address(0)); + return __vmalloc_node_flags_caller(size, numa_node, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | + flags, __builtin_return_address(0)); } void bpf_map_area_free(void *area) -- cgit v1.2.3 From 25208dd856e74f2b60d053eb98e6dd335816fbc1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 18 Mar 2019 12:08:58 +0100 Subject: doc: fix link to MSG_ZEROCOPY patchset Use https and link to the patch directly. Signed-off-by: Tobias Klauser Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/msg_zerocopy.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst index 18c1415e7bfa..ace56204dd03 100644 --- a/Documentation/networking/msg_zerocopy.rst +++ b/Documentation/networking/msg_zerocopy.rst @@ -50,7 +50,7 @@ the excellent reporting over at LWN.net or read the original code. patchset [PATCH net-next v4 0/9] socket sendmsg MSG_ZEROCOPY - http://lkml.kernel.org/r/20170803202945.70750-1-willemdebruijn.kernel@gmail.com + https://lkml.kernel.org/netdev/20170803202945.70750-1-willemdebruijn.kernel@gmail.com Interface -- cgit v1.2.3 From e5dcc0c3223c45c94100f05f28d8ef814db3d82c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Mar 2019 10:41:14 -0700 Subject: net: rose: fix a possible stack overflow rose_write_internal() uses a temp buffer of 100 bytes, but a manual inspection showed that given arbitrary input, rose_create_facilities() can fill up to 110 bytes. Lets use a tailroom of 256 bytes for peace of mind, and remove the bounce buffer : we can simply allocate a big enough skb and adjust its length as needed. syzbot report : BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:352 [inline] BUG: KASAN: stack-out-of-bounds in rose_create_facilities net/rose/rose_subr.c:521 [inline] BUG: KASAN: stack-out-of-bounds in rose_write_internal+0x597/0x15d0 net/rose/rose_subr.c:116 Write of size 7 at addr ffff88808b1ffbef by task syz-executor.0/24854 CPU: 0 PID: 24854 Comm: syz-executor.0 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 memcpy+0x38/0x50 mm/kasan/common.c:131 memcpy include/linux/string.h:352 [inline] rose_create_facilities net/rose/rose_subr.c:521 [inline] rose_write_internal+0x597/0x15d0 net/rose/rose_subr.c:116 rose_connect+0x7cb/0x1510 net/rose/af_rose.c:826 __sys_connect+0x266/0x330 net/socket.c:1685 __do_sys_connect net/socket.c:1696 [inline] __se_sys_connect net/socket.c:1693 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1693 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458079 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f47b8d9dc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000458079 RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f47b8d9e6d4 R13: 00000000004be4a4 R14: 00000000004ceca8 R15: 00000000ffffffff The buggy address belongs to the page: page:ffffea00022c7fc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x1fffc0000000000() raw: 01fffc0000000000 0000000000000000 ffffffff022c0101 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88808b1ffa80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88808b1ffb00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 03 >ffff88808b1ffb80: f2 f2 00 00 00 00 00 00 00 00 00 00 00 00 04 f3 ^ ffff88808b1ffc00: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 ffff88808b1ffc80: 00 00 00 00 00 00 00 f1 f1 f1 f1 f1 f1 01 f2 01 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/rose/rose_subr.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7ca57741b2fb..7849f286bb93 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype) struct sk_buff *skb; unsigned char *dptr; unsigned char lci1, lci2; - char buffer[100]; - int len, faclen = 0; + int maxfaclen = 0; + int len, faclen; + int reserve; - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1; + reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1; + len = ROSE_MIN_LEN; switch (frametype) { case ROSE_CALL_REQUEST: len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN; - faclen = rose_create_facilities(buffer, rose); - len += faclen; + maxfaclen = 256; break; case ROSE_CALL_ACCEPTED: case ROSE_CLEAR_REQUEST: @@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype) break; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC); + if (!skb) return; /* * Space for AX.25 header and PID. */ - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1); + skb_reserve(skb, reserve); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); lci1 = (rose->lci >> 8) & 0x0F; lci2 = (rose->lci >> 0) & 0xFF; @@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype) dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; - memcpy(dptr, buffer, faclen); + faclen = rose_create_facilities(dptr, rose); + skb_put(skb, faclen); dptr += faclen; break; -- cgit v1.2.3 From c22da36688d6298f2e546dcc43fdc1ad35036467 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 16 Mar 2019 01:00:50 +0100 Subject: gtp: change NET_UDP_TUNNEL dependency to select Similarly to commit a7603ac1fc8c ("geneve: change NET_UDP_TUNNEL dependency to select"), GTP has a dependency on NET_UDP_TUNNEL which makes impossible to compile it if no other protocol depending on NET_UDP_TUNNEL is selected. Fix this by changing the depends to a select, and drop NET_IP_TUNNEL from the select list, as it already depends on NET_UDP_TUNNEL. Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- drivers/net/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5e4ca082cfcd..7a96d168efc4 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -216,8 +216,8 @@ config GENEVE config GTP tristate "GPRS Tunneling Protocol datapath (GTP-U)" - depends on INET && NET_UDP_TUNNEL - select NET_IP_TUNNEL + depends on INET + select NET_UDP_TUNNEL ---help--- This allows one to create gtp virtual interfaces that provide the GPRS Tunneling Protocol datapath (GTP-U). This tunneling protocol -- cgit v1.2.3 From bb9e5c5bcd76f4474eac3baf643d7a39f7bac7bb Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 16 Mar 2019 14:21:19 +1100 Subject: mac8390: Fix mmio access size probe The bug that Stan reported is as follows. After a restart, a 16-bit NIC may be incorrectly identified as a 32-bit NIC and stop working. mac8390 slot.E: Memory length resource not found, probing mac8390 slot.E: Farallon EtherMac II-C (type farallon) mac8390 slot.E: MAC 00:00:c5:30:c2:99, IRQ 61, 32 KB shared memory at 0xfeed0000, 32-bit access. The bug never arises after a cold start and only intermittently after a warm start. (I didn't investigate why the bug is intermittent.) It turns out that memcpy_toio() is deprecated and memcmp_withio() also has issues. Replacing these calls with mmio accessors fixes the problem. Reported-and-tested-by: Stan Johnson Fixes: 2964db0f5904 ("m68k: Mac DP8390 update") Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/mac8390.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 342ae08ec3c2..d60a86aa8aa8 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -153,8 +153,6 @@ static void dayna_block_input(struct net_device *dev, int count, static void dayna_block_output(struct net_device *dev, int count, const unsigned char *buf, int start_page); -#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c)) - /* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */ static void slow_sane_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page); @@ -233,19 +231,26 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres) static enum mac8390_access mac8390_testio(unsigned long membase) { - unsigned long outdata = 0xA5A0B5B0; - unsigned long indata = 0x00000000; + u32 outdata = 0xA5A0B5B0; + u32 indata = 0; + /* Try writing 32 bits */ - memcpy_toio((void __iomem *)membase, &outdata, 4); - /* Now compare them */ - if (memcmp_withio(&outdata, membase, 4) == 0) + nubus_writel(outdata, membase); + /* Now read it back */ + indata = nubus_readl(membase); + if (outdata == indata) return ACCESS_32; + + outdata = 0xC5C0D5D0; + indata = 0; + /* Write 16 bit output */ word_memcpy_tocard(membase, &outdata, 4); /* Now read it back */ word_memcpy_fromcard(&indata, membase, 4); if (outdata == indata) return ACCESS_16; + return ACCESS_UNKNOWN; } -- cgit v1.2.3 From a7faaa0c5dc7d091cc9f72b870d7edcdd6f43f12 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Sat, 16 Mar 2019 08:28:18 +0000 Subject: net: aquantia: fix rx checksum offload for UDP/TCP over IPv6 TCP/UDP checksum validity was propagated to skb only if IP checksum is valid. But for IPv6 there is no validity as there is no checksum in IPv6. This patch propagates TCP/UDP checksum validity regardless of IP checksum. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Igor Russkikh Signed-off-by: Nikita Danilov Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 74550ccc7a20..e2ffb159cbe2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -186,11 +186,12 @@ static void aq_rx_checksum(struct aq_ring_s *self, } if (buff->is_ip_cso) { __skb_incr_checksum_unnecessary(skb); - if (buff->is_udp_cso || buff->is_tcp_cso) - __skb_incr_checksum_unnecessary(skb); } else { skb->ip_summed = CHECKSUM_NONE; } + + if (buff->is_udp_cso || buff->is_tcp_cso) + __skb_incr_checksum_unnecessary(skb); } #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) -- cgit v1.2.3 From cc4807bb609230d8959fd732b0bf3bd4c2de8eac Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Sat, 16 Mar 2019 17:02:54 +0800 Subject: vxlan: Don't call gro_cells_destroy() before device is unregistered Commit ad6c9986bcb62 ("vxlan: Fix GRO cells race condition between receive and link delete") fixed a race condition for the typical case a vxlan device is dismantled from the current netns. But if a netns is dismantled, vxlan_destroy_tunnels() is called to schedule a unregister_netdevice_queue() of all the vxlan tunnels that are related to this netns. In vxlan_destroy_tunnels(), gro_cells_destroy() is called and finished before unregister_netdevice_queue(). This means that the gro_cells_destroy() call is done too soon, for the same reasons explained in above commit. So we need to fully respect the RCU rules, and thus must remove the gro_cells_destroy() call or risk use after-free. Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer") Signed-off-by: Suanming.Mou Suggested-by: Eric Dumazet Reviewed-by: Stefano Brivio Reviewed-by: Zhiqiang Liu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 077f1b9f2761..d76dfed8d9bb 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4335,10 +4335,8 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) /* If vxlan->dev is in the same netns, it has already been added * to the list by the previous loop. */ - if (!net_eq(dev_net(vxlan->dev), net)) { - gro_cells_destroy(&vxlan->gro_cells); + if (!net_eq(dev_net(vxlan->dev), net)) unregister_netdevice_queue(vxlan->dev, head); - } } for (h = 0; h < PORT_HASH_SIZE; ++h) -- cgit v1.2.3 From a4dc6a49156b1f8d6e17251ffda17c9e6a5db78a Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Sat, 16 Mar 2019 14:41:30 +0100 Subject: packets: Always register packet sk in the same order When using fanouts with AF_PACKET, the demux functions such as fanout_demux_cpu will return an index in the fanout socket array, which corresponds to the selected socket. The ordering of this array depends on the order the sockets were added to a given fanout group, so for FANOUT_CPU this means sockets are bound to cpus in the order they are configured, which is OK. However, when stopping then restarting the interface these sockets are bound to, the sockets are reassigned to the fanout group in the reverse order, due to the fact that they were inserted at the head of the interface's AF_PACKET socket list. This means that traffic that was directed to the first socket in the fanout group is now directed to the last one after an interface restart. In the case of FANOUT_CPU, traffic from CPU0 will be directed to the socket that used to receive traffic from the last CPU after an interface restart. This commit introduces a helper to add a socket at the tail of a list, then uses it to register AF_PACKET sockets. Note that this changes the order in which sockets are listed in /proc and with sock_diag. Fixes: dc99f600698d ("packet: Add fanout support") Signed-off-by: Maxime Chevallier Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 6 ++++++ net/packet/af_packet.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 328cb7cb7b0b..8de5ee258b93 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -710,6 +710,12 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) hlist_add_head_rcu(&sk->sk_node, list); } +static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + hlist_add_tail_rcu(&sk->sk_node, list); +} + static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8376bc1c1508..8754d7c93b84 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3243,7 +3243,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, } mutex_lock(&net->packet.sklist_lock); - sk_add_node_rcu(sk, &net->packet.sklist); + sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); -- cgit v1.2.3 From 18bed89107a400af0d672ec85a270f1545db2569 Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Mon, 18 Mar 2019 14:39:52 +0900 Subject: af_packet: fix the tx skb protocol in raw sockets with ETH_P_ALL I am using "protocol ip" filters in TC to manipulate TC flower classifiers, which are only available with "protocol ip". However, I faced an issue that packets sent via raw sockets with ETH_P_ALL did not match the ip filters even if they did satisfy the condition (e.g., DHCP offer from dhcpd). I have determined that the behavior was caused by an unexpected value stored in skb->protocol, namely, ETH_P_ALL instead of ETH_P_IP, when packets were sent via raw sockets with ETH_P_ALL set. IMHO, storing ETH_P_ALL in skb->protocol is not appropriate for packets sent via raw sockets because ETH_P_ALL is not a real ether type used on wire, but a virtual one. This patch fixes the tx protocol selection in cases of transmission via raw sockets created with ETH_P_ALL so that it asks the driver to extract protocol from the Ethernet header. Fixes: 75c65772c3 ("net/packet: Ask driver for protocol if not provided by user") Signed-off-by: Yoshiki Komachi Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8754d7c93b84..323655a25674 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1852,7 +1852,8 @@ oom: static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { - if (!skb->protocol && sock->type == SOCK_RAW) { + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && + sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } -- cgit v1.2.3 From 273160ffc6b993c7c91627f5a84799c66dfe4dee Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 19:47:00 +0800 Subject: sctp: get sctphdr by offset in sctp_compute_cksum sctp_hdr(skb) only works when skb->transport_header is set properly. But in Netfilter, skb->transport_header for ipv6 is not guaranteed to be right value for sctphdr. It would cause to fail to check the checksum for sctp packets. So fix it by using offset, which is always right in all places. v1->v2: - Fix the changelog. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Reported-by: Li Shuang Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 32ee65a30aff..1c6e6c0766ca 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -61,7 +61,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { - struct sctphdr *sh = sctp_hdr(skb); + struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); const struct skb_checksum_ops ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, -- cgit v1.2.3 From 636d25d557d1073281013c43e4ff4737692da2d4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 19:58:29 +0800 Subject: sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant Now sctp_copy_descendant() copies pd_lobby from old sctp scok to new sctp sock. If sctp_sock_migrate() returns error, it will panic when releasing new sock and trying to purge pd_lobby due to the incorrect pointers in pd_lobby. [ 120.485116] kasan: CONFIG_KASAN_INLINE enabled [ 120.486270] kasan: GPF could be caused by NULL-ptr deref or user [ 120.509901] Call Trace: [ 120.510443] sctp_ulpevent_free+0x1e8/0x490 [sctp] [ 120.511438] sctp_queue_purge_ulpevents+0x97/0xe0 [sctp] [ 120.512535] sctp_close+0x13a/0x700 [sctp] [ 120.517483] inet_release+0xdc/0x1c0 [ 120.518215] __sock_release+0x1d2/0x2a0 [ 120.519025] sctp_do_peeloff+0x30f/0x3c0 [sctp] We fix it by not copying sctp_sock pd_lobby in sctp_copy_descendan(), and skb_queue_head_init() can also be removed in sctp_sock_migrate(). Reported-by: syzbot+85e0b422ff140b03672a@syzkaller.appspotmail.com Fixes: 89664c623617 ("sctp: sctp_sock_migrate() returns error if sctp_bind_addr_dup() fails") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6140471efd4b..65b538604c5b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9169,7 +9169,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to, { int ancestor_size = sizeof(struct inet_sock) + sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, auto_asconf_list); + offsetof(struct sctp_sock, pd_lobby); if (sk_from->sk_family == PF_INET6) ancestor_size += sizeof(struct ipv6_pinfo); @@ -9253,7 +9253,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby. * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue. */ - skb_queue_head_init(&newsp->pd_lobby); atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode); if (atomic_read(&sctp_sk(oldsk)->pd_mode)) { -- cgit v1.2.3 From 1354e72fabf4d8763817564648984351755f0ccb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 18 Mar 2019 20:05:59 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt Currently if the user pass an invalid asoc_id to SCTP_DEFAULT_SEND_PARAM on a TCP-style socket, it will silently ignore the new parameters. That's because after not finding an asoc, it is checking asoc_id against the known values of CURRENT/FUTURE/ALL values and that fails to match. IOW, if the user supplies an invalid asoc id or not, it should either match the current asoc or the socket itself so that it will inherit these later. Fixes it by forcing asoc_id to SCTP_FUTURE_ASSOC in case it is a TCP-style socket without an asoc, so that the values get set on the socket. Fixes: 707e45b3dc5a ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DEFAULT_SEND_PARAM sockopt") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 65b538604c5b..d0e5c627a266 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3024,6 +3024,9 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + info.sinfo_assoc_id = SCTP_FUTURE_ASSOC; + if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || info.sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.sinfo_stream; -- cgit v1.2.3 From 8e2614fc1c2a525d3244df65da486fc914a2bf78 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:00 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DELAYED_SACK sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_DELAYED_SACK sockopt. Fixes: 9c5829e1c49e ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DELAYED_SACK sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d0e5c627a266..4c5821befaf3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2920,6 +2920,9 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + params.sack_assoc_id = SCTP_FUTURE_ASSOC; + if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || params.sack_assoc_id == SCTP_ALL_ASSOC) { if (params.sack_delay) { -- cgit v1.2.3 From a842e65b25a418363bf8196e2343123a984ee69b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:01 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SNDINFO sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_DEFAULT_SNDINFO sockopt. Fixes: 92fc3bd928c9 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DEFAULT_SNDINFO sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4c5821befaf3..3bac03931b67 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3087,6 +3087,9 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + info.snd_assoc_id = SCTP_FUTURE_ASSOC; + if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || info.snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.snd_sid; -- cgit v1.2.3 From cface2cb585e392995cc11a4a814b433e6099ec7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:02 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_CONTEXT sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_CONTEXT sockopt. Fixes: 49b037acca8c ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_CONTEXT sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3bac03931b67..b024a625f78a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3540,6 +3540,9 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval, return 0; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->default_rcv_context = params.assoc_value; -- cgit v1.2.3 From 746bc215a6b223caf7eb6b33400458c15e742920 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:03 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_MAX_BURST sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_MAX_BURST sockopt. Fixes: e0651a0dc877 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_MAX_BURST sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b024a625f78a..df879b163414 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3682,6 +3682,9 @@ static int sctp_setsockopt_maxburst(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->max_burst = params.assoc_value; -- cgit v1.2.3 From 0685d6b72207a6de7ea6853e48b009e71d64fe1b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:04 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_KEY sockopt. Fixes: 7fb3be13a236 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index df879b163414..2ac221c795c2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3813,6 +3813,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + authkey->sca_assoc_id = SCTP_FUTURE_ASSOC; + if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || authkey->sca_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_key(ep, asoc, authkey); -- cgit v1.2.3 From 06b39e8506f6dd4e11e1d8fc4d314d72d237ad10 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:05 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_ACTIVE_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_ACTIVE_KEY sockopt. Fixes: bf9fb6ad4f29 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_ACTIVE_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2ac221c795c2..1d098f0ccbb5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3871,6 +3871,9 @@ static int sctp_setsockopt_active_key(struct sock *sk, if (asoc) return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); -- cgit v1.2.3 From 220675eb2e485519afbe7f3b457f7ce883086482 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:06 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_DELETE_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_DELETE_KEY sockopt. Fixes: 3adcc300603e ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_DELETE_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1d098f0ccbb5..087ca0bb86bd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3925,6 +3925,9 @@ static int sctp_setsockopt_del_key(struct sock *sk, if (asoc) return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); -- cgit v1.2.3 From 200f3a3bcb293d8d55b860632b9d5c9b5e763273 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:07 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_DEACTIVATE_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_DEACTIVATE_KEY sockopt. Fixes: 2af66ff3edc7 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_DEACTIVATE_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 087ca0bb86bd..0187444567e0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3978,6 +3978,9 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, if (asoc) return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); -- cgit v1.2.3 From cbb45c6cd5e64c344798892d6e200b0b253d0b59 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:08 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_PRINFO sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_DEFAULT_PRINFO sockopt. Fixes: 3a583059d187 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DEFAULT_PRINFO sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0187444567e0..238e5c804cc9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4196,6 +4196,9 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + info.pr_assoc_id = SCTP_FUTURE_ASSOC; + if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || info.pr_assoc_id == SCTP_ALL_ASSOC) { SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); -- cgit v1.2.3 From 9430ff992644e9f9c3ba6283cc56d40b421522e9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:09 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_ENABLE_STREAM_RESET sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_ENABLE_STREAM_RESET sockopt. Fixes: 99a62135e127 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_ENABLE_STREAM_RESET sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 238e5c804cc9..6b0ee8946bdb 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4281,6 +4281,9 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) ep->strreset_enable = params.assoc_value; -- cgit v1.2.3 From 995186193fd7a21c8fc6a2f2a96d33e26447eb01 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:10 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_EVENT sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_EVENT sockopt. Fixes: d251f05e3ba2 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_EVENT sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6b0ee8946bdb..10df48aa4e2c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4574,6 +4574,9 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval, if (asoc) return sctp_assoc_ulpevent_type_set(¶m, asoc); + if (sctp_style(sk, TCP)) + param.se_assoc_id = SCTP_FUTURE_ASSOC; + if (param.se_assoc_id == SCTP_FUTURE_ASSOC || param.se_assoc_id == SCTP_ALL_ASSOC) sctp_ulpevent_type_set(&sp->subscribe, -- cgit v1.2.3 From b59c19d9d901a8eb04896ec027787a55acb71fc6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:11 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_STREAM_SCHEDULER sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_STREAM_SCHEDULER sockopt. Fixes: 7efba10d6bd2 ("sctp: add SCTP_FUTURE_ASOC and SCTP_CURRENT_ASSOC for SCTP_STREAM_SCHEDULER sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 10df48aa4e2c..011c349d877a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4409,6 +4409,9 @@ static int sctp_setsockopt_scheduler(struct sock *sk, if (asoc) return sctp_sched_set_sched(asoc, params.assoc_value); + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->default_ss = params.assoc_value; -- cgit v1.2.3 From fae846e2b7124d4b076ef17791c73addf3b26350 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 18 Mar 2019 08:51:06 -0500 Subject: mISDN: hfcpci: Test both vendor & device ID for Digium HFC4S The device ID alone does not uniquely identify a device. Test both the vendor and device ID to make sure we don't mistakenly think some other vendor's 0xB410 device is a Digium HFC4S. Also, instead of the bare hex ID, use the same constant (PCI_DEVICE_ID_DIGIUM_HFC4S) used in the device ID table. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcmulti.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 4d85645c87f7..0928fd1f0e0c 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -4365,7 +4365,8 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, if (m->clock2) test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip); - if (ent->device == 0xB410) { + if (ent->vendor == PCI_VENDOR_ID_DIGIUM && + ent->device == PCI_DEVICE_ID_DIGIUM_HFC4S) { test_and_set_bit(HFC_CHIP_B410P, &hc->chip); test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip); test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); -- cgit v1.2.3 From 12b409dd32dffad6d800774e2d250adaaaa1fdcd Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Mar 2019 16:40:54 +0100 Subject: s390/qeth: don't erase configuration while probing The HW trap and VNICC configuration is exposed via sysfs, and may have already been modified when qeth_l?_probe_device() attempts to initialize them. So (1) initialize the VNICC values a little earlier, and (2) don't bother about the HW trap mode, it was already initialized before. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 4 ++-- drivers/s390/net/qeth_l3_main.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8efb2e8ff8f4..3bfdd8545776 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -645,6 +645,8 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; + qeth_l2_vnicc_set_defaults(card); + if (gdev->dev.type == &qeth_generic_devtype) { rc = qeth_l2_create_device_attributes(&gdev->dev); if (rc) @@ -652,8 +654,6 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) } hash_init(card->mac_htable); - card->info.hwtrap = 0; - qeth_l2_vnicc_set_defaults(card); return 0; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7e68d9d16859..2f002843b16e 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2260,7 +2260,6 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) } hash_init(card->ip_htable); hash_init(card->ip_mc_htable); - card->info.hwtrap = 0; return 0; } -- cgit v1.2.3 From 7221b727f0079a32aca91f657141e1de564d4b97 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Mar 2019 16:40:55 +0100 Subject: s390/qeth: fix race when initializing the IP address table The ucast IP table is utilized by some of the L3-specific sysfs attributes that qeth_l3_create_device_attributes() provides. So initialize the table _before_ registering the attributes. Fixes: ebccc7397e4a ("s390/qeth: add missing hash table initializations") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 2f002843b16e..d805e72edb58 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2253,12 +2253,14 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; + hash_init(card->ip_htable); + if (gdev->dev.type == &qeth_generic_devtype) { rc = qeth_l3_create_device_attributes(&gdev->dev); if (rc) return rc; } - hash_init(card->ip_htable); + hash_init(card->ip_mc_htable); return 0; } -- cgit v1.2.3 From 104b48592b5441c722dcd95c38ab9300f2d94856 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Mar 2019 16:40:56 +0100 Subject: s390/qeth: be drop monitor friendly As part of the TX completion path, qeth_release_skbs() frees the completed skbs with __skb_queue_purge(). This ends in kfree_skb(), reporting every completed skb as dropped. On the other hand when dropping an skb in .ndo_start_xmit, we end up calling consume_skb()... where we should be using kfree_skb() so that drop monitors get notified. Switch the drop/consume logic around, and also don't accumulate dropped packets in the tx_errors statistics. Fixes: dc149e3764d8 ("s390/qeth: replace open-coded skb_queue_walk()") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 5 ++++- drivers/s390/net/qeth_l2_main.c | 3 +-- drivers/s390/net/qeth_l3_main.c | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 197b0f5b63e7..44bd6f04c145 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1150,13 +1150,16 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q, static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf) { + struct sk_buff *skb; + /* release may never happen from within CQ tasklet scope */ WARN_ON_ONCE(atomic_read(&buf->state) == QETH_QDIO_BUF_IN_CQ); if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING) qeth_notify_skbs(buf->q, buf, TX_NOTIFY_GENERALERROR); - __skb_queue_purge(&buf->skb_list); + while ((skb = __skb_dequeue(&buf->skb_list)) != NULL) + consume_skb(skb); } static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 3bfdd8545776..c3067fd3bd9e 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -629,8 +629,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, } /* else fall through */ QETH_TXQ_STAT_INC(queue, tx_dropped); - QETH_TXQ_STAT_INC(queue, tx_errors); - dev_kfree_skb_any(skb); + kfree_skb(skb); netif_wake_queue(dev); return NETDEV_TX_OK; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d805e72edb58..53712cf26406 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2096,8 +2096,7 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, tx_drop: QETH_TXQ_STAT_INC(queue, tx_dropped); - QETH_TXQ_STAT_INC(queue, tx_errors); - dev_kfree_skb_any(skb); + kfree_skb(skb); netif_wake_queue(dev); return NETDEV_TX_OK; } -- cgit v1.2.3 From ffa91253739ca89fc997195d8bbd1f7ba3e29fbe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 18 Mar 2019 11:07:33 -0700 Subject: Documentation: networking: Update netdev-FAQ regarding patches Provide an explanation of what is expected with respect to sending new versions of specific patches within a patch series, as well as what happens if an earlier patch series accidentally gets merged). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 0ac5fa77f501..8c7a713cf657 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -131,6 +131,19 @@ it to the maintainer to figure out what is the most recent and current version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. +Q: I made changes to only a few patches in a patch series should I resend only those changed? +-------------------------------------------------------------------------------------------- +A: No, please resend the entire patch series and make sure you do number your +patches such that it is clear this is the latest and greatest set of patches +that can be applied. + +Q: I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? +------------------------------------------------------------------------------------------------------------------------------------------- +A: There is no revert possible, once it is pushed out, it stays like that. +Please send incremental versions on top of what has been merged in order to fix +the patches the way they would look like if your latest patch series was to be +merged. + Q: How can I tell what patches are queued up for backporting to the various stable releases? -------------------------------------------------------------------------------------------- A: Normally Greg Kroah-Hartman collects stable commits himself, but for -- cgit v1.2.3 From 7c9abe12b359d988970c5e38f0e190249e5ae00f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Mar 2019 13:51:22 +0100 Subject: netfilter: nf_flowtable: remove duplicated transition in diagram No direct transition from prerouting to forward hook, routing lookup needs to happen first. Fixes: 19b351f16fd9 ("netfilter: add flowtable documentation") Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_flowtable.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt index 54128c50d508..ca2136c76042 100644 --- a/Documentation/networking/nf_flowtable.txt +++ b/Documentation/networking/nf_flowtable.txt @@ -44,10 +44,10 @@ including the Netfilter hooks and the flowtable fastpath bypass. / \ / \ |Routing | / \ --> ingress ---> prerouting ---> |decision| | postrouting |--> neigh_xmit \_________/ \__________/ ---------- \____________/ ^ - | ^ | | ^ | - flowtable | | ____\/___ | | - | | | / \ | | - __\/___ | --------->| forward |------------ | + | ^ | ^ | + flowtable | ____\/___ | | + | | / \ | | + __\/___ | | forward |------------ | |-----| | \_________/ | |-----| | 'flow offload' rule | |-----| | adds entry to | -- cgit v1.2.3 From c0316470683af0507427c3e0660246feacdf8363 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Mar 2019 13:22:07 +0100 Subject: mt7601u: check chip version on probe Since some USB device IDs are duplicated between mt7601u and mt76x0u devices, check chip version on probe and return error if not match 0x7601. Don't think this is serious issue, probe most likely will fail at some other point for wrong device, but we do not have to configure it if we know is not mt7601u device. Reported-by: Xose Vazquez Perez Signed-off-by: Stanislaw Gruszka Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/usb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c index d8b7863f7926..6ae7f14dc9bf 100644 --- a/drivers/net/wireless/mediatek/mt7601u/usb.c +++ b/drivers/net/wireless/mediatek/mt7601u/usb.c @@ -303,6 +303,10 @@ static int mt7601u_probe(struct usb_interface *usb_intf, mac_rev = mt7601u_rr(dev, MT_MAC_CSR0); dev_info(dev->dev, "ASIC revision: %08x MAC revision: %08x\n", asic_rev, mac_rev); + if ((asic_rev >> 16) != 0x7601) { + ret = -ENODEV; + goto err; + } /* Note: vendor driver skips this check for MT7601U */ if (!(mt7601u_rr(dev, MT_EFUSE_CTRL) & MT_EFUSE_CTRL_SEL)) -- cgit v1.2.3 From 40b941611bd4826b7e3e449f738af98ad22e1ce6 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Mar 2019 13:22:21 +0100 Subject: mt76x02u: check chip version on probe Since some USB device IDs are duplicated between mt76x0u, mt7601u and mt76x2u device, check chip version on probe and return error if not match the driver. Don't think this is serious issue, probe most likely will fail at some other point for wrong device, but we do not have to configure it if we know is not our device. Reported-by: Xose Vazquez Perez Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 10 +++++++--- drivers/net/wireless/mediatek/mt76/mt76x02.h | 7 +++++++ drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 4 ++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index 91718647da02..e5a06f74a6f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -229,7 +229,7 @@ static int mt76x0u_probe(struct usb_interface *usb_intf, struct usb_device *usb_dev = interface_to_usbdev(usb_intf); struct mt76x02_dev *dev; struct mt76_dev *mdev; - u32 asic_rev, mac_rev; + u32 mac_rev; int ret; mdev = mt76_alloc_device(&usb_intf->dev, sizeof(*dev), &mt76x0u_ops, @@ -262,10 +262,14 @@ static int mt76x0u_probe(struct usb_interface *usb_intf, goto err; } - asic_rev = mt76_rr(dev, MT_ASIC_VERSION); + mdev->rev = mt76_rr(dev, MT_ASIC_VERSION); mac_rev = mt76_rr(dev, MT_MAC_CSR0); dev_info(mdev->dev, "ASIC revision: %08x MAC revision: %08x\n", - asic_rev, mac_rev); + mdev->rev, mac_rev); + if (!is_mt76x0(dev)) { + ret = -ENODEV; + goto err; + } /* Note: vendor driver skips this check for MT76X0U */ if (!(mt76_rr(dev, MT_EFUSE_CTRL) & MT_EFUSE_CTRL_SEL)) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index b5a36d9fb2bd..07061eb4d1e1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -192,6 +192,13 @@ void mt76x02_mac_start(struct mt76x02_dev *dev); void mt76x02_init_debugfs(struct mt76x02_dev *dev); +static inline bool is_mt76x0(struct mt76x02_dev *dev) +{ + return mt76_chip(&dev->mt76) == 0x7610 || + mt76_chip(&dev->mt76) == 0x7630 || + mt76_chip(&dev->mt76) == 0x7650; +} + static inline bool is_mt76x2(struct mt76x02_dev *dev) { return mt76_chip(&dev->mt76) == 0x7612 || diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c index 7a5d539873ca..ac0f13d46299 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c @@ -65,6 +65,10 @@ static int mt76x2u_probe(struct usb_interface *intf, mdev->rev = mt76_rr(dev, MT_ASIC_VERSION); dev_info(mdev->dev, "ASIC revision: %08x\n", mdev->rev); + if (!is_mt76x2(dev)) { + err = -ENODEV; + goto err; + } err = mt76x2u_register_device(dev); if (err < 0) -- cgit v1.2.3 From f2a00a821aacfa77985e4dbe83ed064c48a21bd5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 11 Mar 2019 14:09:53 +0100 Subject: mt76: mt7603: use the correct hweight8() function __sw_hweight8() is only defined if CONFIG_GENERIC_HWEIGHT is enabled. The function that works on all architectures is hweight8(). Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/beacon.c | 3 +-- drivers/net/wireless/mediatek/mt76/mt7603/init.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7603/mcu.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c b/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c index afcd86f735b4..4dcb465095d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c @@ -135,8 +135,7 @@ void mt7603_pre_tbtt_tasklet(unsigned long arg) out: mt76_queue_tx_cleanup(dev, MT_TXQ_BEACON, false); - if (dev->mt76.q_tx[MT_TXQ_BEACON].queued > - __sw_hweight8(dev->beacon_mask)) + if (dev->mt76.q_tx[MT_TXQ_BEACON].queued > hweight8(dev->beacon_mask)) dev->beacon_check++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c index 15cc8f33b34d..d54dda67d036 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c @@ -112,7 +112,7 @@ static void mt7603_phy_init(struct mt7603_dev *dev) { int rx_chains = dev->mt76.antenna_mask; - int tx_chains = __sw_hweight8(rx_chains) - 1; + int tx_chains = hweight8(rx_chains) - 1; mt76_rmw(dev, MT_WF_RMAC_RMCR, (MT_WF_RMAC_RMCR_SMPS_MODE | diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c index 4b0713f1fd5e..d06905ea8cc6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c @@ -433,7 +433,7 @@ int mt7603_mcu_set_channel(struct mt7603_dev *dev) { struct cfg80211_chan_def *chandef = &dev->mt76.chandef; struct ieee80211_hw *hw = mt76_hw(dev); - int n_chains = __sw_hweight8(dev->mt76.antenna_mask); + int n_chains = hweight8(dev->mt76.antenna_mask); struct { u8 control_chan; u8 center_chan; -- cgit v1.2.3 From 13f61dfc5235cfa82b1efbcdf4b4f14d9be233da Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 11 Mar 2019 14:24:35 +0100 Subject: mt76: fix schedule while atomic in mt76x02_reset_state Fix following schedule while atomic in mt76x02_reset_state since synchronize_rcu is run inside a RCU section [44036.944222] mt76x2e 0000:06:00.0: MCU message 31 (seq 3) timed out [44036.944281] BUG: sleeping function called from invalid context at kernel/rcu/tree_exp.h:818 [44036.944284] in_atomic(): 1, irqs_disabled(): 0, pid: 28066, name: kworker/u4:1 [44036.944287] INFO: lockdep is turned off. [44036.944292] CPU: 1 PID: 28066 Comm: kworker/u4:1 Tainted: G W 5.0.0-rc7-wdn-t1+ #7 [44036.944294] Hardware name: Dell Inc. Studio XPS 1340/0K183D, BIOS A11 09/08/2009 [44036.944305] Workqueue: phy1 mt76x02_wdt_work [mt76x02_lib] [44036.944308] Call Trace: [44036.944317] dump_stack+0x67/0x90 [44036.944322] ___might_sleep.cold.88+0x9f/0xaf [44036.944327] rcu_blocking_is_gp+0x13/0x50 [44036.944330] synchronize_rcu+0x17/0x80 [44036.944337] mt76_sta_state+0x138/0x1d0 [mt76] [44036.944349] mt76x02_wdt_work+0x1c9/0x610 [mt76x02_lib] [44036.944355] process_one_work+0x2a5/0x620 [44036.944361] worker_thread+0x35/0x3e0 [44036.944368] kthread+0x11c/0x140 [44036.944376] ret_from_fork+0x3a/0x50 [44036.944384] BUG: scheduling while atomic: kworker/u4:1/28066/0x00000002 [44036.944387] INFO: lockdep is turned off. [44036.944389] Modules linked in: cmac ctr ccm af_packet snd_hda_codec_hdmi Introduce __mt76_sta_remove in order to run sta_remove without holding dev->mutex. Move __mt76_sta_remove outside of RCU section in mt76x02_reset_state Fixes: e4ebb8b403d1 ("mt76: mt76x2: implement full device restart on watchdog reset") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mac80211.c | 18 +++++++++++------- drivers/net/wireless/mediatek/mt76/mt76.h | 2 ++ drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 19 ++++++++++--------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index a033745adb2f..316167404729 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -679,19 +679,15 @@ out: return ret; } -static void -mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, - struct ieee80211_sta *sta) +void __mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, + struct ieee80211_sta *sta) { struct mt76_wcid *wcid = (struct mt76_wcid *)sta->drv_priv; - int idx = wcid->idx; - int i; + int i, idx = wcid->idx; rcu_assign_pointer(dev->wcid[idx], NULL); synchronize_rcu(); - mutex_lock(&dev->mutex); - if (dev->drv->sta_remove) dev->drv->sta_remove(dev, vif, sta); @@ -699,7 +695,15 @@ mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, for (i = 0; i < ARRAY_SIZE(sta->txq); i++) mt76_txq_remove(dev, sta->txq[i]); mt76_wcid_free(dev->wcid_mask, idx); +} +EXPORT_SYMBOL_GPL(__mt76_sta_remove); +static void +mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, + struct ieee80211_sta *sta) +{ + mutex_lock(&dev->mutex); + __mt76_sta_remove(dev, vif, sta); mutex_unlock(&dev->mutex); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index cb50e96e736b..bcbfd3c4a44b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -695,6 +695,8 @@ int mt76_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state); +void __mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, + struct ieee80211_sta *sta); struct ieee80211_sta *mt76_rx_convert(struct sk_buff *skb); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 9de015bcd4f9..daaed1220147 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -441,19 +441,23 @@ static void mt76x02_reset_state(struct mt76x02_dev *dev) { int i; + lockdep_assert_held(&dev->mt76.mutex); + clear_bit(MT76_STATE_RUNNING, &dev->mt76.state); rcu_read_lock(); - ieee80211_iter_keys_rcu(dev->mt76.hw, NULL, mt76x02_key_sync, NULL); + rcu_read_unlock(); for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid); i++) { - struct mt76_wcid *wcid = rcu_dereference(dev->mt76.wcid[i]); - struct mt76x02_sta *msta; struct ieee80211_sta *sta; struct ieee80211_vif *vif; + struct mt76x02_sta *msta; + struct mt76_wcid *wcid; void *priv; + wcid = rcu_dereference_protected(dev->mt76.wcid[i], + lockdep_is_held(&dev->mt76.mutex)); if (!wcid) continue; @@ -463,13 +467,10 @@ static void mt76x02_reset_state(struct mt76x02_dev *dev) priv = msta->vif; vif = container_of(priv, struct ieee80211_vif, drv_priv); - mt76_sta_state(dev->mt76.hw, vif, sta, - IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); + __mt76_sta_remove(&dev->mt76, vif, sta); memset(msta, 0, sizeof(*msta)); } - rcu_read_unlock(); - dev->vif_mask = 0; dev->beacon_mask = 0; } @@ -489,11 +490,11 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) napi_disable(&dev->mt76.napi[i]); + mutex_lock(&dev->mt76.mutex); + if (restart) mt76x02_reset_state(dev); - mutex_lock(&dev->mt76.mutex); - if (dev->beacon_mask) mt76_clear(dev, MT_BEACON_TIME_CFG, MT_BEACON_TIME_CFG_BEACON_TX | -- cgit v1.2.3 From 7dfc45e6282a7662279d168cc1219929456f8750 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 12 Mar 2019 13:32:07 +0100 Subject: mt76x02: do not enable RTS/CTS by default My commit 26a7b5473191 ("mt76x02: set protection according to ht operation element") enabled by default RTS/CTS protection for OFDM and CCK traffic, because MT_TX_RTS_CFG_THRESH is configured to non 0xffff by initvals and .set_rts_threshold callback is not called by mac80211 on initialization, only on user request or during ieee80211_reconfig() (suspend/resuem or restart_hw). Enabling RTS/CTS cause some problems when sending probe request frames by hcxdumptool penetration tool, but I expect it can cause other issues on different scenarios. Restore previous setting of RTS/CTS being disabled by default for OFDM/CCK by changing MT_TX_RTS_CFG_THRESH initvals to 0xffff. Fixes: 26a7b5473191 ("mt76x02: set protection according to ht operation element") Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/init.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h index 0290ba5869a5..736f81752b5b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h @@ -46,7 +46,7 @@ static const struct mt76_reg_pair common_mac_reg_table[] = { { MT_MM20_PROT_CFG, 0x01742004 }, { MT_MM40_PROT_CFG, 0x03f42084 }, { MT_TXOP_CTRL_CFG, 0x0000583f }, - { MT_TX_RTS_CFG, 0x00092b20 }, + { MT_TX_RTS_CFG, 0x00ffff20 }, { MT_EXP_ACK_TIME, 0x002400ca }, { MT_TXOP_HLDR_ET, 0x00000002 }, { MT_XIFS_TIME_CFG, 0x33a41010 }, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c index f8534362e2c8..a30ef2c5a9db 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c @@ -106,7 +106,7 @@ void mt76_write_mac_initvals(struct mt76x02_dev *dev) { MT_TX_SW_CFG1, 0x00010000 }, { MT_TX_SW_CFG2, 0x00000000 }, { MT_TXOP_CTRL_CFG, 0x0400583f }, - { MT_TX_RTS_CFG, 0x00100020 }, + { MT_TX_RTS_CFG, 0x00ffff20 }, { MT_TX_TIMEOUT_CFG, 0x000a2290 }, { MT_TX_RETRY_CFG, 0x47f01f0f }, { MT_EXP_ACK_TIME, 0x002c00dc }, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c index 5e84b4535cb1..3b82345756ea 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c @@ -93,7 +93,6 @@ int mt76x2u_mac_reset(struct mt76x02_dev *dev) mt76_wr(dev, MT_TX_LINK_CFG, 0x1020); mt76_wr(dev, MT_AUTO_RSP_CFG, 0x13); mt76_wr(dev, MT_MAX_LEN_CFG, 0x2f00); - mt76_wr(dev, MT_TX_RTS_CFG, 0x92b20); mt76_wr(dev, MT_WMM_AIFSN, 0x2273); mt76_wr(dev, MT_WMM_CWMIN, 0x2344); -- cgit v1.2.3 From 7442c483b963dbee7d1b655cbad99c727c047828 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Mar 2019 17:35:11 +0100 Subject: mlxsw: core: mlxsw: core: avoid -Wint-in-bool-context warning A recently added function in mlxsw triggers a harmless compiler warning: In file included from drivers/net/ethernet/mellanox/mlxsw/core.h:17, from drivers/net/ethernet/mellanox/mlxsw/core_env.c:7: drivers/net/ethernet/mellanox/mlxsw/core_env.c: In function 'mlxsw_env_module_temp_thresholds_get': drivers/net/ethernet/mellanox/mlxsw/reg.h:8015:45: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] #define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) ~~~~~^~~~~~ drivers/net/ethernet/mellanox/mlxsw/core_env.c:116:8: note: in expansion of macro 'MLXSW_REG_MTMP_TEMP_TO_MC' if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) { ^~~~~~~~~~~~~~~~~~~~~~~~~ The warning is normally disabled, but it would be nice to enable it to find real bugs, and there are no other known instances at the moment. Replace the negation with a zero-comparison, which also matches the comment above it. Fixes: d93c19a1d95c ("mlxsw: core: Add API for QSFP module temperature thresholds reading") Signed-off-by: Arnd Bergmann Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 7a15e932ed2f..c1c1965d7acc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -113,7 +113,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, return 0; default: /* Do not consider thresholds for zero temperature. */ - if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) { + if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) { *temp = 0; return 0; } -- cgit v1.2.3 From 223a960c01227e4dbcb6f9fa06b47d73bda21274 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 18 Mar 2019 23:36:08 +0200 Subject: net: stmmac: fix memory corruption with large MTUs When using 16K DMA buffers and ring mode, the DES3 refill is not working correctly as the function is using a bogus pointer for checking the private data. As a result stale pointers will remain in the RX descriptor ring, so DMA will now likely overwrite/corrupt some already freed memory. As simple reproducer, just receive some UDP traffic: # ifconfig eth0 down; ifconfig eth0 mtu 9000; ifconfig eth0 up # iperf3 -c 192.168.253.40 -u -b 0 -R If you didn't crash by now check the RX descriptors to find non-contiguous RX buffers: cat /sys/kernel/debug/stmmaceth/eth0/descriptors_status [...] 1 [0x2be5020]: 0xa3220321 0x9ffc1ffc 0x72d70082 0x130e207e ^^^^^^^^^^^^^^^^^^^^^ 2 [0x2be5040]: 0xa3220321 0x9ffc1ffc 0x72998082 0x1311a07e ^^^^^^^^^^^^^^^^^^^^^ A simple ping test will now report bad data: # ping -s 8200 192.168.253.40 PING 192.168.253.40 (192.168.253.40) 8200(8228) bytes of data. 8208 bytes from 192.168.253.40: icmp_seq=1 ttl=64 time=1.00 ms wrong data byte #8144 should be 0xd0 but was 0x88 Fix the wrong pointer. Also we must refill DES3 only if the DMA buffer size is 16K. Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx") Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index f936166d8910..4d9bcb4d0378 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -113,10 +113,11 @@ static unsigned int is_jumbo_frm(int len, int enh_desc) static void refill_desc3(void *priv_ptr, struct dma_desc *p) { - struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; + struct stmmac_rx_queue *rx_q = priv_ptr; + struct stmmac_priv *priv = rx_q->priv_data; /* Fill DES3 in case of RING mode */ - if (priv->dma_buf_sz >= BUF_SIZE_8KiB) + if (priv->dma_buf_sz == BUF_SIZE_16KiB) p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB); } -- cgit v1.2.3 From a3e23f719f5c4a38ffb3d30c8d7632a4ed8ccd9e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Mar 2019 10:16:53 +0800 Subject: net-sysfs: call dev_hold if kobject_init_and_add success In netdev_queue_add_kobject and rx_queue_add_kobject, if sysfs_create_group failed, kobject_put will call netdev_queue_release to decrease dev refcont, however dev_hold has not be called. So we will see this while unregistering dev: unregister_netdevice: waiting for bcsh0 to become free. Usage count = -1 Reported-by: Hulk Robot Fixes: d0d668371679 ("net: don't decrement kobj reference count on init failure") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4ff661f6f989..8f8b7b6c2945 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -928,6 +928,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) { @@ -937,7 +939,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) } kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return error; } @@ -1464,6 +1465,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error) { @@ -1473,7 +1476,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) #endif kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return 0; } -- cgit v1.2.3 From d7737d4257459ca8921ff911c88937be1a11ea9d Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 22:19:44 -0500 Subject: nfc: Fix to check for kmemdup failure In case of kmemdup failure while setting the service name the patch returns -ENOMEM upstream for processing. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller --- net/nfc/llcp_sock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ae296273ce3d..17dcd0b5eb32 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -726,6 +726,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->service_name = kmemdup(addr->service_name, llcp_sock->service_name_len, GFP_KERNEL); + if (!llcp_sock->service_name) { + ret = -ENOMEM; + goto sock_llcp_release; + } nfc_llcp_sock_link(&local->connecting_sockets, sk); @@ -745,10 +749,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, return ret; sock_unlink: - nfc_llcp_put_ssap(local, llcp_sock->ssap); - nfc_llcp_sock_unlink(&local->connecting_sockets, sk); +sock_llcp_release: + nfc_llcp_put_ssap(local, llcp_sock->ssap); + put_dev: nfc_put_device(dev); -- cgit v1.2.3 From 89e4130939a20304f4059ab72179da81f5347528 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Mar 2019 05:45:35 -0700 Subject: tcp: do not use ipv6 header for ipv4 flow When a dual stack tcp listener accepts an ipv4 flow, it should not attempt to use an ipv6 header or tcp_v6_iif() helper. Fixes: 1397ed35f22d ("ipv6: add flowinfo for tcp6 pkt_options for all cases") Fixes: df3687ffc665 ("ipv6: add the IPV6_FL_F_REFLECT flag to IPV6_FL_A_GET") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 57ef69a10889..44d431849d39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1110,11 +1110,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; + newnp->rcv_flowinfo = 0; if (np->repflow) - newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); + newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count -- cgit v1.2.3 From e0aa67709f89d08c8d8e5bdd9e0b649df61d0090 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Mar 2019 05:46:18 -0700 Subject: dccp: do not use ipv6 header for ipv4 flow When a dual stack dccp listener accepts an ipv4 flow, it should not attempt to use an ipv6 header or inet6_iif() helper. Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d5740bad5b18..57d84e9b7b6f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -436,8 +436,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count -- cgit v1.2.3 From fb6fafbc7de4a813bb5364358bbe27f71e62b24a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Mar 2019 22:15:58 +0100 Subject: 3c515: fix integer overflow warning clang points out a harmless signed integer overflow: drivers/net/ethernet/3com/3c515.c:1530:66: error: implicit conversion from 'int' to 'short' changes value from 32783 to -32753 [-Werror,-Wconstant-conversion] new_mode = SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~ drivers/net/ethernet/3com/3c515.c:1532:52: error: implicit conversion from 'int' to 'short' changes value from 32775 to -32761 [-Werror,-Wconstant-conversion] new_mode = SetRxFilter | RxStation | RxMulticast | RxBroadcast; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ drivers/net/ethernet/3com/3c515.c:1534:38: error: implicit conversion from 'int' to 'short' changes value from 32773 to -32763 [-Werror,-Wconstant-conversion] new_mode = SetRxFilter | RxStation | RxBroadcast; ~ ~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ Make the variable unsigned to avoid the overflow. Fixes: Linux-2.1.128pre1 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c515.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 808abb6b3671..b15752267c8d 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1521,7 +1521,7 @@ static void update_stats(int ioaddr, struct net_device *dev) static void set_rx_mode(struct net_device *dev) { int ioaddr = dev->base_addr; - short new_mode; + unsigned short new_mode; if (dev->flags & IFF_PROMISC) { if (corkscrew_debug > 3) -- cgit v1.2.3 From f84532ce5887dac2ef67498b897a8713793eebde Mon Sep 17 00:00:00 2001 From: Vinay K Nallamothu Date: Tue, 19 Mar 2019 22:41:18 +0000 Subject: mpls: Fix 6PE forwarding This patch adds support for 6PE (RFC 4798) which uses IPv4-mapped IPv6 nexthop to connect IPv6 islands over IPv4 only MPLS network core. Prior to this fix, to find the link-layer destination mac address, 6PE enabled host/router was sending IPv6 ND requests for IPv4-mapped IPv6 nexthop address over the interface facing the IPv4 only core which wouldn't success as the core is IPv6 free. This fix changes that behavior on 6PE host to treat the nexthop as IPv4 address and send ARP requests whenever the next-hop address is an IPv4-mapped IPv6 address. Below topology illustrates the issue and how the patch addresses it. abcd::1.1.1.1 (lo) abcd::2.2.2.2 (lo) R0 (PE/host)------------------------R1--------------------------------R2 (PE/host) <--- IPv4 MPLS core ---> <------ IPv4 MPLS core --------> eth1 eth2 eth3 eth4 172.18.0.10 172.18.0.11 172.19.0.11 172.19.0.12 ffff::172.18.0.10 ffff::172.19.0.12 <------------------IPv6 MPLS tunnel ----------------------> R0 and R2 act as 6PE routers of IPv6 islands. R1 is IPv4 only with MPLS tunnels between R0,R1 and R1,R2. docker exec r0 ip -f inet6 route add abcd::2.2.2.2/128 nexthop encap mpls 100 via ::ffff:172.18.0.11 dev eth1 docker exec r2 ip -f inet6 route add abcd::1.1.1.1/128 nexthop encap mpls 200 via ::ffff:172.19.0.11 dev eth4 docker exec r1 ip -f mpls route add 100 via inet 172.19.0.12 dev eth3 docker exec r1 ip -f mpls route add 200 via inet 172.18.0.10 dev eth2 With the change, when R0 sends an IPv6 packet over MPLS tunnel to abcd::2.2.2.2, using ::ffff:172.18.0.11 as the nexthop, it does neighbor discovery for 172.18.18.0.11. Signed-off-by: Vinay K Nallamothu Tested-by: Avinash Lingala Tested-by: Aravind Srinivas Srinivasa Prabhakar Signed-off-by: David S. Miller --- net/mpls/mpls_iptunnel.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index dda8930f20e7..f3a8557494d6 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -140,9 +140,15 @@ static int mpls_xmit(struct sk_buff *skb) if (rt) err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway, skb); - else if (rt6) - err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, - skb); + else if (rt6) { + if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) { + /* 6PE (RFC 4798) */ + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3], + skb); + } else + err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, + skb); + } if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); -- cgit v1.2.3 From b25a31bf0ca091aa8bdb9ab329b0226257568bbe Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 19 Mar 2019 13:22:41 +0900 Subject: netfilter: nf_tables: add missing ->release_ops() in error path of newrule() ->release_ops() callback releases resources and this is used in error path. If nf_tables_newrule() fails after ->select_ops(), it should release resources. but it can not call ->destroy() because that should be called after ->init(). At this point, ->release_ops() should be used for releasing resources. Test commands: modprobe -rv xt_tcpudp iptables-nft -I INPUT -m tcp <-- error command lsmod Result: Module Size Used by xt_tcpudp 20480 2 <-- it should be 0 Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 513f93118604..ef7772e976cc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2806,8 +2806,11 @@ err2: nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { - if (info[i].ops != NULL) + if (info[i].ops) { module_put(info[i].ops->type->owner); + if (info[i].ops->type->release_ops) + info[i].ops->type->release_ops(info[i].ops); + } } kvfree(info); return err; -- cgit v1.2.3 From 398f0132c14754fcd03c1c4f8e7176d001ce8ea1 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 18 Mar 2019 23:14:52 -0700 Subject: net/packet: Set __GFP_NOWARN upon allocation in alloc_pg_vec Since commit fc62814d690c ("net/packet: fix 4gb buffer limit due to overflow check") one can now allocate packet ring buffers >= UINT_MAX. However, syzkaller found that that triggers a warning: [ 21.100000] WARNING: CPU: 2 PID: 2075 at mm/page_alloc.c:4584 __alloc_pages_nod0 [ 21.101490] Modules linked in: [ 21.101921] CPU: 2 PID: 2075 Comm: syz-executor.0 Not tainted 5.0.0 #146 [ 21.102784] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 [ 21.103887] RIP: 0010:__alloc_pages_nodemask+0x2a0/0x630 [ 21.104640] Code: fe ff ff 65 48 8b 04 25 c0 de 01 00 48 05 90 0f 00 00 41 bd 01 00 00 00 48 89 44 24 48 e9 9c fe 3 [ 21.107121] RSP: 0018:ffff88805e1cf920 EFLAGS: 00010246 [ 21.107819] RAX: 0000000000000000 RBX: ffffffff85a488a0 RCX: 0000000000000000 [ 21.108753] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: 0000000000000000 [ 21.109699] RBP: 1ffff1100bc39f28 R08: ffffed100bcefb67 R09: ffffed100bcefb67 [ 21.110646] R10: 0000000000000001 R11: ffffed100bcefb66 R12: 000000000000000d [ 21.111623] R13: 0000000000000000 R14: ffff88805e77d888 R15: 000000000000000d [ 21.112552] FS: 00007f7c7de05700(0000) GS:ffff88806d100000(0000) knlGS:0000000000000000 [ 21.113612] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 21.114405] CR2: 000000000065c000 CR3: 000000005e58e006 CR4: 00000000001606e0 [ 21.115367] Call Trace: [ 21.115705] ? __alloc_pages_slowpath+0x21c0/0x21c0 [ 21.116362] alloc_pages_current+0xac/0x1e0 [ 21.116923] kmalloc_order+0x18/0x70 [ 21.117393] kmalloc_order_trace+0x18/0x110 [ 21.117949] packet_set_ring+0x9d5/0x1770 [ 21.118524] ? packet_rcv_spkt+0x440/0x440 [ 21.119094] ? lock_downgrade+0x620/0x620 [ 21.119646] ? __might_fault+0x177/0x1b0 [ 21.120177] packet_setsockopt+0x981/0x2940 [ 21.120753] ? __fget+0x2fb/0x4b0 [ 21.121209] ? packet_release+0xab0/0xab0 [ 21.121740] ? sock_has_perm+0x1cd/0x260 [ 21.122297] ? selinux_secmark_relabel_packet+0xd0/0xd0 [ 21.123013] ? __fget+0x324/0x4b0 [ 21.123451] ? selinux_netlbl_socket_setsockopt+0x101/0x320 [ 21.124186] ? selinux_netlbl_sock_rcv_skb+0x3a0/0x3a0 [ 21.124908] ? __lock_acquire+0x529/0x3200 [ 21.125453] ? selinux_socket_setsockopt+0x5d/0x70 [ 21.126075] ? __sys_setsockopt+0x131/0x210 [ 21.126533] ? packet_release+0xab0/0xab0 [ 21.127004] __sys_setsockopt+0x131/0x210 [ 21.127449] ? kernel_accept+0x2f0/0x2f0 [ 21.127911] ? ret_from_fork+0x8/0x50 [ 21.128313] ? do_raw_spin_lock+0x11b/0x280 [ 21.128800] __x64_sys_setsockopt+0xba/0x150 [ 21.129271] ? lockdep_hardirqs_on+0x37f/0x560 [ 21.129769] do_syscall_64+0x9f/0x450 [ 21.130182] entry_SYSCALL_64_after_hwframe+0x49/0xbe We should allocate with __GFP_NOWARN to handle this. Cc: Kal Conley Cc: Andrey Konovalov Fixes: fc62814d690c ("net/packet: fix 4gb buffer limit due to overflow check") Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 323655a25674..9419c5cf4de5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4210,7 +4210,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) struct pgv *pg_vec; int i; - pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; -- cgit v1.2.3 From 1c87e79a002f6a159396138cd3f3ab554a2a8887 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Mar 2019 14:45:48 +0800 Subject: ipv6: make ip6_create_rt_rcu return ip6_null_entry instead of NULL Jianlin reported a crash: [ 381.484332] BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 [ 381.619802] RIP: 0010:fib6_rule_lookup+0xa3/0x160 [ 382.009615] Call Trace: [ 382.020762] [ 382.030174] ip6_route_redirect.isra.52+0xc9/0xf0 [ 382.050984] ip6_redirect+0xb6/0xf0 [ 382.066731] icmpv6_notify+0xca/0x190 [ 382.083185] ndisc_redirect_rcv+0x10f/0x160 [ 382.102569] ndisc_rcv+0xfb/0x100 [ 382.117725] icmpv6_rcv+0x3f2/0x520 [ 382.133637] ip6_input_finish+0xbf/0x460 [ 382.151634] ip6_input+0x3b/0xb0 [ 382.166097] ipv6_rcv+0x378/0x4e0 It was caused by the lookup function __ip6_route_redirect() returns NULL in fib6_rule_lookup() when ip6_create_rt_rcu() returns NULL. So we fix it by simply making ip6_create_rt_rcu() return ip6_null_entry instead of NULL. v1->v2: - move down 'fallback:' to make it more readable. Fixes: e873e4b9cc7e ("ipv6: use fib6_info_hold_safe() when necessary") Reported-by: Jianlin Shi Suggested-by: Paolo Abeni Signed-off-by: Xin Long Reviewed-by: David Ahern Acked-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv6/route.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4ef4bbdb49d4..0302e0eb07af 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1040,14 +1040,20 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) struct rt6_info *nrt; if (!fib6_info_hold_safe(rt)) - return NULL; + goto fallback; nrt = ip6_dst_alloc(dev_net(dev), dev, flags); - if (nrt) - ip6_rt_copy_init(nrt, rt); - else + if (!nrt) { fib6_info_release(rt); + goto fallback; + } + ip6_rt_copy_init(nrt, rt); + return nrt; + +fallback: + nrt = dev_net(dev)->ipv6.ip6_null_entry; + dst_hold(&nrt->dst); return nrt; } @@ -1096,10 +1102,6 @@ restart: dst_hold(&rt->dst); } else { rt = ip6_create_rt_rcu(f6i); - if (!rt) { - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); - } } rcu_read_unlock(); -- cgit v1.2.3 From ef82bcfa671b9a635bab5fa669005663d8b177c5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Mar 2019 14:49:38 +0800 Subject: sctp: use memdup_user instead of vmemdup_user In sctp_setsockopt_bindx()/__sctp_setsockopt_connectx(), it allocates memory with addrs_size which is passed from userspace. We used flag GFP_USER to put some more restrictions on it in Commit cacc06215271 ("sctp: use GFP_USER for user-controlled kmalloc"). However, since Commit c981f254cc82 ("sctp: use vmemdup_user() rather than badly open-coding memdup_user()"), vmemdup_user() has been used, which doesn't check GFP_USER flag when goes to vmalloc_*(). So when addrs_size is a huge value, it could exhaust memory and even trigger oom killer. This patch is to use memdup_user() instead, in which GFP_USER would work to limit the memory allocation with a huge addrs_size. Note we can't fix it by limiting 'addrs_size', as there's no demand for it from RFC. Reported-by: syzbot+ec1b7575afef85a0e5ca@syzkaller.appspotmail.com Fixes: c981f254cc82 ("sctp: use vmemdup_user() rather than badly open-coding memdup_user()") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 011c349d877a..9874e60c9b0d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -999,7 +999,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = vmemdup_user(addrs, addrs_size); + kaddrs = memdup_user(addrs, addrs_size); if (unlikely(IS_ERR(kaddrs))) return PTR_ERR(kaddrs); @@ -1007,7 +1007,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, addr_buf = kaddrs; while (walk_size < addrs_size) { if (walk_size + sizeof(sa_family_t) > addrs_size) { - kvfree(kaddrs); + kfree(kaddrs); return -EINVAL; } @@ -1018,7 +1018,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, * causes the address buffer to overflow return EINVAL. */ if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - kvfree(kaddrs); + kfree(kaddrs); return -EINVAL; } addrcnt++; @@ -1054,7 +1054,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, } out: - kvfree(kaddrs); + kfree(kaddrs); return err; } @@ -1329,7 +1329,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = vmemdup_user(addrs, addrs_size); + kaddrs = memdup_user(addrs, addrs_size); if (unlikely(IS_ERR(kaddrs))) return PTR_ERR(kaddrs); @@ -1349,7 +1349,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); out_free: - kvfree(kaddrs); + kfree(kaddrs); return err; } -- cgit v1.2.3 From 536d3680fd2dab5c39857d62a3e084198fc74ff9 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Dequeue RX packets explicitly The ks8851 driver lets the chip auto-dequeue received packets once they have been read in full. It achieves that by setting the ADRFE flag in the RXQCR register ("Auto-Dequeue RXQ Frame Enable"). However if allocation of a packet's socket buffer or retrieval of the packet over the SPI bus fails, the packet will not have been read in full and is not auto-dequeued. Such partial retrieval of a packet confuses the chip's RX queue management: On the next RX interrupt, the first packet read from the queue will be the one left there previously and this one can be retrieved without issues. But for any newly received packets, the frame header status and byte count registers (RXFHSR and RXFHBCR) contain bogus values, preventing their retrieval. The chip allows explicitly dequeueing a packet from the RX queue by setting the RRXEF flag in the RXQCR register ("Release RX Error Frame"). This could be used to dequeue the packet in case of an error, but if that error is a failed SPI transfer, it is unknown if the packet was transferred in full and was auto-dequeued or if it was only transferred in part and requires an explicit dequeue. The safest approach is thus to always dequeue packets explicitly and forgo auto-dequeueing. Without this change, I've witnessed packet retrieval break completely when an SPI DMA transfer fails, requiring a chip reset. Explicit dequeueing magically fixes this and makes packet retrieval absolutely robust for me. The chip's documentation suggests auto-dequeuing and uses the RRXEF flag only to dequeue error frames which the driver doesn't want to retrieve. But that seems to be a fair-weather approach. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index bd6e9014bc74..a93f8e842c07 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -535,9 +535,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) /* set dma read address */ ks8851_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI | 0x00); - /* start the packet dma process, and set auto-dequeue rx */ - ks8851_wrreg16(ks, KS_RXQCR, - ks->rc_rxqcr | RXQCR_SDA | RXQCR_ADRFE); + /* start DMA access */ + ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); if (rxlen > 4) { unsigned int rxalign; @@ -568,7 +567,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) } } - ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); + /* end DMA access and dequeue packet */ + ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_RRXEF); } } -- cgit v1.2.3 From 761cfa979a0c177d6c2d93ef5585cd79ae49a7d5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Reassert reset pin if chip ID check fails Commit 73fdeb82e963 ("net: ks8851: Add optional vdd_io regulator and reset gpio") amended the ks8851 driver to briefly assert the chip's reset pin on probe. It also amended the probe routine's error path to reassert the reset pin if a subsequent initialization step fails. However the commit misplaced reassertion of the reset pin in the error path such that it is not performed if the check of the Chip ID and Enable Register (CIDER) fails. The error path is therefore slightly asymmetrical to the probe routine's body. Fix it. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Stephen Boyd Cc: Nishanth Menon Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index a93f8e842c07..1633fa5c709c 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1554,9 +1554,9 @@ err_netdev: free_irq(ndev->irq, ks); err_irq: +err_id: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); -err_id: regulator_disable(ks->vdd_reg); err_reg: regulator_disable(ks->vdd_io); -- cgit v1.2.3 From d268f31552794abf5b6aa5af31021643411f25f5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Delay requesting IRQ until opened The ks8851 driver currently requests the IRQ before registering the net_device. Because the net_device name is used as IRQ name and is still "eth%d" when the IRQ is requested, it's impossibe to tell IRQs apart if multiple ks8851 chips are present. Most other drivers delay requesting the IRQ until the net_device is opened. Do the same. The driver doesn't enable interrupts on the chip before opening the net_device and disables them when closing it, so there doesn't seem to be a need to request the IRQ already on probe. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 1633fa5c709c..c9faec4c5b25 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -785,6 +785,15 @@ static void ks8851_tx_work(struct work_struct *work) static int ks8851_net_open(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); + int ret; + + ret = request_threaded_irq(dev->irq, NULL, ks8851_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + dev->name, ks); + if (ret < 0) { + netdev_err(dev, "failed to get irq\n"); + return ret; + } /* lock the card, even if we may not actually be doing anything * else at the moment */ @@ -899,6 +908,8 @@ static int ks8851_net_stop(struct net_device *dev) dev_kfree_skb(txb); } + free_irq(dev->irq, ks); + return 0; } @@ -1529,14 +1540,6 @@ static int ks8851_probe(struct spi_device *spi) ks8851_read_selftest(ks); ks8851_init_mac(ks); - ret = request_threaded_irq(spi->irq, NULL, ks8851_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - ndev->name, ks); - if (ret < 0) { - dev_err(&spi->dev, "failed to get irq\n"); - goto err_irq; - } - ret = register_netdev(ndev); if (ret) { dev_err(&spi->dev, "failed to register network device\n"); @@ -1549,11 +1552,7 @@ static int ks8851_probe(struct spi_device *spi) return 0; - err_netdev: - free_irq(ndev->irq, ks); - -err_irq: err_id: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); @@ -1574,7 +1573,6 @@ static int ks8851_remove(struct spi_device *spi) dev_info(&spi->dev, "remove\n"); unregister_netdev(priv->netdev); - free_irq(spi->irq, priv); if (gpio_is_valid(priv->gpio)) gpio_set_value(priv->gpio, 0); regulator_disable(priv->vdd_reg); -- cgit v1.2.3 From 9624bafa5f6418b9ca5b3f66d1f6a6a2e8bf6d4c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Set initial carrier state to down The ks8851 chip's initial carrier state is down. A Link Change Interrupt is signaled once interrupts are enabled if the carrier is up. The ks8851 driver has it backwards by assuming that the initial carrier state is up. The state is therefore misrepresented if the interface is opened with no cable attached. Fix it. The Link Change interrupt is sometimes not signaled unless the P1MBSR register (which contains the Link Status bit) is read on ->ndo_open(). This might be a hardware erratum. Read the register by calling mii_check_link(), which has the desirable side effect of setting the carrier state to down if the cable was detached while the interface was closed. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index c9faec4c5b25..b83b070a9eec 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -858,6 +858,7 @@ static int ks8851_net_open(struct net_device *dev) netif_dbg(ks, ifup, ks->netdev, "network device up\n"); mutex_unlock(&ks->lock); + mii_check_link(&ks->mii); return 0; } @@ -1519,6 +1520,7 @@ static int ks8851_probe(struct spi_device *spi) spi_set_drvdata(spi, ks); + netif_carrier_off(ks->netdev); ndev->if_port = IF_PORT_100BASET; ndev->netdev_ops = &ks8851_netdev_ops; ndev->irq = spi->irq; -- cgit v1.2.3 From cbda74a12c4b738feb90752fbca3648d24646079 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Fix register macro misnomers In the header file accompanying the ks8851 driver, the P1SCLMD register macros are misnamed, they actually pertain to the P1CR register. The P1CR macros in turn pertain to the P1SR register, see pages 65 to 68 of the spec: http://www.hqchip.com/uploads/pdf/201703/47c98946d6c97a4766e14db3f24955f2.pdf The misnomers have no negative consequences so far because the macros aren't used by ks8851.c, but that's about to change. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.h | 52 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index 852256ef1f22..1050fa48b73c 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -257,33 +257,35 @@ #define KS_P1ANLPR 0xEE #define KS_P1SCLMD 0xF4 -#define P1SCLMD_LEDOFF (1 << 15) -#define P1SCLMD_TXIDS (1 << 14) -#define P1SCLMD_RESTARTAN (1 << 13) -#define P1SCLMD_DISAUTOMDIX (1 << 10) -#define P1SCLMD_FORCEMDIX (1 << 9) -#define P1SCLMD_AUTONEGEN (1 << 7) -#define P1SCLMD_FORCE100 (1 << 6) -#define P1SCLMD_FORCEFDX (1 << 5) -#define P1SCLMD_ADV_FLOW (1 << 4) -#define P1SCLMD_ADV_100BT_FDX (1 << 3) -#define P1SCLMD_ADV_100BT_HDX (1 << 2) -#define P1SCLMD_ADV_10BT_FDX (1 << 1) -#define P1SCLMD_ADV_10BT_HDX (1 << 0) #define KS_P1CR 0xF6 -#define P1CR_HP_MDIX (1 << 15) -#define P1CR_REV_POL (1 << 13) -#define P1CR_OP_100M (1 << 10) -#define P1CR_OP_FDX (1 << 9) -#define P1CR_OP_MDI (1 << 7) -#define P1CR_AN_DONE (1 << 6) -#define P1CR_LINK_GOOD (1 << 5) -#define P1CR_PNTR_FLOW (1 << 4) -#define P1CR_PNTR_100BT_FDX (1 << 3) -#define P1CR_PNTR_100BT_HDX (1 << 2) -#define P1CR_PNTR_10BT_FDX (1 << 1) -#define P1CR_PNTR_10BT_HDX (1 << 0) +#define P1CR_LEDOFF (1 << 15) +#define P1CR_TXIDS (1 << 14) +#define P1CR_RESTARTAN (1 << 13) +#define P1CR_DISAUTOMDIX (1 << 10) +#define P1CR_FORCEMDIX (1 << 9) +#define P1CR_AUTONEGEN (1 << 7) +#define P1CR_FORCE100 (1 << 6) +#define P1CR_FORCEFDX (1 << 5) +#define P1CR_ADV_FLOW (1 << 4) +#define P1CR_ADV_100BT_FDX (1 << 3) +#define P1CR_ADV_100BT_HDX (1 << 2) +#define P1CR_ADV_10BT_FDX (1 << 1) +#define P1CR_ADV_10BT_HDX (1 << 0) + +#define KS_P1SR 0xF8 +#define P1SR_HP_MDIX (1 << 15) +#define P1SR_REV_POL (1 << 13) +#define P1SR_OP_100M (1 << 10) +#define P1SR_OP_FDX (1 << 9) +#define P1SR_OP_MDI (1 << 7) +#define P1SR_AN_DONE (1 << 6) +#define P1SR_LINK_GOOD (1 << 5) +#define P1SR_PNTR_FLOW (1 << 4) +#define P1SR_PNTR_100BT_FDX (1 << 3) +#define P1SR_PNTR_100BT_HDX (1 << 2) +#define P1SR_PNTR_10BT_FDX (1 << 1) +#define P1SR_PNTR_10BT_HDX (1 << 0) /* TX Frame control */ -- cgit v1.2.3 From aae079aa76d0cc3e679db31370364cb87a405651 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Deduplicate register macros The ks8851 chip is sold either with an SPI interface (KSZ8851SNL) or with a so-called non-PCI interface (KSZ8851-16MLL). When the driver for the latter was introduced with commit a55c0a0ed415 ("drivers/net: ks8851_mll ethernet network driver"), it duplicated the register macros introduced by the driver for the former with commit 3ba81f3ece3c ("net: Micrel KS8851 SPI network driver"). The chips are almost identical, so the duplication seems unwarranted. There are a handful of bits which are in use on the KSZ8851-16MLL but reserved on the KSZ8851SNL, and vice-versa, but there are no actual collisions. Thus, remove the duplicate definitions from the KSZ8851-16MLL driver. Mark all bits which differ between the two chips. Move the SPI frame opcodes, which are specific to KSZ8851SNL, to its driver. The KSZ8851-16MLL driver added a RXFCTR_THRESHOLD_MASK macro which is a duplication of the RXFCTR_RXFCT_MASK macro, rename it where it's used. Same for P1MBCR_FORCE_FDX, which duplicates the BMCR_FULLDPLX macro and OBCR_ODS_16MA, which duplicates OBCR_ODS_16mA. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 6 + drivers/net/ethernet/micrel/ks8851.h | 41 ++-- drivers/net/ethernet/micrel/ks8851_mll.c | 317 +------------------------------ 3 files changed, 34 insertions(+), 330 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index b83b070a9eec..7849119d407a 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -142,6 +142,12 @@ struct ks8851_net { static int msg_enable; +/* SPI frame opcodes */ +#define KS_SPIOP_RD (0x00) +#define KS_SPIOP_WR (0x40) +#define KS_SPIOP_RXFIFO (0x80) +#define KS_SPIOP_TXFIFO (0xC0) + /* shift for byte-enable data */ #define BYTE_EN(_x) ((_x) << 2) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index 1050fa48b73c..23da1e3ee429 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -11,9 +11,15 @@ */ #define KS_CCR 0x08 +#define CCR_LE (1 << 10) /* KSZ8851-16MLL */ #define CCR_EEPROM (1 << 9) -#define CCR_SPI (1 << 8) -#define CCR_32PIN (1 << 0) +#define CCR_SPI (1 << 8) /* KSZ8851SNL */ +#define CCR_8BIT (1 << 7) /* KSZ8851-16MLL */ +#define CCR_16BIT (1 << 6) /* KSZ8851-16MLL */ +#define CCR_32BIT (1 << 5) /* KSZ8851-16MLL */ +#define CCR_SHARED (1 << 4) /* KSZ8851-16MLL */ +#define CCR_48PIN (1 << 1) /* KSZ8851-16MLL */ +#define CCR_32PIN (1 << 0) /* KSZ8851SNL */ /* MAC address registers */ #define KS_MAR(_m) (0x15 - (_m)) @@ -112,13 +118,13 @@ #define RXCR1_RXE (1 << 0) #define KS_RXCR2 0x76 -#define RXCR2_SRDBL_MASK (0x7 << 5) -#define RXCR2_SRDBL_SHIFT (5) -#define RXCR2_SRDBL_4B (0x0 << 5) -#define RXCR2_SRDBL_8B (0x1 << 5) -#define RXCR2_SRDBL_16B (0x2 << 5) -#define RXCR2_SRDBL_32B (0x3 << 5) -#define RXCR2_SRDBL_FRAME (0x4 << 5) +#define RXCR2_SRDBL_MASK (0x7 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_SHIFT (5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_4B (0x0 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_8B (0x1 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_16B (0x2 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_32B (0x3 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_FRAME (0x4 << 5) /* KSZ8851SNL */ #define RXCR2_IUFFP (1 << 4) #define RXCR2_RXIUFCEZ (1 << 3) #define RXCR2_UDPLFE (1 << 2) @@ -143,8 +149,10 @@ #define RXFSHR_RXCE (1 << 0) #define KS_RXFHBCR 0x7E +#define RXFHBCR_CNT_MASK (0xfff << 0) + #define KS_TXQCR 0x80 -#define TXQCR_AETFE (1 << 2) +#define TXQCR_AETFE (1 << 2) /* KSZ8851SNL */ #define TXQCR_TXQMAM (1 << 1) #define TXQCR_METFE (1 << 0) @@ -167,6 +175,10 @@ #define KS_RXFDPR 0x86 #define RXFDPR_RXFPAI (1 << 14) +#define RXFDPR_WST (1 << 12) /* KSZ8851-16MLL */ +#define RXFDPR_EMS (1 << 11) /* KSZ8851-16MLL */ +#define RXFDPR_RXFP_MASK (0x7ff << 0) +#define RXFDPR_RXFP_SHIFT (0) #define KS_RXDTTR 0x8C #define KS_RXDBCTR 0x8E @@ -184,7 +196,7 @@ #define IRQ_RXMPDI (1 << 4) #define IRQ_LDI (1 << 3) #define IRQ_EDI (1 << 2) -#define IRQ_SPIBEI (1 << 1) +#define IRQ_SPIBEI (1 << 1) /* KSZ8851SNL */ #define IRQ_DEDI (1 << 0) #define KS_RXFCTR 0x9C @@ -288,13 +300,6 @@ #define P1SR_PNTR_10BT_HDX (1 << 0) /* TX Frame control */ - #define TXFR_TXIC (1 << 15) #define TXFR_TXFID_MASK (0x3f << 0) #define TXFR_TXFID_SHIFT (0) - -/* SPI frame opcodes */ -#define KS_SPIOP_RD (0x00) -#define KS_SPIOP_WR (0x40) -#define KS_SPIOP_RXFIFO (0x80) -#define KS_SPIOP_TXFIFO (0xC0) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 35f8c9ef204d..c946841c0a06 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -40,6 +40,8 @@ #include #include +#include "ks8851.h" + #define DRV_NAME "ks8851_mll" static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 }; @@ -48,319 +50,10 @@ static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 }; #define TX_BUF_SIZE 2000 #define RX_BUF_SIZE 2000 -#define KS_CCR 0x08 -#define CCR_EEPROM (1 << 9) -#define CCR_SPI (1 << 8) -#define CCR_8BIT (1 << 7) -#define CCR_16BIT (1 << 6) -#define CCR_32BIT (1 << 5) -#define CCR_SHARED (1 << 4) -#define CCR_32PIN (1 << 0) - -/* MAC address registers */ -#define KS_MARL 0x10 -#define KS_MARM 0x12 -#define KS_MARH 0x14 - -#define KS_OBCR 0x20 -#define OBCR_ODS_16MA (1 << 6) - -#define KS_EEPCR 0x22 -#define EEPCR_EESA (1 << 4) -#define EEPCR_EESB (1 << 3) -#define EEPCR_EEDO (1 << 2) -#define EEPCR_EESCK (1 << 1) -#define EEPCR_EECS (1 << 0) - -#define KS_MBIR 0x24 -#define MBIR_TXMBF (1 << 12) -#define MBIR_TXMBFA (1 << 11) -#define MBIR_RXMBF (1 << 4) -#define MBIR_RXMBFA (1 << 3) - -#define KS_GRR 0x26 -#define GRR_QMU (1 << 1) -#define GRR_GSR (1 << 0) - -#define KS_WFCR 0x2A -#define WFCR_MPRXE (1 << 7) -#define WFCR_WF3E (1 << 3) -#define WFCR_WF2E (1 << 2) -#define WFCR_WF1E (1 << 1) -#define WFCR_WF0E (1 << 0) - -#define KS_WF0CRC0 0x30 -#define KS_WF0CRC1 0x32 -#define KS_WF0BM0 0x34 -#define KS_WF0BM1 0x36 -#define KS_WF0BM2 0x38 -#define KS_WF0BM3 0x3A - -#define KS_WF1CRC0 0x40 -#define KS_WF1CRC1 0x42 -#define KS_WF1BM0 0x44 -#define KS_WF1BM1 0x46 -#define KS_WF1BM2 0x48 -#define KS_WF1BM3 0x4A - -#define KS_WF2CRC0 0x50 -#define KS_WF2CRC1 0x52 -#define KS_WF2BM0 0x54 -#define KS_WF2BM1 0x56 -#define KS_WF2BM2 0x58 -#define KS_WF2BM3 0x5A - -#define KS_WF3CRC0 0x60 -#define KS_WF3CRC1 0x62 -#define KS_WF3BM0 0x64 -#define KS_WF3BM1 0x66 -#define KS_WF3BM2 0x68 -#define KS_WF3BM3 0x6A - -#define KS_TXCR 0x70 -#define TXCR_TCGICMP (1 << 8) -#define TXCR_TCGUDP (1 << 7) -#define TXCR_TCGTCP (1 << 6) -#define TXCR_TCGIP (1 << 5) -#define TXCR_FTXQ (1 << 4) -#define TXCR_TXFCE (1 << 3) -#define TXCR_TXPE (1 << 2) -#define TXCR_TXCRC (1 << 1) -#define TXCR_TXE (1 << 0) - -#define KS_TXSR 0x72 -#define TXSR_TXLC (1 << 13) -#define TXSR_TXMC (1 << 12) -#define TXSR_TXFID_MASK (0x3f << 0) -#define TXSR_TXFID_SHIFT (0) -#define TXSR_TXFID_GET(_v) (((_v) >> 0) & 0x3f) - - -#define KS_RXCR1 0x74 -#define RXCR1_FRXQ (1 << 15) -#define RXCR1_RXUDPFCC (1 << 14) -#define RXCR1_RXTCPFCC (1 << 13) -#define RXCR1_RXIPFCC (1 << 12) -#define RXCR1_RXPAFMA (1 << 11) -#define RXCR1_RXFCE (1 << 10) -#define RXCR1_RXEFE (1 << 9) -#define RXCR1_RXMAFMA (1 << 8) -#define RXCR1_RXBE (1 << 7) -#define RXCR1_RXME (1 << 6) -#define RXCR1_RXUE (1 << 5) -#define RXCR1_RXAE (1 << 4) -#define RXCR1_RXINVF (1 << 1) -#define RXCR1_RXE (1 << 0) #define RXCR1_FILTER_MASK (RXCR1_RXINVF | RXCR1_RXAE | \ RXCR1_RXMAFMA | RXCR1_RXPAFMA) - -#define KS_RXCR2 0x76 -#define RXCR2_SRDBL_MASK (0x7 << 5) -#define RXCR2_SRDBL_SHIFT (5) -#define RXCR2_SRDBL_4B (0x0 << 5) -#define RXCR2_SRDBL_8B (0x1 << 5) -#define RXCR2_SRDBL_16B (0x2 << 5) -#define RXCR2_SRDBL_32B (0x3 << 5) -/* #define RXCR2_SRDBL_FRAME (0x4 << 5) */ -#define RXCR2_IUFFP (1 << 4) -#define RXCR2_RXIUFCEZ (1 << 3) -#define RXCR2_UDPLFE (1 << 2) -#define RXCR2_RXICMPFCC (1 << 1) -#define RXCR2_RXSAF (1 << 0) - -#define KS_TXMIR 0x78 - -#define KS_RXFHSR 0x7C -#define RXFSHR_RXFV (1 << 15) -#define RXFSHR_RXICMPFCS (1 << 13) -#define RXFSHR_RXIPFCS (1 << 12) -#define RXFSHR_RXTCPFCS (1 << 11) -#define RXFSHR_RXUDPFCS (1 << 10) -#define RXFSHR_RXBF (1 << 7) -#define RXFSHR_RXMF (1 << 6) -#define RXFSHR_RXUF (1 << 5) -#define RXFSHR_RXMR (1 << 4) -#define RXFSHR_RXFT (1 << 3) -#define RXFSHR_RXFTL (1 << 2) -#define RXFSHR_RXRF (1 << 1) -#define RXFSHR_RXCE (1 << 0) -#define RXFSHR_ERR (RXFSHR_RXCE | RXFSHR_RXRF |\ - RXFSHR_RXFTL | RXFSHR_RXMR |\ - RXFSHR_RXICMPFCS | RXFSHR_RXIPFCS |\ - RXFSHR_RXTCPFCS) -#define KS_RXFHBCR 0x7E -#define RXFHBCR_CNT_MASK 0x0FFF - -#define KS_TXQCR 0x80 -#define TXQCR_AETFE (1 << 2) -#define TXQCR_TXQMAM (1 << 1) -#define TXQCR_METFE (1 << 0) - -#define KS_RXQCR 0x82 -#define RXQCR_RXDTTS (1 << 12) -#define RXQCR_RXDBCTS (1 << 11) -#define RXQCR_RXFCTS (1 << 10) -#define RXQCR_RXIPHTOE (1 << 9) -#define RXQCR_RXDTTE (1 << 7) -#define RXQCR_RXDBCTE (1 << 6) -#define RXQCR_RXFCTE (1 << 5) -#define RXQCR_ADRFE (1 << 4) -#define RXQCR_SDA (1 << 3) -#define RXQCR_RRXEF (1 << 0) #define RXQCR_CMD_CNTL (RXQCR_RXFCTE|RXQCR_ADRFE) -#define KS_TXFDPR 0x84 -#define TXFDPR_TXFPAI (1 << 14) -#define TXFDPR_TXFP_MASK (0x7ff << 0) -#define TXFDPR_TXFP_SHIFT (0) - -#define KS_RXFDPR 0x86 -#define RXFDPR_RXFPAI (1 << 14) - -#define KS_RXDTTR 0x8C -#define KS_RXDBCTR 0x8E - -#define KS_IER 0x90 -#define KS_ISR 0x92 -#define IRQ_LCI (1 << 15) -#define IRQ_TXI (1 << 14) -#define IRQ_RXI (1 << 13) -#define IRQ_RXOI (1 << 11) -#define IRQ_TXPSI (1 << 9) -#define IRQ_RXPSI (1 << 8) -#define IRQ_TXSAI (1 << 6) -#define IRQ_RXWFDI (1 << 5) -#define IRQ_RXMPDI (1 << 4) -#define IRQ_LDI (1 << 3) -#define IRQ_EDI (1 << 2) -#define IRQ_SPIBEI (1 << 1) -#define IRQ_DEDI (1 << 0) - -#define KS_RXFCTR 0x9C -#define RXFCTR_THRESHOLD_MASK 0x00FF - -#define KS_RXFC 0x9D -#define RXFCTR_RXFC_MASK (0xff << 8) -#define RXFCTR_RXFC_SHIFT (8) -#define RXFCTR_RXFC_GET(_v) (((_v) >> 8) & 0xff) -#define RXFCTR_RXFCT_MASK (0xff << 0) -#define RXFCTR_RXFCT_SHIFT (0) - -#define KS_TXNTFSR 0x9E - -#define KS_MAHTR0 0xA0 -#define KS_MAHTR1 0xA2 -#define KS_MAHTR2 0xA4 -#define KS_MAHTR3 0xA6 - -#define KS_FCLWR 0xB0 -#define KS_FCHWR 0xB2 -#define KS_FCOWR 0xB4 - -#define KS_CIDER 0xC0 -#define CIDER_ID 0x8870 -#define CIDER_REV_MASK (0x7 << 1) -#define CIDER_REV_SHIFT (1) -#define CIDER_REV_GET(_v) (((_v) >> 1) & 0x7) - -#define KS_CGCR 0xC6 -#define KS_IACR 0xC8 -#define IACR_RDEN (1 << 12) -#define IACR_TSEL_MASK (0x3 << 10) -#define IACR_TSEL_SHIFT (10) -#define IACR_TSEL_MIB (0x3 << 10) -#define IACR_ADDR_MASK (0x1f << 0) -#define IACR_ADDR_SHIFT (0) - -#define KS_IADLR 0xD0 -#define KS_IAHDR 0xD2 - -#define KS_PMECR 0xD4 -#define PMECR_PME_DELAY (1 << 14) -#define PMECR_PME_POL (1 << 12) -#define PMECR_WOL_WAKEUP (1 << 11) -#define PMECR_WOL_MAGICPKT (1 << 10) -#define PMECR_WOL_LINKUP (1 << 9) -#define PMECR_WOL_ENERGY (1 << 8) -#define PMECR_AUTO_WAKE_EN (1 << 7) -#define PMECR_WAKEUP_NORMAL (1 << 6) -#define PMECR_WKEVT_MASK (0xf << 2) -#define PMECR_WKEVT_SHIFT (2) -#define PMECR_WKEVT_GET(_v) (((_v) >> 2) & 0xf) -#define PMECR_WKEVT_ENERGY (0x1 << 2) -#define PMECR_WKEVT_LINK (0x2 << 2) -#define PMECR_WKEVT_MAGICPKT (0x4 << 2) -#define PMECR_WKEVT_FRAME (0x8 << 2) -#define PMECR_PM_MASK (0x3 << 0) -#define PMECR_PM_SHIFT (0) -#define PMECR_PM_NORMAL (0x0 << 0) -#define PMECR_PM_ENERGY (0x1 << 0) -#define PMECR_PM_SOFTDOWN (0x2 << 0) -#define PMECR_PM_POWERSAVE (0x3 << 0) - -/* Standard MII PHY data */ -#define KS_P1MBCR 0xE4 -#define P1MBCR_FORCE_FDX (1 << 8) - -#define KS_P1MBSR 0xE6 -#define P1MBSR_AN_COMPLETE (1 << 5) -#define P1MBSR_AN_CAPABLE (1 << 3) -#define P1MBSR_LINK_UP (1 << 2) - -#define KS_PHY1ILR 0xE8 -#define KS_PHY1IHR 0xEA -#define KS_P1ANAR 0xEC -#define KS_P1ANLPR 0xEE - -#define KS_P1SCLMD 0xF4 -#define P1SCLMD_LEDOFF (1 << 15) -#define P1SCLMD_TXIDS (1 << 14) -#define P1SCLMD_RESTARTAN (1 << 13) -#define P1SCLMD_DISAUTOMDIX (1 << 10) -#define P1SCLMD_FORCEMDIX (1 << 9) -#define P1SCLMD_AUTONEGEN (1 << 7) -#define P1SCLMD_FORCE100 (1 << 6) -#define P1SCLMD_FORCEFDX (1 << 5) -#define P1SCLMD_ADV_FLOW (1 << 4) -#define P1SCLMD_ADV_100BT_FDX (1 << 3) -#define P1SCLMD_ADV_100BT_HDX (1 << 2) -#define P1SCLMD_ADV_10BT_FDX (1 << 1) -#define P1SCLMD_ADV_10BT_HDX (1 << 0) - -#define KS_P1CR 0xF6 -#define P1CR_HP_MDIX (1 << 15) -#define P1CR_REV_POL (1 << 13) -#define P1CR_OP_100M (1 << 10) -#define P1CR_OP_FDX (1 << 9) -#define P1CR_OP_MDI (1 << 7) -#define P1CR_AN_DONE (1 << 6) -#define P1CR_LINK_GOOD (1 << 5) -#define P1CR_PNTR_FLOW (1 << 4) -#define P1CR_PNTR_100BT_FDX (1 << 3) -#define P1CR_PNTR_100BT_HDX (1 << 2) -#define P1CR_PNTR_10BT_FDX (1 << 1) -#define P1CR_PNTR_10BT_HDX (1 << 0) - -/* TX Frame control */ - -#define TXFR_TXIC (1 << 15) -#define TXFR_TXFID_MASK (0x3f << 0) -#define TXFR_TXFID_SHIFT (0) - -#define KS_P1SR 0xF8 -#define P1SR_HP_MDIX (1 << 15) -#define P1SR_REV_POL (1 << 13) -#define P1SR_OP_100M (1 << 10) -#define P1SR_OP_FDX (1 << 9) -#define P1SR_OP_MDI (1 << 7) -#define P1SR_AN_DONE (1 << 6) -#define P1SR_LINK_GOOD (1 << 5) -#define P1SR_PNTR_FLOW (1 << 4) -#define P1SR_PNTR_100BT_FDX (1 << 3) -#define P1SR_PNTR_100BT_HDX (1 << 2) -#define P1SR_PNTR_10BT_FDX (1 << 1) -#define P1SR_PNTR_10BT_HDX (1 << 0) - #define ENUM_BUS_NONE 0 #define ENUM_BUS_8BIT 1 #define ENUM_BUS_16BIT 2 @@ -1475,7 +1168,7 @@ static void ks_setup(struct ks_net *ks) ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); /* Setup Receive Frame Threshold - 1 frame (RXFCTFC) */ - ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_THRESHOLD_MASK); + ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_RXFCT_MASK); /* Setup RxQ Command Control (RXQCR) */ ks->rc_rxqcr = RXQCR_CMD_CNTL; @@ -1488,7 +1181,7 @@ static void ks_setup(struct ks_net *ks) */ w = ks_rdreg16(ks, KS_P1MBCR); - w &= ~P1MBCR_FORCE_FDX; + w &= ~BMCR_FULLDPLX; ks_wrreg16(ks, KS_P1MBCR, w); w = TXCR_TXFCE | TXCR_TXPE | TXCR_TXCRC | TXCR_TCGIP; @@ -1629,7 +1322,7 @@ static int ks8851_probe(struct platform_device *pdev) ks_setup_int(ks); data = ks_rdreg16(ks, KS_OBCR); - ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16MA); + ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16mA); /* overwriting the default MAC address */ if (pdev->dev.of_node) { -- cgit v1.2.3 From 64447506f152cf0f88a0fc23140ca1c5f7ff34a8 Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi Radulescu Date: Wed, 20 Mar 2019 14:11:04 +0000 Subject: dpaa2-eth: Fix possible access beyond end of array Make sure we don't try to enqueue XDP_REDIRECT frames to an inexistent FQ. While it is guaranteed not to have more than one queue per core, having fewer queues than CPUs on an interface is a valid configuration. Fixes: d678be1dc1ec ("dpaa2-eth: add XDP_REDIRECT support") Reported-by: Jesper Dangaard Brouer Signed-off-by: Ioana Radulescu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2ba49e959c3f..1a68052abb94 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1817,7 +1817,7 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev, dpaa2_fd_set_format(&fd, dpaa2_fd_single); dpaa2_fd_set_ctrl(&fd, FD_CTRL_PTA); - fq = &priv->fq[smp_processor_id()]; + fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)]; for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { err = priv->enqueue(priv, fq, &fd, 0); if (err != -EBUSY) -- cgit v1.2.3 From cba368c1f01c27ed62fca7a853531845d263bb01 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 18 Mar 2019 10:37:13 -0700 Subject: bpf: Only print ref_obj_id for refcounted reg Naresh reported that test_align fails because of the mismatch at the verbose printout of the register states. The reason is due to the newly added ref_obj_id. ref_obj_id is only useful for refcounted reg. Thus, this patch fixes it by only printing ref_obj_id for refcounted reg. While at it, it also uses comma instead of space to separate between "id" and "ref_obj_id". Fixes: 1b986589680a ("bpf: Fix bpf_tcp_sock and bpf_sk_fullsock issue related to bpf_sk_release") Reported-by: Naresh Kamboju Signed-off-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 86f9cd5d1c4e..5aa810882583 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -352,6 +352,14 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) map_value_has_spin_lock(reg->map_ptr); } +static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCKET_OR_NULL || + type == PTR_TO_TCP_SOCK || + type == PTR_TO_TCP_SOCK_OR_NULL; +} + static bool arg_type_may_be_refcounted(enum bpf_arg_type type) { return type == ARG_PTR_TO_SOCK_COMMON; @@ -451,8 +459,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (t == PTR_TO_STACK) verbose(env, ",call_%d", func(env, reg)->callsite); } else { - verbose(env, "(id=%d ref_obj_id=%d", reg->id, - reg->ref_obj_id); + verbose(env, "(id=%d", reg->id); + if (reg_type_may_be_refcounted_or_null(t)) + verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); if (t != SCALAR_VALUE) verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) -- cgit v1.2.3 From 0803278b0b4d8eeb2b461fb698785df65a725d9e Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Thu, 21 Mar 2019 18:00:35 +0800 Subject: bpf: do not restore dst_reg when cur_state is freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syzkaller hit 'KASAN: use-after-free Write in sanitize_ptr_alu' bug. Call trace: dump_stack+0xbf/0x12e print_address_description+0x6a/0x280 kasan_report+0x237/0x360 sanitize_ptr_alu+0x85a/0x8d0 adjust_ptr_min_max_vals+0x8f2/0x1ca0 adjust_reg_min_max_vals+0x8ed/0x22e0 do_check+0x1ca6/0x5d00 bpf_check+0x9ca/0x2570 bpf_prog_load+0xc91/0x1030 __se_sys_bpf+0x61e/0x1f00 do_syscall_64+0xc8/0x550 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fault injection trace:  kfree+0xea/0x290  free_func_state+0x4a/0x60  free_verifier_state+0x61/0xe0  push_stack+0x216/0x2f0 <- inject failslab  sanitize_ptr_alu+0x2b1/0x8d0  adjust_ptr_min_max_vals+0x8f2/0x1ca0  adjust_reg_min_max_vals+0x8ed/0x22e0  do_check+0x1ca6/0x5d00  bpf_check+0x9ca/0x2570  bpf_prog_load+0xc91/0x1030  __se_sys_bpf+0x61e/0x1f00  do_syscall_64+0xc8/0x550  entry_SYSCALL_64_after_hwframe+0x49/0xbe When kzalloc() fails in push_stack(), free_verifier_state() will free current verifier state. As push_stack() returns, dst_reg was restored if ptr_is_dst_reg is false. However, as member of the cur_state, dst_reg is also freed, and error occurs when dereferencing dst_reg. Simply fix it by testing ret of push_stack() before restoring dst_reg. Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") Signed-off-by: Xu Yu Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5aa810882583..f19d5e04c69d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3381,7 +3381,7 @@ do_sim: *dst_reg = *ptr_reg; } ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); - if (!ptr_is_dst_reg) + if (!ptr_is_dst_reg && ret) *dst_reg = tmp; return !ret ? -EFAULT : 0; } -- cgit v1.2.3 From 33872d79f5d1cbedaaab79669cc38f16097a9450 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 21 Mar 2019 09:11:59 +0100 Subject: tipc: fix cancellation of topology subscriptions When cancelling a subscription, we have to clear the cancel bit in the request before iterating over any established subscriptions with memcmp. Otherwise no subscription will ever be found, and it will not be possible to explicitly unsubscribe individual subscriptions. Fixes: 8985ecc7c1e0 ("tipc: simplify endianness handling in topology subscriber") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/topsrv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 4a708a4e8583..b45932d78004 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -363,6 +363,7 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, struct tipc_subscription *sub; if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { + s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL); tipc_conn_delete_sub(con, s); return 0; } -- cgit v1.2.3 From ceabee6c59943bdd5e1da1a6a20dc7ee5f8113a2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 21 Mar 2019 15:02:50 +0800 Subject: genetlink: Fix a memory leak on error path In genl_register_family(), when idr_alloc() fails, we forget to free the memory we possibly allocate for family->attrbuf. Reported-by: Hulk Robot Fixes: 2ae0f17df1cd ("genetlink: use idr to track families") Signed-off-by: YueHaibing Reviewed-by: Kirill Tkhai Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 25eeb6d2a75a..f0ec068e1d02 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -366,7 +366,7 @@ int genl_register_family(struct genl_family *family) start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_free; } err = genl_validate_assign_mc_groups(family); @@ -385,6 +385,7 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_free: kfree(family->attrbuf); errout_locked: genl_unlock_all(); -- cgit v1.2.3 From 06acc17a96215a11134114aee26532b12dc8fde1 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 20 Mar 2019 07:36:55 -0500 Subject: net: phy: Add DP83825I to the DP83822 driver Add the DP83825I ethernet PHY to the DP83822 driver. These devices share the same WoL register bits and addresses. The phy_driver init was made into a macro as there may be future devices appended to this driver that will share the register space. http://www.ti.com/lit/gpn/dp83825i Reviewed-by: Florian Fainelli Signed-off-by: Dan Murphy Signed-off-by: David S. Miller --- drivers/net/phy/dp83822.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index bbd8c22067f3..97d45bd5b38e 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -15,6 +15,8 @@ #include #define DP83822_PHY_ID 0x2000a240 +#define DP83825I_PHY_ID 0x2000a150 + #define DP83822_DEVADDR 0x1f #define MII_DP83822_PHYSCR 0x11 @@ -304,26 +306,30 @@ static int dp83822_resume(struct phy_device *phydev) return 0; } +#define DP83822_PHY_DRIVER(_id, _name) \ + { \ + PHY_ID_MATCH_MODEL(_id), \ + .name = (_name), \ + .features = PHY_BASIC_FEATURES, \ + .soft_reset = dp83822_phy_reset, \ + .config_init = dp83822_config_init, \ + .get_wol = dp83822_get_wol, \ + .set_wol = dp83822_set_wol, \ + .ack_interrupt = dp83822_ack_interrupt, \ + .config_intr = dp83822_config_intr, \ + .suspend = dp83822_suspend, \ + .resume = dp83822_resume, \ + } + static struct phy_driver dp83822_driver[] = { - { - .phy_id = DP83822_PHY_ID, - .phy_id_mask = 0xfffffff0, - .name = "TI DP83822", - .features = PHY_BASIC_FEATURES, - .config_init = dp83822_config_init, - .soft_reset = dp83822_phy_reset, - .get_wol = dp83822_get_wol, - .set_wol = dp83822_set_wol, - .ack_interrupt = dp83822_ack_interrupt, - .config_intr = dp83822_config_intr, - .suspend = dp83822_suspend, - .resume = dp83822_resume, - }, + DP83822_PHY_DRIVER(DP83822_PHY_ID, "TI DP83822"), + DP83822_PHY_DRIVER(DP83825I_PHY_ID, "TI DP83825I"), }; module_phy_driver(dp83822_driver); static struct mdio_device_id __maybe_unused dp83822_tbl[] = { { DP83822_PHY_ID, 0xfffffff0 }, + { DP83825I_PHY_ID, 0xfffffff0 }, { }, }; MODULE_DEVICE_TABLE(mdio, dp83822_tbl); -- cgit v1.2.3 From cd5afa91f078c0787be0a62b5ef90301c00b0271 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Wed, 20 Mar 2019 19:12:22 +0530 Subject: net: macb: Add null check for PCLK and HCLK Both PCLK and HCLK are "required" clocks according to macb devicetree documentation. There is a chance that devm_clk_get doesn't return a negative error but just a NULL clock structure instead. In such a case the driver proceeds as usual and uses pclk value 0 to calculate MDC divisor which is incorrect. Hence fix the same in clock initialization. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ad099fd01b45..1522aee81884 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3370,14 +3370,20 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, *hclk = devm_clk_get(&pdev->dev, "hclk"); } - if (IS_ERR(*pclk)) { + if (IS_ERR_OR_NULL(*pclk)) { err = PTR_ERR(*pclk); + if (!err) + err = -ENODEV; + dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); return err; } - if (IS_ERR(*hclk)) { + if (IS_ERR_OR_NULL(*hclk)) { err = PTR_ERR(*hclk); + if (!err) + err = -ENODEV; + dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); return err; } -- cgit v1.2.3 From 85d0966fa57e0ef2d30d913c98ca93674f7a03c9 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 14:59:59 +0100 Subject: net/sched: prepare TC actions to properly validate the control action - pass a pointer to struct tcf_proto in each actions's init() handler, to allow validating the control action, checking whether the chain exists and (eventually) refcounting it. - remove code that validates the control action after a successful call to the action's init() handler, and replace it with a test that forbids addition of actions having 'goto_chain' and NULL goto_chain pointer at the same time. - add tcf_action_check_ctrlact(), that will validate the control action and eventually allocate the action 'goto_chain' within the init() handler. - add tcf_action_set_ctrlact(), that will assign the control action and swap the current 'goto_chain' pointer with the new given one. This disallows 'goto_chain' on actions that don't initialize it properly in their init() handler, i.e. calling tcf_action_check_ctrlact() after successful IDR reservation and then calling tcf_action_set_ctrlact() to assign 'goto_chain' and 'tcf_action' consistently. By doing this, the kernel does not leak anymore refcounts when a valid 'goto chain' handle is replaced in TC actions, causing kmemleak splats like the following one: # tc chain add dev dd0 chain 42 ingress protocol ip flower \ > ip_proto tcp action drop # tc chain add dev dd0 chain 43 ingress protocol ip flower \ > ip_proto udp action drop # tc filter add dev dd0 ingress matchall \ > action gact goto chain 42 index 66 # tc filter replace dev dd0 ingress matchall \ > action gact goto chain 43 index 66 # echo scan >/sys/kernel/debug/kmemleak <...> unreferenced object 0xffff93c0ee09f000 (size 1024): comm "tc", pid 2565, jiffies 4295339808 (age 65.426s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0 [<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0 [<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110 [<000000006126a348>] netlink_unicast+0x1a0/0x250 [<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0 [<00000000a25a2171>] sock_sendmsg+0x36/0x40 [<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0 [<00000000d0422042>] __sys_sendmsg+0x5e/0xa0 [<000000007a6c61f9>] do_syscall_64+0x5b/0x180 [<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<0000000013eaa334>] 0xffffffffffffffff Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/net/act_api.h | 7 +++- net/sched/act_api.c | 97 ++++++++++++++++++++++++++-------------------- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 1 + net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 11 +++--- net/sched/act_mirred.c | 1 + net/sched/act_nat.c | 3 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 1 + net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 1 + net/sched/act_skbmod.c | 1 + net/sched/act_tunnel_key.c | 1 + net/sched/act_vlan.c | 2 +- 18 files changed, 84 insertions(+), 56 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index c745e9ccfab2..54fbb49bd08a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -90,7 +90,7 @@ struct tc_action_ops { int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, @@ -181,6 +181,11 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, + struct tcf_chain **handle, + struct netlink_ext_ack *newchain); +struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, + struct tcf_chain *newchain); #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index aecf1bf233c8..fe67b98ac641 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -28,23 +28,6 @@ #include #include -static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) -{ - u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK; - - if (!tp) - return -EINVAL; - a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index); - if (!a->goto_chain) - return -ENOMEM; - return 0; -} - -static void tcf_action_goto_chain_fini(struct tc_action *a) -{ - tcf_chain_put_by_act(a->goto_chain); -} - static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { @@ -71,6 +54,53 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, call_rcu(&old->rcu, tcf_free_cookie_rcu); } +int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, + struct tcf_chain **newchain, + struct netlink_ext_ack *extack) +{ + int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL; + u32 chain_index; + + if (!opcode) + ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0; + else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC) + ret = 0; + if (ret) { + NL_SET_ERR_MSG(extack, "invalid control action"); + goto end; + } + + if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) { + chain_index = action & TC_ACT_EXT_VAL_MASK; + if (!tp || !newchain) { + ret = -EINVAL; + NL_SET_ERR_MSG(extack, + "can't goto NULL proto/chain"); + goto end; + } + *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index); + if (!*newchain) { + ret = -ENOMEM; + NL_SET_ERR_MSG(extack, + "can't allocate goto_chain"); + } + } +end: + return ret; +} +EXPORT_SYMBOL(tcf_action_check_ctrlact); + +struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, + struct tcf_chain *newchain) +{ + struct tcf_chain *oldchain = a->goto_chain; + + a->tcfa_action = action; + a->goto_chain = newchain; + return oldchain; +} +EXPORT_SYMBOL(tcf_action_set_ctrlact); + /* XXX: For standalone actions, we don't need a RCU grace period either, because * actions are always connected to filters and filters are already destroyed in * RCU callbacks, so after a RCU grace period actions are already disconnected @@ -78,13 +108,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, */ static void free_tcf(struct tc_action *p) { + struct tcf_chain *chain = p->goto_chain; + free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->act_cookie, NULL); - if (p->goto_chain) - tcf_action_goto_chain_fini(p); + if (chain) + tcf_chain_put_by_act(chain); kfree(p); } @@ -800,15 +832,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } -static bool tcf_action_valid(int action) -{ - int opcode = TC_ACT_EXT_OPCODE(action); - - if (!opcode) - return action <= TC_ACT_VALUE_MAX; - return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC; -} - struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -890,10 +913,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, /* backward compatibility for policer */ if (name == NULL) err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, - rtnl_held, extack); + rtnl_held, tp, extack); else err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, - extack); + tp, extack); if (err < 0) goto err_mod; @@ -907,18 +930,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { - err = tcf_action_goto_chain_init(a, tp); - if (err) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Failed to init TC action chain"); - return ERR_PTR(err); - } - } - - if (!tcf_action_valid(a->tcfa_action)) { + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && + !a->goto_chain) { tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Invalid control action value"); + NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); return ERR_PTR(-EINVAL); } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index aa5c38d11a30..3c0468f2aae6 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -278,7 +278,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int replace, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 5d24993cccfe..44aa046a92ea 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -97,6 +97,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index c79aca29505e..9ba0f61a1e82 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 93da0004e9f4..b8ad311bd8cc 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -57,7 +57,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 9b1f2b3990ee..c1ba74d5c1e3 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -469,7 +469,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 98f5b6ea77b4..04a0b5c61194 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -97,7 +97,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, int ovr, int bind) + const struct tc_action_ops *ops, int ovr, int bind, + struct tcf_proto *tp) { struct tc_action_net *tn = net_generic(net, id); struct nlattr *tb[TCA_IPT_MAX + 1]; @@ -205,20 +206,20 @@ err1: static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, - bind); + bind, tp); } static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool unlocked, + int bind, bool unlocked, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, - bind); + bind, tp); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6692fd054617..383f4024452c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -94,6 +94,7 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 543eab9193f1..de4b493e26d2 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -38,7 +38,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, - bool rtnl_held, struct netlink_ext_ack *extack) + bool rtnl_held, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index a80373878df7..8ca82aefa11a 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -138,7 +138,7 @@ nla_failure: static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8271a6263824..229eba7925e5 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -83,6 +83,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { int ret = 0, tcfp_result = TC_ACT_OK, err, size; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 203e399e5c85..36b8adbe935d 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index d54cb608dbaf..4916dc3e3668 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -78,7 +78,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 65879500b688..4566eff3d027 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -96,6 +96,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 7bac1d78e7a3..b9ab2c8f07f1 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -82,6 +82,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 7c6591b991d5..fc295d91559a 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -210,6 +210,7 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index ac0061599225..4651ee15e35d 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -105,7 +105,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; -- cgit v1.2.3 From 4e1810049c267c203c19130301203d0591174535 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:00 +0100 Subject: net/sched: act_bpf: validate the control action inside init() the following script: # tc filter add dev crash0 egress matchall \ > action bpf bytecode '1,6 0 0 4294967295' pass index 90 # tc actions replace action bpf \ > bytecode '1,6 0 0 4294967295' goto chain 42 index 90 cookie c1a0c1a0 # tc action show action bpf had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: bpf bytecode '1,6 0 0 4294967295' default-action goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffb3a0803dfa90 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff942b347ada00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffb3a08034d038 RDI: ffff942b347ada00 RBP: ffffb3a0803dfb30 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffffb3a0803dfb0c R12: ffff942b3b682b00 R13: ffff942b3b682b08 R14: 0000000000000001 R15: ffff942b3b682f00 FS: 00007f6160a72740(0000) GS:ffff942b3da80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000795a4002 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip_finish_output2+0x16f/0x430 ip_finish_output2+0x16f/0x430 ? ip_output+0x69/0xe0 ip_output+0x69/0xe0 ? ip_forward_options+0x1a0/0x1a0 ip_send_skb+0x15/0x40 raw_sendmsg+0x8e1/0xbd0 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0xc/0xa0 ? try_to_wake_up+0x54/0x480 ? ldsem_down_read+0x3f/0x280 ? _cond_resched+0x15/0x40 ? down_read+0xe/0x30 ? copy_termios+0x1e/0x70 ? tty_mode_ioctl+0x1b6/0x4c0 ? sock_sendmsg+0x36/0x40 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 ? do_vfs_ioctl+0xa4/0x640 ? handle_mm_fault+0xdc/0x210 ? syscall_trace_enter+0x1df/0x2e0 ? __audit_syscall_exit+0x216/0x260 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f615f7e3c03 Code: 48 8b 0d 90 62 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 9d c3 2c 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 4b cc 00 00 48 89 04 24 RSP: 002b:00007ffee5d8cc28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055a4f28f1700 RCX: 00007f615f7e3c03 RDX: 0000000000000040 RSI: 000055a4f28f1700 RDI: 0000000000000003 RBP: 00007ffee5d8e340 R08: 000055a4f28ee510 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 R13: 000055a4f28f16c0 R14: 000055a4f28ef69c R15: 0000000000000080 Modules linked in: act_bpf veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache crct10dif_pclmul jbd2 crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper pcspkr joydev virtio_balloon snd_timer snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper virtio_blk virtio_net virtio_console net_failover failover syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel ata_piix serio_raw libata virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_bpf_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 23 +++++++++++++++----- .../selftests/tc-testing/tc-tests/actions/bpf.json | 25 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 3c0468f2aae6..3841156aa09f 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -282,6 +283,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; @@ -323,12 +325,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return ret; } + ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (ret < 0) + goto release_idr; + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) { ret = -EINVAL; - goto out; + goto put_chain; } memset(&cfg, 0, sizeof(cfg)); @@ -336,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : tcf_bpf_init_from_efd(tb, &cfg); if (ret < 0) - goto out; + goto put_chain; prog = to_bpf(*act); @@ -350,10 +356,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (cfg.bpf_num_ops) prog->bpf_num_ops = cfg.bpf_num_ops; - prog->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch); rcu_assign_pointer(prog->filter, cfg.filter); spin_unlock_bh(&prog->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (res == ACT_P_CREATED) { tcf_idr_insert(tn, *act); } else { @@ -363,9 +372,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, } return res; -out: - tcf_idr_release(*act, bind); +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + +release_idr: + tcf_idr_release(*act, bind); return ret; } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5970cee6d05f..b074ea9b6fe8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -286,5 +286,30 @@ "teardown": [ "$TC action flush action bpf" ] + }, + { + "id": "b8a1", + "name": "Replace bpf action with invalid goto_chain control", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ], + "$TC action add action bpf bytecode '1,6 0 0 4294967295' pass index 90" + ], + "cmdUnderTest": "$TC action replace action bpf bytecode '1,6 0 0 4294967295' goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC action list action bpf", + "matchPattern": "action order [0-9]*: bpf.* default-action pass.*index 90", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf" + ] } ] -- cgit v1.2.3 From f5c29d83866d75585f9c89754de0b86b45ceab89 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:01 +0100 Subject: net/sched: act_csum: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall action csum icmp pass index 90 # tc actions replace action csum icmp goto chain 42 index 90 \ > cookie c1a0c1a0 # tc actions show action csum had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: csum (icmp) action goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 8000000074692067 P4D 8000000074692067 PUD 2e210067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff93153da03be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9314ee40f700 RCX: 0000000000003a00 RDX: 0000000000000000 RSI: ffff931537c87828 RDI: ffff931537c87818 RBP: ffff93153da03c80 R08: 00000000527cffff R09: 0000000000000003 R10: 000000000000003f R11: 0000000000000028 R12: ffff9314edf68400 R13: ffff9314edf68408 R14: 0000000000000001 R15: ffff9314ed67b600 FS: 0000000000000000(0000) GS:ffff93153da00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000073e32003 CR4: 00000000001606f0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 66 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffffff9a803e98 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffff99e184f0 RBX: 0000000000000000 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000000 RBP: 0000000000000000 R08: 000eb5c4572376b3 R09: 0000000000000000 R10: ffffa53e806a3ca0 R11: 00000000000f4240 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_kernel+0x49e/0x4be secondary_startup_64+0xa4/0xb0 Modules linked in: act_csum veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel mbcache snd_hda_codec jbd2 snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd snd_timer glue_helper snd joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt net_failover fb_sys_fops virtio_console virtio_blk ttm failover drm ata_piix crc32c_intel floppy virtio_pci serio_raw libata virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_csum_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_csum.c | 20 ++++++++++++++--- .../tc-testing/tc-tests/actions/csum.json | 25 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 9ba0f61a1e82..0c77e7bdf6d5 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -52,6 +53,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, csum_net_id); struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_csum *parm; struct tcf_csum *p; int ret = 0, err; @@ -87,21 +89,27 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p = to_tcf_csum(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->update_flags = parm->update_flags; spin_lock_bh(&p->tcf_lock); - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(p->params, params_new, lockdep_is_held(&p->tcf_lock)); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (params_new) kfree_rcu(params_new, rcu); @@ -109,6 +117,12 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } /** diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json index a022792d392a..ddabb2fbb7c7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -500,5 +500,30 @@ "matchPattern": "^[ \t]+index [0-9]+ ref", "matchCount": "0", "teardown": [] + }, + { + "id": "d128", + "name": "Replace csum action with invalid goto chain control", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ], + "$TC actions add action csum iph index 90" + ], + "cmdUnderTest": "$TC actions replace action csum iph goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action csum index 90", + "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] } ] -- cgit v1.2.3 From 0da2dbd6029c2be4191651bafa57c3c006eff63c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:02 +0100 Subject: net/sched: act_gact: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action gact pass index 90 # tc actions replace action gact \ > goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action gact had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: gact action goto chain 42 random type none pass val 0 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff8c2434703be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff8c23ed6d7e00 RCX: 000000000000005a RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8c23ed6d7e00 RBP: ffff8c2434703c80 R08: ffff8c243b639ac8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2429e68b00 R13: ffff8c2429e68b08 R14: 0000000000000001 R15: ffff8c2429c5a480 FS: 0000000000000000(0000) GS:ffff8c2434700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000002dc0e005 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 74 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffff9c8640387eb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffff8b2184f0 RBX: 0000000000000002 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000002 RBP: 0000000000000002 R08: 000eb57882b36cc3 R09: 0000000000000020 R10: 0000000000000004 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_gact act_bpf veth ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic ext4 snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core mbcache jbd2 snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper virtio_balloon joydev pcspkr snd_timer snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea virtio_net sysfillrect net_failover virtio_blk sysimgblt fb_sys_fops virtio_console ttm failover drm crc32c_intel serio_raw ata_piix libata floppy virtio_pci virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_bpf] CR2: 0000000000000000 Validating the control action within tcf_gact_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_gact.c | 13 ++++++++++- .../tc-testing/tc-tests/actions/gact.json | 25 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b8ad311bd8cc..e540e31069d7 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; struct tcf_gact *gact; int ret = 0; @@ -116,10 +118,13 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; gact = to_gact(*a); spin_lock_bh(&gact->tcf_lock); - gact->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); #ifdef CONFIG_GACT_PROB if (p_parm) { gact->tcfg_paction = p_parm->paction; @@ -133,9 +138,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif spin_unlock_bh(&gact->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index 89189a03ce3d..814b7a8a478b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -560,5 +560,30 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "ca89", + "name": "Replace gact action with invalid goto chain control", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pass random determ drop 2 index 90" + ], + "cmdUnderTest": "$TC actions replace action goto chain 42 random determ drop 5 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*random type determ drop val 2.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] } ] -- cgit v1.2.3 From 11a94d7fd80f92325e7b8653290ad3d2cd67f119 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:03 +0100 Subject: net/sched: act_ife: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action ife encode allow mark pass index 90 # tc actions replace action ife \ > encode allow mark goto chain 42 index 90 cookie c1a0c1a0 # tc action show action ife had the following output: IFE type 0xED3E IFE type 0xED3E Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: ife encode action goto chain 42 type 0XED3E allow mark index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000007b4e7067 P4D 800000007b4e7067 PUD 7b4e6067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 164 Comm: kworker/2:1 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffa6a7c0553ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9796ee1bbd00 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffa6a7c0553b70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff9797385bb038 R12: ffff9796ead9d700 R13: ffff9796ead9d708 R14: 0000000000000001 R15: ffff9796ead9d800 FS: 0000000000000000(0000) GS:ffff97973db00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007c41e006 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_gact act_meta_mark act_ife dummy veth ip6table_filter ip6_tables iptable_filter binfmt_misc snd_hda_codec_generic ext4 snd_hda_intel snd_hda_codec crct10dif_pclmul mbcache crc32_pclmul jbd2 snd_hwdep snd_hda_core ghash_clmulni_intel snd_seq snd_seq_device snd_pcm snd_timer aesni_intel crypto_simd snd cryptd glue_helper virtio_balloon joydev pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl virtio_net drm_kms_helper virtio_blk net_failover syscopyarea failover sysfillrect virtio_console sysimgblt fb_sys_fops ttm drm crc32c_intel serio_raw ata_piix virtio_pci virtio_ring libata virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_ife] CR2: 0000000000000000 Validating the control action within tcf_ife_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_ife.c | 33 +++++++++++++--------- .../selftests/tc-testing/tc-tests/actions/ife.json | 25 ++++++++++++++++ 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c1ba74d5c1e3..31c6ffb6abe7 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -474,6 +475,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_ife_params *p; struct tcf_ife_info *ife; u16 ife_type = ETH_P_IFE; @@ -531,6 +533,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { @@ -563,13 +569,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (tb[TCA_IFE_METALST]) { err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], NULL, NULL); - if (err) { -metadata_parse_err: - tcf_idr_release(*a, bind); - kfree(p); - return err; - } - + if (err) + goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; @@ -581,21 +582,20 @@ metadata_parse_err: * going to bail out */ err = use_all_metadata(ife, exists); - if (err) { - tcf_idr_release(*a, bind); - kfree(p); - return err; - } + if (err) + goto metadata_parse_err; } if (exists) spin_lock_bh(&ife->tcf_lock); - ife->tcf_action = parm->action; /* protected by tcf_lock when modifying existing action */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(ife->params, p, 1); if (exists) spin_unlock_bh(&ife->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); @@ -603,6 +603,13 @@ metadata_parse_err: tcf_idr_insert(tn, *a); return ret; +metadata_parse_err: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + kfree(p); + tcf_idr_release(*a, bind); + return err; } static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 0da3545cabdb..c13a68b98fc7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -1060,5 +1060,30 @@ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", "matchCount": "0", "teardown": [] + }, + { + "id": "a0e2", + "name": "Replace ife encode action with invalid goto chain control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife encode allow mark pass index 90" + ], + "cmdUnderTest": "$TC actions replace action ife encode allow mark goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E .*allow mark.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] } ] -- cgit v1.2.3 From ff9721d32b1aba8bf46a06df20827d0a5d52ec48 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:04 +0100 Subject: net/sched: act_mirred: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action mirred ingress mirror dev lo pass # tc actions replace action mirred \ > ingress mirror dev lo goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action mirred had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: mirred (Ingress Mirror to device lo) goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: Mirror/redirect action on BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 47 Comm: kworker/3:1 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffa772404b7ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9c5afc3f4300 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9c5afdba9380 RDI: 0000000000029380 RBP: ffffa772404b7b70 R08: ffff9c5af7010028 R09: ffff9c5af7010029 R10: 0000000000000000 R11: ffff9c5af94c6a38 R12: ffff9c5af7953000 R13: ffff9c5af7953008 R14: 0000000000000001 R15: ffff9c5af7953d00 FS: 0000000000000000(0000) GS:ffff9c5afdb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007c514004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_mirred veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul snd_hda_codec_generic crc32_pclmul snd_hda_intel snd_hda_codec mbcache ghash_clmulni_intel jbd2 snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel snd_timer snd crypto_simd cryptd glue_helper soundcore virtio_balloon joydev pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio_net ttm virtio_blk net_failover virtio_console failover drm ata_piix crc32c_intel virtio_pci serio_raw libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_mirred_init() proved to fix the above issue. For the same reason, postpone the assignment of tcfa_action and tcfm_eaction to avoid partial reconfiguration of a mirred rule when it's replaced by another one that mirrors to a device that does not exist. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 21 ++++++++++++++---- .../tc-testing/tc-tests/actions/mirred.json | 25 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 383f4024452c..cd712e4e8998 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -99,6 +99,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; + struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; @@ -158,18 +159,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + m = to_mirred(*a); spin_lock_bh(&m->tcf_lock); - m->tcf_action = parm->action; - m->tcfm_eaction = parm->eaction; if (parm->ifindex) { dev = dev_get_by_index(net, parm->ifindex); if (!dev) { spin_unlock_bh(&m->tcf_lock); - tcf_idr_release(*a, bind); - return -ENODEV; + err = -ENODEV; + goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(dev); rcu_swap_protected(m->tcfm_dev, dev, @@ -178,7 +181,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + m->tcfm_eaction = parm->eaction; spin_unlock_bh(&m->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) { spin_lock(&mirred_list_lock); @@ -189,6 +196,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index db49fd0f8445..6e5fb3d25681 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -434,5 +434,30 @@ "teardown": [ "$TC actions flush action mirred" ] + }, + { + "id": "2a9a", + "name": "Replace mirred action with invalid goto chain control", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred ingress mirror dev lo drop index 90" + ], + "cmdUnderTest": "$TC actions replace action mirred ingress mirror dev lo goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action mirred index 90", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] } ] -- cgit v1.2.3 From c53075ea5d3c44849992523d5d83e2810d05a00e Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:05 +0100 Subject: net/sched: act_connmark: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action connmark pass index 90 # tc actions replace action connmark \ > goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action connmark had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: connmark zone 0 goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 302 Comm: kworker/0:2 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff9bea406c3ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff8c5dfc009f00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9bea406c3a80 RDI: ffff8c5dfb9d6ec0 RBP: ffff9bea406c3b70 R08: ffff8c5dfda222a0 R09: ffffffff90933c3c R10: 0000000000000000 R11: 0000000092793f7d R12: ffff8c5df48b3c00 R13: ffff8c5df48b3c08 R14: 0000000000000001 R15: ffff8c5dfb9d6e40 FS: 0000000000000000(0000) GS:ffff8c5dfda00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000062e0e006 CR4: 00000000001606f0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_connmark nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul mbcache crc32_pclmul jbd2 snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel snd_timer crypto_simd cryptd snd glue_helper joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper virtio_net net_failover syscopyarea virtio_blk failover virtio_console sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix crc32c_intel serio_raw libata virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_connmark_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_connmark.c | 21 +++++++++++++++--- .../tc-testing/tc-tests/actions/connmark.json | 25 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 44aa046a92ea..32ae0cd6e31c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -102,9 +103,10 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; - int ret = 0; + int ret = 0, err; if (!nla) return -EINVAL; @@ -129,7 +131,11 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, } ci = to_connmark(*a); - ci->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->net = net; ci->zone = parm->zone; @@ -143,15 +149,24 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; /* replacing action and zone */ spin_lock_bh(&ci->tcf_lock); - ci->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->zone = parm->zone; spin_unlock_bh(&ci->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); ret = 0; } return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json index 13147a1f5731..cadde8f41fcd 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json @@ -287,5 +287,30 @@ "teardown": [ "$TC actions flush action connmark" ] + }, + { + "id": "c506", + "name": "Replace connmark with invalid goto chain control", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ], + "$TC actions add action connmark pass index 90" + ], + "cmdUnderTest": "$TC actions replace action connmark goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action connmark index 90", + "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] } ] -- cgit v1.2.3 From 1e45d043a8bb2ed8a541384db3cc133e92001f0c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:06 +0100 Subject: net/sched: act_nat: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action nat ingress 1.18.1.1 1.18.2.2 pass index 90 # tc actions replace action nat \ > ingress 1.18.1.1 1.18.2.2 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action nat had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: nat ingress 1.18.1.1/32 1.18.2.2 goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000002d180067 P4D 800000002d180067 PUD 7cb8b067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 164 Comm: kworker/3:1 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffae4500e2fad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9fa52e28c800 RCX: 0000000001011201 RDX: 0000000000000000 RSI: 0000000000000056 RDI: ffff9fa52ca12800 RBP: ffffae4500e2fb70 R08: 0000000000000022 R09: 000000000000000e R10: 00000000ffffffff R11: 0000000001011201 R12: ffff9fa52cbc9c00 R13: ffff9fa52cbc9c08 R14: 0000000000000001 R15: ffff9fa52ca12780 FS: 0000000000000000(0000) GS:ffff9fa57db80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000073f8c004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_nat veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mbcache jbd2 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper snd_timer snd joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs qxl ata_generic pata_acpi drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_net virtio_blk net_failover failover virtio_console drm crc32c_intel floppy ata_piix libata virtio_pci virtio_ring virtio serio_raw dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_nat_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_nat.c | 12 ++++++++++- .../selftests/tc-testing/tc-tests/actions/nat.json | 25 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index de4b493e26d2..e91bb8eb81ec 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; int ret = 0, err; struct tcf_nat *p; @@ -77,6 +79,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, } else { return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; p = to_tcf_nat(*a); spin_lock_bh(&p->tcf_lock); @@ -85,13 +90,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, p->mask = parm->mask; p->flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json index 0080dc2fd41c..bc12c1ccad30 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json @@ -589,5 +589,30 @@ "teardown": [ "$TC actions flush action nat" ] + }, + { + "id": "4b12", + "name": "Replace nat action with invalid goto chain control", + "category": [ + "actions", + "nat" + ], + "setup": [ + [ + "$TC actions flush action nat", + 0, + 1, + 255 + ], + "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 90" + ], + "cmdUnderTest": "$TC actions replace action nat ingress 1.18.1.1 1.18.2.2 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action nat index 90", + "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action nat" + ] } ] -- cgit v1.2.3 From 6ac86ca3524b4549d31c45d11487b0626c334f10 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:07 +0100 Subject: net/sched: act_pedit: validate the control action inside init() the following script: # tc filter add dev crash0 egress matchall \ > action pedit ex munge ip ttl set 10 pass index 90 # tc actions replace action pedit \ > ex munge ip ttl set 10 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action pedit had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: pedit action goto chain 42 keys 1 index 90 ref 2 bind 1 key #0 at ipv4+8: val 0a000000 mask 00ffffff cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff94a73db03be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff94a6ee4c0700 RCX: 000000000000000a RDX: 0000000000000000 RSI: ffff94a6ed22c800 RDI: 0000000000000000 RBP: ffff94a73db03c80 R08: ffff94a7386fa4c8 R09: ffff94a73229ea20 R10: 0000000000000000 R11: 0000000000000000 R12: ffff94a6ed22cb00 R13: ffff94a6ed22cb08 R14: 0000000000000001 R15: ffff94a6ed22c800 FS: 0000000000000000(0000) GS:ffff94a73db00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007120e002 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 4e ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffab1740387eb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffffb18184f0 RBX: 0000000000000002 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000002 RBP: 0000000000000002 R08: 000f168fa695f9a9 R09: 0000000000000020 R10: 0000000000000004 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_pedit veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep aesni_intel snd_hda_core crypto_simd snd_seq cryptd glue_helper snd_seq_device snd_pcm joydev snd_timer pcspkr virtio_balloon snd soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs qxl ata_generic pata_acpi drm_kms_helper virtio_net net_failover syscopyarea sysfillrect sysimgblt failover virtio_blk fb_sys_fops virtio_console ttm drm crc32c_intel serio_raw ata_piix virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_pedit_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 16 ++++++- .../tc-testing/tc-tests/actions/pedit.json | 51 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 8ca82aefa11a..287793abfaf9 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -23,6 +23,7 @@ #include #include #include +#include static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ -142,6 +143,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_pedit_key *keys = NULL; struct tcf_pedit_key_ex *keys_ex; struct tc_pedit *parm; @@ -205,6 +207,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, goto out_free; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + goto out_release; + } p = to_pedit(*a); spin_lock_bh(&p->tcf_lock); @@ -214,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!keys) { spin_unlock_bh(&p->tcf_lock); ret = -ENOMEM; - goto out_release; + goto put_chain; } kfree(p->tcfp_keys); p->tcfp_keys = keys; @@ -223,16 +230,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, memcpy(p->tcfp_keys, parm->keys, ksize); p->tcfp_flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); kfree(p->tcfp_keys_ex); p->tcfp_keys_ex = keys_ex; spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); out_release: tcf_idr_release(*a, bind); out_free: diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json new file mode 100644 index 000000000000..b73ceb9e28b1 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json @@ -0,0 +1,51 @@ +[ + { + "id": "319a", + "name": "Add pedit action that mangles IP TTL", + "category": [ + "actions", + "pedit" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip ttl set 10", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 1 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "7e67", + "name": "Replace pedit action with invalid goto chain", + "category": [ + "actions", + "pedit" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ], + "$TC actions add action pedit ex munge ip ttl set 10 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action pedit ex munge ip ttl set 10 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 90 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + } +] -- cgit v1.2.3 From d6124d6ba697413efc53ff6919b1e0c250f1902a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:08 +0100 Subject: net/sched: act_police: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action police rate 3mbit burst 250k pass index 90 # tc actions replace action police \ > rate 3mbit burst 250k goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action police rate 3mbit burst had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: police 0x5a rate 3Mbit burst 250Kb mtu 2Kb action goto chain 42 overhead 0b ref 2 bind 1 cookie c1a0c1a0 Then, when crash0 starts transmitting more than 3Mbit/s, the following kernel crash is observed: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000007a779067 P4D 800000007a779067 PUD 2ad96067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 5032 Comm: netperf Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffb0e04064fa60 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff93bb3322cce0 RCX: 0000000000000005 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff93bb3322cce0 RBP: ffffb0e04064fb00 R08: 0000000000000022 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff93bb3beed300 R13: ffff93bb3beed308 R14: 0000000000000001 R15: ffff93bb3b64d000 FS: 00007f0bc6be5740(0000) GS:ffff93bb3db80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000746a8001 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ipt_do_table+0x31c/0x420 [ip_tables] ? ip_finish_output2+0x16f/0x430 ip_finish_output2+0x16f/0x430 ? ip_output+0x69/0xe0 ip_output+0x69/0xe0 ? ip_forward_options+0x1a0/0x1a0 __tcp_transmit_skb+0x563/0xa40 tcp_write_xmit+0x243/0xfa0 __tcp_push_pending_frames+0x32/0xf0 tcp_sendmsg_locked+0x404/0xd30 tcp_sendmsg+0x27/0x40 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 ? __sys_connect+0x87/0xf0 ? syscall_trace_enter+0x1df/0x2e0 ? __audit_syscall_exit+0x216/0x260 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f0bc5ffbafd Code: 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 8b 05 ae c4 2c 00 85 c0 75 2d 45 31 c9 45 31 c0 4c 63 d1 48 63 ff b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 63 63 2c 00 f7 d8 64 89 02 48 RSP: 002b:00007fffef94b7f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000004000 RCX: 00007f0bc5ffbafd RDX: 0000000000004000 RSI: 00000000017e5420 RDI: 0000000000000004 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004 R13: 00000000017e51d0 R14: 0000000000000010 R15: 0000000000000006 Modules linked in: act_police veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic mbcache crct10dif_pclmul jbd2 crc32_pclmul ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper snd_timer snd joydev pcspkr virtio_balloon soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_blk virtio_net virtio_console net_failover failover crc32c_intel ata_piix libata serio_raw virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_police_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_police.c | 12 ++++++++++- .../tc-testing/tc-tests/actions/police.json | 25 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 229eba7925e5..2b8581f6ab51 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -21,6 +21,7 @@ #include #include #include +#include struct tcf_police_params { int tcfp_result; @@ -88,6 +89,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, { int ret = 0, tcfp_result = TC_ACT_OK, err, size; struct nlattr *tb[TCA_POLICE_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; @@ -129,6 +131,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; police = to_police(*a); if (parm->rate.rate) { @@ -214,12 +219,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (new->peak_present) police->tcfp_ptoks = new->tcfp_mtu_ptoks; spin_unlock_bh(&police->tcfp_lock); - police->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(police->params, new, lockdep_is_held(&police->tcf_lock)); spin_unlock_bh(&police->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (new) kfree_rcu(new, rcu); @@ -230,6 +237,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: tcf_idr_release(*a, bind); return err; } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 4086a50a670e..b8268da5adaa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -739,5 +739,30 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "689e", + "name": "Replace police action with invalid goto chain control", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 3mbit burst 250k drop index 90" + ], + "cmdUnderTest": "$TC actions replace action police rate 3mbit burst 250k goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action police index 90", + "matchPattern": "action order [0-9]*: police 0x5a rate 3Mbit burst 250Kb mtu 2Kb action drop", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] } ] -- cgit v1.2.3 From e8c87c643ef3603c6e63bdecb67b03d794648493 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:09 +0100 Subject: net/sched: act_sample: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action sample rate 1024 group 4 pass index 90 # tc actions replace action sample \ > rate 1024 group 4 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action sample had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: sample rate 1/1024 group 4 goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 8000000079966067 P4D 8000000079966067 PUD 7987b067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffbee60033fad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff99d7ae6e3b00 RCX: 00000000e555df9b RDX: 0000000000000000 RSI: 00000000b0352718 RDI: ffff99d7fda1fcf0 RBP: ffffbee60033fb70 R08: 0000000070731ab1 R09: 0000000000000400 R10: 0000000000000000 R11: ffff99d7ac733838 R12: ffff99d7f3c2be00 R13: ffff99d7f3c2be08 R14: 0000000000000001 R15: ffff99d7f3c2b600 FS: 0000000000000000(0000) GS:ffff99d7fda00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000797de006 CR4: 00000000001606f0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_sample psample veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mbcache jbd2 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device aesni_intel crypto_simd snd_pcm cryptd glue_helper snd_timer joydev snd pcspkr virtio_balloon i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt fb_sys_fops net_failover ttm failover virtio_blk virtio_console drm ata_piix serio_raw crc32c_intel libata virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_sample_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_sample.c | 19 +++++++++++++--- .../tc-testing/tc-tests/actions/sample.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 36b8adbe935d..4060b0955c97 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -43,6 +44,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, sample_net_id); struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; + struct tcf_chain *goto_ch = NULL; struct tc_sample *parm; u32 psample_group_num; struct tcf_sample *s; @@ -79,18 +81,21 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, psample_group_num); if (!psample_group) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } s = to_sample(*a); spin_lock_bh(&s->tcf_lock); - s->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); s->psample_group_num = psample_group_num; RCU_INIT_POINTER(s->psample_group, psample_group); @@ -100,10 +105,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); } spin_unlock_bh(&s->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_sample_cleanup(struct tc_action *a) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json index 3aca33c00039..27f0acaed880 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -584,5 +584,30 @@ "teardown": [ "$TC actions flush action sample" ] + }, + { + "id": "0a6e", + "name": "Replace sample action with invalid goto chain control", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pass.*index 90", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] } ] -- cgit v1.2.3 From 4b006b0c139e486773335f4c23b4d82348cfeb04 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:10 +0100 Subject: net/sched: act_simple: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action simple sdata hello pass index 90 # tc actions replace action simple \ > sdata world goto chain 42 index 90 cookie c1a0c1a0 # tc action show action simple had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: Simple index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000006a6fb067 P4D 800000006a6fb067 PUD 6aed6067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 3241 Comm: kworker/2:0 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffbe6781763ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9e59bdb80e00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9e59b4716738 RDI: ffff9e59ab12d140 RBP: ffffbe6781763b70 R08: 0000000000000234 R09: 0000000000aaaaaa R10: 0000000000000000 R11: ffff9e59b247cd50 R12: ffff9e59b112f100 R13: ffff9e59b112f108 R14: 0000000000000001 R15: ffff9e59ab12d0c0 FS: 0000000000000000(0000) GS:ffff9e59b4700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000006af92004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_simple veth ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep mbcache snd_hda_core jbd2 snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd snd_timer glue_helper snd joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio_net ttm net_failover virtio_console virtio_blk failover drm crc32c_intel serio_raw floppy ata_piix libata virtio_pci virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_simple_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_simple.c | 52 ++++++++++++++++------ .../tc-testing/tc-tests/actions/simple.json | 25 +++++++++++ 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4916dc3e3668..23c8ca5615e5 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -60,14 +61,26 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata) return 0; } -static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata, - struct tc_defact *p) +static int reset_policy(struct tc_action *a, const struct nlattr *defdata, + struct tc_defact *p, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { + struct tcf_chain *goto_ch = NULL; + struct tcf_defact *d; + int err; + + err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack); + if (err < 0) + return err; + d = to_defact(a); spin_lock_bh(&d->tcf_lock); - d->tcf_action = p->action; + goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch); memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); spin_unlock_bh(&d->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + return 0; } static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { @@ -82,6 +95,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; struct tcf_defact *d; bool exists = false; @@ -122,27 +136,37 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } d = to_defact(*a); - ret = alloc_defdata(d, tb[TCA_DEF_DATA]); - if (ret < 0) { - tcf_idr_release(*a, bind); - return ret; - } - d->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + + err = alloc_defdata(d, tb[TCA_DEF_DATA]); + if (err < 0) + goto put_chain; + + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { - d = to_defact(*a); - if (!ovr) { - tcf_idr_release(*a, bind); - return -EEXIST; + err = -EEXIST; + goto release_idr; } - reset_policy(d, tb[TCA_DEF_DATA], parm); + err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack); + if (err) + goto release_idr; } if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json index e89a7aa4012d..8e8c1ae12260 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json @@ -126,5 +126,30 @@ "teardown": [ "" ] + }, + { + "id": "b776", + "name": "Replace simple action with invalid goto chain control", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"hello\" pass index 90" + ], + "cmdUnderTest": "$TC actions replace action simple sdata \"world\" goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple .*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] } ] -- cgit v1.2.3 From ec7727bb24b01e96b0c46068addf355ee4f794d8 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:11 +0100 Subject: net/sched: act_skbedit: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action skbedit ptype host pass index 90 # tc actions replace action skbedit \ > ptype host goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action skbedit had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: skbedit ptype host goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 3467 Comm: kworker/3:3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffb50a81e1fad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9aa47ba4ea00 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffff9aa469eeb3c0 RDI: ffff9aa47ba4ea00 RBP: ffffb50a81e1fb70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff9aa47bce0638 R12: ffff9aa4793b0c00 R13: ffff9aa4793b0c08 R14: 0000000000000001 R15: ffff9aa469eeb3c0 FS: 0000000000000000(0000) GS:ffff9aa474780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007360e005 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_skbedit veth ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep mbcache snd_hda_core jbd2 snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd snd_timer glue_helper snd joydev soundcore pcspkr virtio_balloon i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_net net_failover drm failover virtio_blk virtio_console ata_piix virtio_pci crc32c_intel serio_raw libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_skbedit_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_skbedit.c | 19 +++++++++++++--- .../tc-testing/tc-tests/actions/skbedit.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 4566eff3d027..7e1d261a31d2 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -102,6 +103,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, skbedit_net_id); struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_skbedit *parm; struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; @@ -187,11 +189,14 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return -EEXIST; } } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->flags = flags; @@ -209,16 +214,24 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new->mask = *mask; spin_lock_bh(&d->tcf_lock); - d->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(d->params, params_new, lockdep_is_held(&d->tcf_lock)); spin_unlock_bh(&d->tcf_lock); if (params_new) kfree_rcu(params_new, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json index 5aaf593b914a..ecd96eda7f6a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -484,5 +484,30 @@ "teardown": [ "$TC actions flush action skbedit" ] + }, + { + "id": "1b2b", + "name": "Replace skbedit action with invalid goto_chain control", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit ptype host pass index 90" + ], + "cmdUnderTest": "$TC actions replace action skbedit ptype host goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype host pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] } ] -- cgit v1.2.3 From 7c3d825d12c5e6056ea73c0a202cbdef9d9ab9e6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:12 +0100 Subject: net/sched: act_skbmod: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action skbmod set smac 00:c1:a0:c1:a0:00 pass index 90 # tc actions replace action skbmod \ > set smac 00:c1:a0:c1:a0:00 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action skbmod had the following output: src MAC address <00:c1:a0:c1:a0:00> src MAC address <00:c1:a0:c1:a0:00> Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: skbmod goto chain 42 set smac 00:c1:a0:c1:a0:00 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000002d5c7067 P4D 800000002d5c7067 PUD 77e16067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff8987ffd83be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff8987aeb68800 RCX: ffff8987fa263640 RDX: 0000000000000000 RSI: ffff8987f51c8802 RDI: 00000000000000a0 RBP: ffff8987ffd83c80 R08: ffff8987f939bac8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8987f5c77d00 R13: ffff8987f5c77d08 R14: 0000000000000001 R15: ffff8987f0c29f00 FS: 0000000000000000(0000) GS:ffff8987ffd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007832c004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 56 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffa2a1c038feb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffffa94184f0 RBX: 0000000000000003 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000003 RBP: 0000000000000003 R08: 001123cfc2ba71ac R09: 0000000000000000 R10: 0000000000000000 R11: 00000000000f4240 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_skbmod veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mbcache jbd2 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device aesni_intel crypto_simd cryptd glue_helper snd_pcm joydev pcspkr virtio_balloon snd_timer snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt fb_sys_fops net_failover virtio_console ttm virtio_blk failover drm crc32c_intel serio_raw ata_piix virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_skbmod_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 19 +++++++++++++--- .../tc-testing/tc-tests/actions/skbmod.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index b9ab2c8f07f1..1d4c324d0a42 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -88,6 +89,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; + struct tcf_chain *goto_ch = NULL; struct tc_skbmod *parm; struct tcf_skbmod *d; bool exists = false; @@ -154,21 +156,24 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; d = to_skbmod(*a); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->flags = lflags; - d->tcf_action = parm->action; if (ovr) spin_lock_bh(&d->tcf_lock); /* Protected by tcf_lock if overwriting existing action. */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p_old = rcu_dereference_protected(d->skbmod_p, 1); if (lflags & SKBMOD_F_DMAC) @@ -184,10 +189,18 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (p_old) kfree_rcu(p_old, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_skbmod_cleanup(struct tc_action *a) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json index fe3326e939c1..6eb4c4f97060 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json @@ -392,5 +392,30 @@ "teardown": [ "$TC actions flush action skbmod" ] + }, + { + "id": "b651", + "name": "Replace skbmod action with invalid goto_chain control", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ], + "$TC actions add action skbmod set etype 0x1111 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action skbmod set etype 0x1111 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pass set etype 0x1111\\s+index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] } ] -- cgit v1.2.3 From e5fdabacbffc5d321bf9f51410fe0db0834606eb Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:13 +0100 Subject: net/sched: act_tunnel_key: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.2 dst_port 3128 \ > nocsum id 1 pass index 90 # tc actions replace action tunnel_key \ > set src_ip 10.10.10.1 dst_ip 20.20.2 dst_port 3128 nocsum id 1 \ > goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action tunnel_key had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.2.0 key_id 1 dst_port 3128 nocsum goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000002aba4067 P4D 800000002aba4067 PUD 795f9067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff9346bdb83be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9346bb795c00 RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff93466c881700 RDI: 0000000000000246 RBP: ffff9346bdb83c80 R08: ffff9346b3e1e0c8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9346b978f000 R13: ffff9346b978f008 R14: 0000000000000001 R15: ffff93466dceeb40 FS: 0000000000000000(0000) GS:ffff9346bdb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007a6c2002 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 55 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffa48a8038feb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffffaa8184f0 RBX: 0000000000000003 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0011251c6fcfac49 R09: ffff9346b995be00 R10: ffffa48a805e7ce8 R11: 00000000024c38dd R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_tunnel_key veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel mbcache snd_hda_intel jbd2 snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev snd_timer snd pcspkr virtio_balloon soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt fb_sys_fops ttm net_failover virtio_console virtio_blk failover drm serio_raw crc32c_intel ata_piix virtio_pci floppy virtio_ring libata virtio dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_tunnel_key_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 18 ++++++++++++++-- .../tc-testing/tc-tests/actions/tunnel_key.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fc295d91559a..d5aaf90a3971 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -217,6 +218,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; + struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; @@ -360,6 +362,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, goto release_tun_meta; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + exists = true; + goto release_tun_meta; + } t = to_tunnel_key(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); @@ -367,23 +375,29 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); ret = -ENOMEM; exists = true; - goto release_tun_meta; + goto put_chain; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; spin_lock_bh(&t->tcf_lock); - t->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); tunnel_key_release_params(params_new); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + release_tun_meta: if (metadata) dst_release(&metadata->dst); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index e7e15a7336b6..28453a445fdb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -884,5 +884,30 @@ "teardown": [ "$TC actions flush action tunnel_key" ] + }, + { + "id": "8242", + "name": "Replace tunnel_key set action with invalid goto chain", + "category": [ + "actions", + "tunnel_key" + ], + "setup": [ + [ + "$TC actions flush action tunnel_key", + 0, + 1, + 255 + ], + "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 10.10.10.2 dst_ip 20.20.20.1 dst_port 3129 id 2 csum goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action tunnel_key index 90", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*csum pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action tunnel_key" + ] } ] -- cgit v1.2.3 From 7e0c8892df7d0316ec853adbf84db536cd53258c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:14 +0100 Subject: net/sched: act_vlan: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action vlan pop pass index 90 # tc actions replace action vlan \ > pop goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action vlan had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: vlan pop goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000007974f067 P4D 800000007974f067 PUD 79638067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff982dfdb83be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff982dfc55db00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff982df97099c0 RDI: ffff982dfc55db00 RBP: ffff982dfdb83c80 R08: ffff982df983fec8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff982df5aacd00 R13: ffff982df5aacd08 R14: 0000000000000001 R15: ffff982df97099c0 FS: 0000000000000000(0000) GS:ffff982dfdb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000796d0005 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? enqueue_hrtimer+0x39/0x90 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 7b ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffa4714038feb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffff840184f0 RBX: 0000000000000003 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000001e57d3f387 RBP: 0000000000000003 R08: 001125d9ca39e1eb R09: 0000000000000000 R10: 000000000000027d R11: 000000000009f400 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_vlan veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic mbcache crct10dif_pclmul jbd2 snd_hda_intel crc32_pclmul snd_hda_codec ghash_clmulni_intel snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev snd_timer virtio_balloon snd pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt virtio_net fb_sys_fops virtio_blk ttm net_failover virtio_console failover ata_piix drm libata crc32c_intel virtio_pci serio_raw virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_vlan_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 20 ++++++++++++++--- .../tc-testing/tc-tests/actions/vlan.json | 25 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 4651ee15e35d..0f40d0a74423 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -109,6 +110,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_vlan_params *p; struct tc_vlan *parm; struct tcf_vlan *v; @@ -200,12 +202,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + v = to_vlan(*a); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->tcfv_action = action; @@ -214,16 +220,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, p->tcfv_push_proto = push_proto; spin_lock_bh(&v->tcf_lock); - v->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock)); spin_unlock_bh(&v->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_vlan_cleanup(struct tc_action *a) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json index 69ea09eefffc..cc7c7d758008 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -688,5 +688,30 @@ "teardown": [ "$TC actions flush action vlan" ] + }, + { + "id": "e394", + "name": "Replace vlan push action with invalid goto chain control", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 500 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action vlan push id 500 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action vlan index 90", + "matchPattern": "action order [0-9]+: vlan.*push id 500 protocol 802.1Q priority 0 pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] } ] -- cgit v1.2.3 From fe384e2fa36ca084a456fd30558cccc75b4b3fbd Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:15 +0100 Subject: net/sched: don't dereference a->goto_chain to read the chain index callers of tcf_gact_goto_chain_index() can potentially read an old value of the chain index, or even dereference a NULL 'goto_chain' pointer, because 'goto_chain' and 'tcfa_action' are read in the traffic path without caring of concurrent write in the control path. The most recent value of chain index can be read also from a->tcfa_action (it's encoded there together with TC_ACT_GOTO_CHAIN bits), so we don't really need to dereference 'goto_chain': just read the chain id from the control action. Fixes: e457d86ada27 ("net: sched: add couple of goto_chain helpers") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index ee8d005f56fc..eb8f01c819e6 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -56,7 +56,7 @@ static inline bool is_tcf_gact_goto_chain(const struct tc_action *a) static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) { - return a->goto_chain->index; + return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } #endif /* __NET_TC_GACT_H */ -- cgit v1.2.3 From ee3bbfe806cdb46b02cda63626cb50a7a7b19fc5 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:16 +0100 Subject: net/sched: let actions use RCU to access 'goto_chain' use RCU when accessing the action chain, to avoid use after free in the traffic path when 'goto chain' is replaced on existing TC actions (see script below). Since the control action is read in the traffic path without holding the action spinlock, we need to explicitly ensure that a->goto_chain is not NULL before dereferencing (i.e it's not sufficient to rely on the value of TC_ACT_GOTO_CHAIN bits). Not doing so caused NULL dereferences in tcf_action_goto_chain_exec() when the following script: # tc chain add dev dd0 chain 42 ingress protocol ip flower \ > ip_proto udp action pass index 4 # tc filter add dev dd0 ingress protocol ip flower \ > ip_proto udp action csum udp goto chain 42 index 66 # tc chain del dev dd0 chain 42 ingress (start UDP traffic towards dd0) # tc action replace action csum udp pass index 66 was run repeatedly for several hours. Suggested-by: Cong Wang Suggested-by: Vlad Buslov Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- include/net/sch_generic.h | 1 + net/sched/act_api.c | 18 ++++++++++-------- net/sched/cls_api.c | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 54fbb49bd08a..c61a1bf4e3de 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -39,7 +39,7 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; - struct tcf_chain *goto_chain; + struct tcf_chain __rcu *goto_chain; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 31284c078d06..7d1a0483a17b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -378,6 +378,7 @@ struct tcf_chain { bool flushing; const struct tcf_proto_ops *tmplt_ops; void *tmplt_priv; + struct rcu_head rcu; }; struct tcf_block { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fe67b98ac641..5a87e271d35a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -31,7 +31,7 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { - const struct tcf_chain *chain = a->goto_chain; + const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain); } @@ -91,13 +91,11 @@ end: EXPORT_SYMBOL(tcf_action_check_ctrlact); struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, - struct tcf_chain *newchain) + struct tcf_chain *goto_chain) { - struct tcf_chain *oldchain = a->goto_chain; - a->tcfa_action = action; - a->goto_chain = newchain; - return oldchain; + rcu_swap_protected(a->goto_chain, goto_chain, 1); + return goto_chain; } EXPORT_SYMBOL(tcf_action_set_ctrlact); @@ -108,7 +106,7 @@ EXPORT_SYMBOL(tcf_action_set_ctrlact); */ static void free_tcf(struct tc_action *p) { - struct tcf_chain *chain = p->goto_chain; + struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); @@ -686,6 +684,10 @@ repeat: return TC_ACT_OK; } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { + if (unlikely(!rcu_access_pointer(a->goto_chain))) { + net_warn_ratelimited("can't go to NULL chain!\n"); + return TC_ACT_SHOT; + } tcf_action_goto_chain_exec(a, res); } @@ -931,7 +933,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, module_put(a_o->owner); if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && - !a->goto_chain) { + !rcu_access_pointer(a->goto_chain)) { tcf_action_destroy_1(a, bind); NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); return ERR_PTR(-EINVAL); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc10525e90e7..99ae30c177c7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -367,7 +367,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) struct tcf_block *block = chain->block; mutex_destroy(&chain->filter_chain_lock); - kfree(chain); + kfree_rcu(chain, rcu); if (free_block) tcf_block_destroy(block); } -- cgit v1.2.3 From 6b70fc94afd165342876e53fc4b2f7d085009945 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 20 Mar 2019 14:25:05 -0400 Subject: net-sysfs: Fix memory leak in netdev_register_kobject When registering struct net_device, it will call register_netdevice -> netdev_register_kobject -> device_initialize(dev); dev_set_name(dev, "%s", ndev->name) device_add(dev) register_queue_kobjects(ndev) In netdev_register_kobject(), if device_add(dev) or register_queue_kobjects(ndev) failed. Register_netdevice() will return error, causing netdev_freemem(ndev) to be called to free net_device, however put_device(&dev->dev)->..-> kobject_cleanup() won't be called, resulting in a memory leak. syzkaller report this: BUG: memory leak unreferenced object 0xffff8881f4fad168 (size 8): comm "syz-executor.0", pid 3575, jiffies 4294778002 (age 20.134s) hex dump (first 8 bytes): 77 70 61 6e 30 00 ff ff wpan0... backtrace: [<000000006d2d91d7>] kstrdup_const+0x3d/0x50 mm/util.c:73 [<00000000ba9ff953>] kvasprintf_const+0x112/0x170 lib/kasprintf.c:48 [<000000005555ec09>] kobject_set_name_vargs+0x55/0x130 lib/kobject.c:281 [<0000000098d28ec3>] dev_set_name+0xbb/0xf0 drivers/base/core.c:1915 [<00000000b7553017>] netdev_register_kobject+0xc0/0x410 net/core/net-sysfs.c:1727 [<00000000c826a797>] register_netdevice+0xa51/0xeb0 net/core/dev.c:8711 [<00000000857bfcfd>] cfg802154_update_iface_num.isra.2+0x13/0x90 [ieee802154] [<000000003126e453>] ieee802154_llsec_fill_key_id+0x1d5/0x570 [ieee802154] [<00000000e4b3df51>] 0xffffffffc1500e0e [<00000000b4319776>] platform_drv_probe+0xc6/0x180 drivers/base/platform.c:614 [<0000000037669347>] really_probe+0x491/0x7c0 drivers/base/dd.c:509 [<000000008fed8862>] driver_probe_device+0xdc/0x240 drivers/base/dd.c:671 [<00000000baf52041>] device_driver_attach+0xf2/0x130 drivers/base/dd.c:945 [<00000000c7cc8dec>] __driver_attach+0x10e/0x210 drivers/base/dd.c:1022 [<0000000057a757c2>] bus_for_each_dev+0x154/0x1e0 drivers/base/bus.c:304 [<000000005f5ae04b>] bus_add_driver+0x427/0x5e0 drivers/base/bus.c:645 Reported-by: Hulk Robot Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Wang Hai Reviewed-by: Andy Shevchenko Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8f8b7b6c2945..f8f94303a1f5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1747,16 +1747,20 @@ int netdev_register_kobject(struct net_device *ndev) error = device_add(dev); if (error) - return error; + goto error_put_device; error = register_queue_kobjects(ndev); - if (error) { - device_del(dev); - return error; - } + if (error) + goto error_device_del; pm_runtime_set_memalloc_noio(dev, true); + return 0; + +error_device_del: + device_del(dev); +error_put_device: + put_device(dev); return error; } -- cgit v1.2.3 From 408f13ef358aa5ad56dc6230c2c7deb92cf462b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Mar 2019 09:39:52 +0800 Subject: rhashtable: Still do rehash when we get EEXIST As it stands if a shrink is delayed because of an outstanding rehash, we will go into a rescheduling loop without ever doing the rehash. This patch fixes this by still carrying out the rehash and then rescheduling so that we can shrink after the completion of the rehash should it still be necessary. The return value of EEXIST captures this case and other cases (e.g., another thread expanded/rehashed the table at the same time) where we should still proceed with the rehash. Fixes: da20420f83ea ("rhashtable: Add nested tables") Reported-by: Josh Elsasser Signed-off-by: Herbert Xu Tested-by: Josh Elsasser Signed-off-by: David S. Miller --- lib/rhashtable.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 0a105d4af166..97f59abc3e92 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -416,8 +416,12 @@ static void rht_deferred_worker(struct work_struct *work) else if (tbl->nest) err = rhashtable_rehash_alloc(ht, tbl, tbl->size); - if (!err) - err = rhashtable_rehash_table(ht); + if (!err || err == -EEXIST) { + int nerr; + + nerr = rhashtable_rehash_table(ht); + err = err ?: nerr; + } mutex_unlock(&ht->mutex); -- cgit v1.2.3 From 5f543a54eec08228ab0cc0a49cf5d79dd32c9e5e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 21 Mar 2019 11:28:43 +0800 Subject: net: hns3: fix for not calculating tx bd num correctly When there is only one byte in a frag, the current calculation using "(size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET" will return zero, because HNS3_MAX_BD_SIZE is 65535 and HNS3_MAX_BD_SIZE_OFFSET is 16. So it will cause tx error when a frag's size is one byte. This patch fixes it by using DIV_ROUND_UP. Fixes: 3fe13ed95dd3 ("net: hns3: avoid mult + div op in critical data path") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 13 ++++++------- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 1 - 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 1c1f17ec6be2..162cb9afa0e7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -22,6 +22,7 @@ #include "hns3_enet.h" #define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift))) +#define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE) static void hns3_clear_all_ring(struct hnae3_handle *h); static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h); @@ -1079,7 +1080,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc_cb->length = size; - frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET; + frag_buf_num = hns3_tx_bd_count(size); sizeoflast = size & HNS3_TX_LAST_SIZE_M; sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE; @@ -1124,14 +1125,13 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum, int i; size = skb_headlen(skb); - buf_num = (size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET; + buf_num = hns3_tx_bd_count(size); frag_num = skb_shinfo(skb)->nr_frags; for (i = 0; i < frag_num; i++) { frag = &skb_shinfo(skb)->frags[i]; size = skb_frag_size(frag); - bdnum_for_frag = (size + HNS3_MAX_BD_SIZE - 1) >> - HNS3_MAX_BD_SIZE_OFFSET; + bdnum_for_frag = hns3_tx_bd_count(size); if (unlikely(bdnum_for_frag > HNS3_MAX_BD_PER_FRAG)) return -ENOMEM; @@ -1139,8 +1139,7 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum, } if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) { - buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) >> - HNS3_MAX_BD_SIZE_OFFSET; + buf_num = hns3_tx_bd_count(skb->len); if (ring_space(ring) < buf_num) return -EBUSY; /* manual split the send packet */ @@ -1169,7 +1168,7 @@ static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum, buf_num = skb_shinfo(skb)->nr_frags + 1; if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) { - buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE; + buf_num = hns3_tx_bd_count(skb->len); if (ring_space(ring) < buf_num) return -EBUSY; /* manual split the send packet */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 1db0bd41d209..75669cd0c311 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -193,7 +193,6 @@ enum hns3_nic_state { #define HNS3_VECTOR_INITED 1 #define HNS3_MAX_BD_SIZE 65535 -#define HNS3_MAX_BD_SIZE_OFFSET 16 #define HNS3_MAX_BD_PER_FRAG 8 #define HNS3_MAX_BD_PER_PKT MAX_SKB_FRAGS -- cgit v1.2.3 From 83d163124cf1104cca5b668d5fe6325715a60855 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Mar 2019 14:34:36 -0700 Subject: bpf: verifier: propagate liveness on all frames Commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") connected up parentage chains of all frames of the stack. It didn't, however, ensure propagate_liveness() propagates all liveness information along those chains. This means pruning happening in the callee may generate explored states with incomplete liveness for the chains in lower frames of the stack. The included selftest is similar to the prior one from commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences"), where callee would prune regardless of the difference in r8 state. Now we also initialize r9 to 0 or 1 based on a result from get_random(). r9 is never read so the walk with r9 = 0 gets pruned (correctly) after the walk with r9 = 1 completes. The selftest is so arranged that the pruning will happen in the callee. Since callee does not propagate read marks of r8, the explored state at the pruning point prior to the callee will now ignore r8. Propagate liveness on all frames of the stack when pruning. Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 20 +++++++++++--------- tools/testing/selftests/bpf/verifier/calls.c | 25 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f19d5e04c69d..fd502c1f71eb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6078,15 +6078,17 @@ static int propagate_liveness(struct bpf_verifier_env *env, } /* Propagate read liveness of registers... */ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); - /* We don't need to worry about FP liveness because it's read-only */ - for (i = 0; i < BPF_REG_FP; i++) { - if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ) - continue; - if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) { - err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i], - &vparent->frame[vstate->curframe]->regs[i]); - if (err) - return err; + for (frame = 0; frame <= vstate->curframe; frame++) { + /* We don't need to worry about FP liveness, it's read-only */ + for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { + if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ) + continue; + if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) { + err = mark_reg_read(env, &vstate->frame[frame]->regs[i], + &vparent->frame[frame]->regs[i]); + if (err) + return err; + } } } diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 4004891afa9c..f2ccae39ee66 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1940,3 +1940,28 @@ .errstr = "!read_ok", .result = REJECT, }, +{ + "calls: cross frame pruning - liveness propagation", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr = "!read_ok", + .result = REJECT, +}, -- cgit v1.2.3 From c8248c6c1a3d5db944753dd8e1c143d92c2c74fc Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 21 Mar 2019 21:23:14 +0100 Subject: r8169: don't read interrupt mask register in interrupt handler After the original patch network starts to crash on heavy load. It's not fully clear why this additional register read has such side effects, but removing it fixes the issue. Thanks also to Alex for his contribution and hints. [0] https://marc.info/?t=155268170400002&r=1&w=2 Fixes: e782410ed237 ("r8169: improve spurious interrupt detection") Reported-by: VDR User Tested-by: VDR User Signed-off-by: Heiner Kallweit Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index c29dde064078..9dd1cd2c0c68 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -678,6 +678,7 @@ struct rtl8169_private { struct work_struct work; } wk; + unsigned irq_enabled:1; unsigned supports_gmii:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; @@ -1293,6 +1294,7 @@ static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) static void rtl_irq_disable(struct rtl8169_private *tp) { RTL_W16(tp, IntrMask, 0); + tp->irq_enabled = 0; } #define RTL_EVENT_NAPI_RX (RxOK | RxErr) @@ -1301,6 +1303,7 @@ static void rtl_irq_disable(struct rtl8169_private *tp) static void rtl_irq_enable(struct rtl8169_private *tp) { + tp->irq_enabled = 1; RTL_W16(tp, IntrMask, tp->irq_mask); } @@ -6520,9 +6523,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) { struct rtl8169_private *tp = dev_instance; u16 status = RTL_R16(tp, IntrStatus); - u16 irq_mask = RTL_R16(tp, IntrMask); - if (status == 0xffff || !(status & irq_mask)) + if (!tp->irq_enabled || status == 0xffff || !(status & tp->irq_mask)) return IRQ_NONE; if (unlikely(status & SYSErr)) { -- cgit v1.2.3 From a7fb107b7d8982ac76c958a0d2838a151b03e97e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 21 Mar 2019 16:34:44 -0700 Subject: net: phy: Re-parent menus for MDIO bus drivers correctly After 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") the various MDIO bus drivers were no longer parented with config PHYLIB but with config MDIO_BUS which is not a menuconfig, fix this by depending on MDIO_DEVICE which is a menuconfig. This is visually nicer and less confusing for users. Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 071869db44cf..520657945b82 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -7,6 +7,8 @@ menuconfig MDIO_DEVICE help MDIO devices and driver infrastructure code. +if MDIO_DEVICE + config MDIO_BUS tristate default m if PHYLIB=m @@ -179,6 +181,7 @@ config MDIO_XGENE APM X-Gene SoC's. endif +endif config PHYLINK tristate -- cgit v1.2.3 From fa3a419d2f674b431d38748cb58fb7da17ee8949 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:07 +0800 Subject: net: xilinx: fix possible object reference leak The call to of_parse_phandle returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1624:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1569, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Anirudha Sarangi Cc: John Linn Cc: "David S. Miller" Cc: Michal Simek Cc: netdev@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index ec7e7ec24ff9..4041c75997ba 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1575,12 +1575,14 @@ static int axienet_probe(struct platform_device *pdev) ret = of_address_to_resource(np, 0, &dmares); if (ret) { dev_err(&pdev->dev, "unable to get DMA resource\n"); + of_node_put(np); goto free_netdev; } lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); if (IS_ERR(lp->dma_regs)) { dev_err(&pdev->dev, "could not map DMA regs\n"); ret = PTR_ERR(lp->dma_regs); + of_node_put(np); goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); -- cgit v1.2.3 From be693df3cf9dd113ff1d2c0d8150199efdba37f6 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:08 +0800 Subject: net: ibm: fix possible object reference leak The call to ehea_get_eth_dn returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/ibm/ehea/ehea_main.c:3163:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3154, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Douglas Miller Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 3baabdc89726..90b62c1412c8 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3160,6 +3160,7 @@ static ssize_t ehea_probe_port(struct device *dev, if (ehea_add_adapter_mr(adapter)) { pr_err("creating MR failed\n"); + of_node_put(eth_dn); return -EIO; } -- cgit v1.2.3 From 75eac7b5f68b0a0671e795ac636457ee27cc11d8 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:09 +0800 Subject: net: ethernet: ti: fix possible object reference leak The call to of_get_child_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/ti/netcp_ethss.c:3661:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3654, but without a corresponding object release within this function. ./drivers/net/ethernet/ti/netcp_ethss.c:3665:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3654, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Wingman Kwok Cc: Murali Karicheri Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_ethss.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 5174d318901e..0a920c5936b2 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3657,12 +3657,16 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, ret = netcp_txpipe_init(&gbe_dev->tx_pipe, netcp_device, gbe_dev->dma_chan_name, gbe_dev->tx_queue_id); - if (ret) + if (ret) { + of_node_put(interfaces); return ret; + } ret = netcp_txpipe_open(&gbe_dev->tx_pipe); - if (ret) + if (ret) { + of_node_put(interfaces); return ret; + } /* Create network interfaces */ INIT_LIST_HEAD(&gbe_dev->gbe_intf_head); -- cgit v1.2.3 From 23c78343ec36990709b636a9e02bad814f4384ad Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Mar 2019 07:39:35 +0100 Subject: r8169: fix cable re-plugging issue Bartek reported that after few cable unplug/replug cycles suddenly replug isn't detected any longer. His system uses a RTL8106, I wasn't able to reproduce the issue with RTL8168g. According to his bisect the referenced commit caused the regression. As Realtek doesn't release datasheets or errata it's hard to say what's the actual root cause, but this change was reported to fix the issue. Fixes: 38caff5a445b ("r8169: handle all interrupt events in the hard irq handler") Reported-by: Bartosz Skrzypczak Suggested-by: Bartosz Skrzypczak Tested-by: Bartosz Skrzypczak Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 9dd1cd2c0c68..7562ccbbb39a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6542,7 +6542,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags); } - if (status & RTL_EVENT_NAPI) { + if (status & (RTL_EVENT_NAPI | LinkChg)) { rtl_irq_disable(tp); napi_schedule_irqoff(&tp->napi); } -- cgit v1.2.3 From 064c5d6881e897077639e04973de26440ee205e6 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Fri, 22 Mar 2019 12:37:35 +0000 Subject: net: sched: fix cleanup NULL pointer exception in act_mirr A new mirred action is created by the tcf_mirred_init function. This contains a list head struct which is inserted into a global list on successful creation of a new action. However, after a creation, it is still possible to error out and call the tcf_idr_release function. This, in turn, calls the act_mirr cleanup function via __tcf_idr_release and __tcf_action_put. This cleanup function tries to delete the list entry which is as yet uninitialised, leading to a NULL pointer exception. Fix this by initialising the list entry on creation of a new action. Bug report: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 8000000840c73067 P4D 8000000840c73067 PUD 858dcc067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 32 PID: 5636 Comm: handler194 Tainted: G OE 5.0.0+ #186 Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 1.3.6 06/03/2015 RIP: 0010:tcf_mirred_release+0x42/0xa7 [act_mirred] Code: f0 90 39 c0 e8 52 04 57 c8 48 c7 c7 b8 80 39 c0 e8 94 fa d4 c7 48 8b 93 d0 00 00 00 48 8b 83 d8 00 00 00 48 c7 c7 f0 90 39 c0 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 d0 00 RSP: 0018:ffffac4aa059f688 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff9dcd1b214d00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9dcd1fa165f8 RDI: ffffffffc03990f0 RBP: ffff9dccf9c7af80 R08: 0000000000000a3b R09: 0000000000000000 R10: ffff9dccfa11f420 R11: 0000000000000000 R12: 0000000000000001 R13: ffff9dcd16b433c0 R14: ffff9dcd1b214d80 R15: 0000000000000000 FS: 00007f441bfff700(0000) GS:ffff9dcd1fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000839e64004 CR4: 00000000001606e0 Call Trace: tcf_action_cleanup+0x59/0xca __tcf_action_put+0x54/0x6b __tcf_idr_release.cold.33+0x9/0x12 tcf_mirred_init.cold.20+0x22e/0x3b0 [act_mirred] tcf_action_init_1+0x3d0/0x4c0 tcf_action_init+0x9c/0x130 tcf_exts_validate+0xab/0xc0 fl_change+0x1ca/0x982 [cls_flower] tc_new_tfilter+0x647/0x8d0 ? load_balance+0x14b/0x9e0 rtnetlink_rcv_msg+0xe3/0x370 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1d4/0x2b0 ? rtnl_calcit.isra.31+0xf0/0xf0 netlink_rcv_skb+0x49/0x110 netlink_unicast+0x16f/0x210 netlink_sendmsg+0x1df/0x390 sock_sendmsg+0x36/0x40 ___sys_sendmsg+0x27b/0x2c0 ? futex_wake+0x80/0x140 ? do_futex+0x2b9/0xac0 ? ep_scan_ready_list.constprop.22+0x1f2/0x210 ? ep_poll+0x7a/0x430 __sys_sendmsg+0x47/0x80 do_syscall_64+0x55/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index cd712e4e8998..17cc6bd4c57c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -159,12 +159,15 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + + m = to_mirred(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&m->tcfm_list); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; - m = to_mirred(*a); - spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { -- cgit v1.2.3 From 737889efe9713a0f20a75fd0de952841d9275e6b Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 22 Mar 2019 15:03:51 +0100 Subject: tipc: tipc clang warning When checking the code with clang -Wsometimes-uninitialized we get the following warning: if (!tipc_link_is_establishing(l)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/node.c:847:46: note: uninitialized use occurs here tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); net/tipc/node.c:831:2: note: remove the 'if' if its condition is always true if (!tipc_link_is_establishing(l)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/node.c:821:31: note: initialize the variable 'maddr' to silence this warning struct tipc_media_addr *maddr; We fix this by initializing 'maddr' to NULL. For the matter of clarity, we also test if 'xmitq' is non-empty before we use it and 'maddr' further down in the function. It will never happen that 'xmitq' is non- empty at the same time as 'maddr' is NULL, so this is a sufficient test. Fixes: 598411d70f85 ("tipc: make resetting of links non-atomic") Reported-by: Nathan Chancellor Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2dc4919ab23c..dd3b6dc17662 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -817,10 +817,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_media_addr *maddr = NULL; struct tipc_link *l = le->link; - struct tipc_media_addr *maddr; - struct sk_buff_head xmitq; int old_bearer_id = bearer_id; + struct sk_buff_head xmitq; if (!l) return; @@ -844,7 +844,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_unlock(n); if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } -- cgit v1.2.3 From 526949e877f44672d408bfe291e39860c13f2e24 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2019 15:18:43 +0100 Subject: rxrpc: avoid clang -Wuninitialized warning clang produces a false-positive warning as it fails to notice that "lost = true" implies that "ret" is initialized: net/rxrpc/output.c:402:6: error: variable 'ret' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] if (lost) ^~~~ net/rxrpc/output.c:437:6: note: uninitialized use occurs here if (ret >= 0) { ^~~ net/rxrpc/output.c:402:2: note: remove the 'if' if its condition is always false if (lost) ^~~~~~~~~ net/rxrpc/output.c:339:9: note: initialize the variable 'ret' to silence this warning int ret, opt; ^ = 0 Rearrange the code to make that more obvious and avoid the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: David S. Miller --- net/rxrpc/output.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 736aa9281100..004c762c2e8d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -335,7 +335,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, struct kvec iov[2]; rxrpc_serial_t serial; size_t len; - bool lost = false; int ret, opt; _enter(",{%d}", skb->len); @@ -393,14 +392,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, static int lose; if ((lose++ & 7) == 7) { ret = 0; - lost = true; + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, + whdr.flags, retrans, true); + goto done; } } - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, - retrans, lost); - if (lost) - goto done; + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, + false); /* send the packet with the don't fragment bit set if we currently * think it's small enough */ -- cgit v1.2.3 From 1d382264d911d91a8be5dbed1f0e053eb3245d81 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Mar 2019 01:49:10 +0100 Subject: bpf, libbpf: fix version info and add it to shared object Even though libbpf's versioning script for the linker (libbpf.map) is pointing to 0.0.2, the BPF_EXTRAVERSION in the Makefile has not been updated along with it and is therefore still on 0.0.1. While fixing up, I also noticed that the generated shared object versioning information is missing, typical convention is to have a linker name (libbpf.so), soname (libbpf.so.0) and real name (libbpf.so.0.0.2) for library management. This is based upon the LIBBPF_VERSION as well. The build will then produce the following bpf libraries: # ll libbpf* libbpf.a libbpf.so -> libbpf.so.0.0.2 libbpf.so.0 -> libbpf.so.0.0.2 libbpf.so.0.0.2 # readelf -d libbpf.so.0.0.2 | grep SONAME 0x000000000000000e (SONAME) Library soname: [libbpf.so.0] And install them accordingly: # rm -rf /tmp/bld; mkdir /tmp/bld; make -j$(nproc) O=/tmp/bld install Auto-detecting system features: ... libelf: [ on ] ... bpf: [ on ] CC /tmp/bld/libbpf.o CC /tmp/bld/bpf.o CC /tmp/bld/nlattr.o CC /tmp/bld/btf.o CC /tmp/bld/libbpf_errno.o CC /tmp/bld/str_error.o CC /tmp/bld/netlink.o CC /tmp/bld/bpf_prog_linfo.o CC /tmp/bld/libbpf_probes.o CC /tmp/bld/xsk.o LD /tmp/bld/libbpf-in.o LINK /tmp/bld/libbpf.a LINK /tmp/bld/libbpf.so.0.0.2 LINK /tmp/bld/test_libbpf INSTALL /tmp/bld/libbpf.a INSTALL /tmp/bld/libbpf.so.0.0.2 # ll /usr/local/lib64/libbpf.* /usr/local/lib64/libbpf.a /usr/local/lib64/libbpf.so -> libbpf.so.0.0.2 /usr/local/lib64/libbpf.so.0 -> libbpf.so.0.0.2 /usr/local/lib64/libbpf.so.0.0.2 Fixes: 1bf4b05810fe ("tools: bpftool: add probes for eBPF program types") Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature check") Reported-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 61aaacf0cfa1..5bf8e52c41fc 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -3,7 +3,7 @@ BPF_VERSION = 0 BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 1 +BPF_EXTRAVERSION = 2 MAKEFLAGS += --no-print-directory @@ -79,8 +79,6 @@ export prefix libdir src obj libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -LIB_FILE = libbpf.a libbpf.so - VERSION = $(BPF_VERSION) PATCHLEVEL = $(BPF_PATCHLEVEL) EXTRAVERSION = $(BPF_EXTRAVERSION) @@ -88,7 +86,10 @@ EXTRAVERSION = $(BPF_EXTRAVERSION) OBJ = $@ N = -LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) +LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) + +LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) +LIB_FILE = libbpf.a libbpf.so* # Set compile option CFLAGS ifdef EXTRA_CFLAGS @@ -128,16 +129,18 @@ all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -BPF_IN := $(OUTPUT)libbpf-in.o -LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) -VERSION_SCRIPT := libbpf.map +BPF_IN := $(OUTPUT)libbpf-in.o +VERSION_SCRIPT := libbpf.map + +LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_FILE) +CMD_TARGETS = $(LIB_TARGET) CXX_TEST_TARGET = $(OUTPUT)test_libbpf @@ -170,9 +173,13 @@ $(BPF_IN): force elfdep bpfdep echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf -$(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ - $^ -o $@ +$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) + +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libbpf.so + @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ @@ -192,6 +199,12 @@ check_abi: $(OUTPUT)libbpf.so exit 1; \ fi +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -200,8 +213,9 @@ define do_install endef install_lib: all_cmd - $(call QUIET_INSTALL, $(LIB_FILE)) \ - $(call do_install,$(LIB_FILE),$(libdir_SQ)) + $(call QUIET_INSTALL, $(LIB_TARGET)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) install_headers: $(call QUIET_INSTALL, headers) \ @@ -219,7 +233,7 @@ config-clean: clean: $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ - *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS + *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -- cgit v1.2.3 From 63197f78bca2d86093126783b0ee6519bd652435 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Mar 2019 01:49:11 +0100 Subject: bpf, libbpf: clarify bump in libbpf version info The current documentation suggests that we would need to bump the libbpf version on every change. Lets clarify this a bit more and reflect what we do today in practice, that is, bumping it once per development cycle. Fixes: 76d1b894c515 ("libbpf: Document API and ABI conventions") Reported-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst index 5788479384ca..cef7b77eab69 100644 --- a/tools/lib/bpf/README.rst +++ b/tools/lib/bpf/README.rst @@ -111,6 +111,7 @@ starting from ``0.0.1``. Every time ABI is being changed, e.g. because a new symbol is added or semantic of existing symbol is changed, ABI version should be bumped. +This bump in ABI version is at most once per kernel development cycle. For example, if current state of ``libbpf.map`` is: -- cgit v1.2.3 From d29f5aa0bc0c321e1b9e4658a2a7e08e885da52a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Mar 2019 20:00:20 +0100 Subject: net: phy: don't clear BMCR in genphy_soft_reset So far we effectively clear the BMCR register. Some PHY's can deal with this (e.g. because they reset BMCR to a default as part of a soft-reset) whilst on others this causes issues because e.g. the autoneg bit is cleared. Marvell is an example, see also thread [0]. So let's be a little bit more gentle and leave all bits we're not interested in as-is. This change is needed for PHY drivers to properly deal with the original patch. [0] https://marc.info/?t=155264050700001&r=1&w=2 Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset") Tested-by: Phil Reid Tested-by: liweihang Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 49fdd1ee798e..77068c545de0 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1831,7 +1831,7 @@ int genphy_soft_reset(struct phy_device *phydev) { int ret; - ret = phy_write(phydev, MII_BMCR, BMCR_RESET); + ret = phy_set_bits(phydev, MII_BMCR, BMCR_RESET); if (ret < 0) return ret; -- cgit v1.2.3 From fb1eb41a3dd4cfff274c98f3c3324ab329641298 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:00 +0100 Subject: dt-bindings: net: dsa: qca8k: fix example In the example, the phy at phy@0 is clashing with the switch0@0 at the same address. Usually, the switches are accessible through pseudo PHYs which in case of the qca8k are located at 0x10 - 0x18. Reviewed-by: Florian Fainelli Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index bbcb255c3150..5eda99e6c86e 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -55,12 +55,12 @@ Example: reg = <4>; }; - switch0@0 { + switch@10 { compatible = "qca,qca8337"; #address-cells = <1>; #size-cells = <0>; - reg = <0>; + reg = <0x10>; ports { #address-cells = <1>; -- cgit v1.2.3 From 5e07321f3388e6f2b13c43ae9df3e09efa8418e0 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:01 +0100 Subject: dt-bindings: net: dsa: qca8k: support internal mdio-bus This patch updates the qca8k's binding to document to the approach for using the internal mdio-bus of the supported qca8k switches. Reviewed-by: Florian Fainelli Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/qca8k.txt | 69 ++++++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 5eda99e6c86e..93a7469e70d4 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -12,10 +12,15 @@ Required properties: Subnodes: The integrated switch subnode should be specified according to the binding -described in dsa/dsa.txt. As the QCA8K switches do not have a N:N mapping of -port and PHY id, each subnode describing a port needs to have a valid phandle -referencing the internal PHY connected to it. The CPU port of this switch is -always port 0. +described in dsa/dsa.txt. If the QCA8K switch is connect to a SoC's external +mdio-bus each subnode describing a port needs to have a valid phandle +referencing the internal PHY it is connected to. This is because there's no +N:N mapping of port and PHY id. + +Don't use mixed external and internal mdio-bus configurations, as this is +not supported by the hardware. + +The CPU port of this switch is always port 0. A CPU port node has the following optional node: @@ -31,8 +36,9 @@ For QCA8K the 'fixed-link' sub-node supports only the following properties: - 'full-duplex' (boolean, optional), to indicate that full duplex is used. When absent, half duplex is assumed. -Example: +Examples: +for the external mdio-bus configuration: &mdio0 { phy_port1: phy@0 { @@ -108,3 +114,56 @@ Example: }; }; }; + +for the internal master mdio-bus configuration: + + &mdio0 { + switch@10 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0x10>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + fixed-link { + speed = 1000; + full-duplex; + }; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "lan4"; + }; + + port@5 { + reg = <5>; + label = "wan"; + }; + }; + }; + }; -- cgit v1.2.3 From 1eec7151ae0e134bd42e3f128066b2ff8da21393 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:02 +0100 Subject: net: dsa: qca8k: remove leftover phy accessors This belated patch implements Andrew Lunn's request of "remove the phy_read() and phy_write() functions." While seemingly harmless, this causes the switch's user port PHYs to get registered twice. This is because the DSA subsystem will create a slave mdio-bus not knowing that the qca8k_phy_(read|write) accessors operate on the external mdio-bus. So the same "bus" gets effectively duplicated. Cc: stable@vger.kernel.org Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family") Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 576b37d12a63..14ad78225f07 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -624,22 +624,6 @@ qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy) qca8k_port_set_status(priv, port, 1); } -static int -qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - return mdiobus_read(priv->bus, phy, regnum); -} - -static int -qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - return mdiobus_write(priv->bus, phy, regnum, val); -} - static void qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) { @@ -879,8 +863,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .setup = qca8k_setup, .adjust_link = qca8k_adjust_link, .get_strings = qca8k_get_strings, - .phy_read = qca8k_phy_read, - .phy_write = qca8k_phy_write, .get_ethtool_stats = qca8k_get_ethtool_stats, .get_sset_count = qca8k_get_sset_count, .get_mac_eee = qca8k_get_mac_eee, -- cgit v1.2.3 From db460c54b67fc2cbe6dcef88b7bf3cba8e07f80e Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:03 +0100 Subject: net: dsa: qca8k: extend slave-bus implementations This patch implements accessors for the QCA8337 MDIO access through the MDIO_MASTER register, which makes it possible to access the PHYs on slave-bus through the switch. In cases where the switch ports are already mapped via external "phy-phandles", the internal mdio-bus is disabled in order to prevent a duplicated discovery and enumeration of the same PHYs. Don't use mixed external and internal mdio-bus configurations, as this is not supported by the hardware. Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++- drivers/net/dsa/qca8k.h | 13 ++++ 2 files changed, 168 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 14ad78225f07..c4fa400efdcc 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -481,6 +481,155 @@ qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask); } +static u32 +qca8k_port_to_phy(int port) +{ + /* From Andrew Lunn: + * Port 0 has no internal phy. + * Port 1 has an internal PHY at MDIO address 0. + * Port 2 has an internal PHY at MDIO address 1. + * ... + * Port 5 has an internal PHY at MDIO address 4. + * Port 6 has no internal PHY. + */ + + return port - 1; +} + +static int +qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data) +{ + u32 phy, val; + + if (regnum >= QCA8K_MDIO_MASTER_MAX_REG) + return -EINVAL; + + /* callee is responsible for not passing bad ports, + * but we still would like to make spills impossible. + */ + phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR; + val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN | + QCA8K_MDIO_MASTER_WRITE | QCA8K_MDIO_MASTER_PHY_ADDR(phy) | + QCA8K_MDIO_MASTER_REG_ADDR(regnum) | + QCA8K_MDIO_MASTER_DATA(data); + + qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val); + + return qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL, + QCA8K_MDIO_MASTER_BUSY); +} + +static int +qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum) +{ + u32 phy, val; + + if (regnum >= QCA8K_MDIO_MASTER_MAX_REG) + return -EINVAL; + + /* callee is responsible for not passing bad ports, + * but we still would like to make spills impossible. + */ + phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR; + val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN | + QCA8K_MDIO_MASTER_READ | QCA8K_MDIO_MASTER_PHY_ADDR(phy) | + QCA8K_MDIO_MASTER_REG_ADDR(regnum); + + qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val); + + if (qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL, + QCA8K_MDIO_MASTER_BUSY)) + return -ETIMEDOUT; + + val = (qca8k_read(priv, QCA8K_MDIO_MASTER_CTRL) & + QCA8K_MDIO_MASTER_DATA_MASK); + + return val; +} + +static int +qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data) +{ + struct qca8k_priv *priv = ds->priv; + + return qca8k_mdio_write(priv, port, regnum, data); +} + +static int +qca8k_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + struct qca8k_priv *priv = ds->priv; + int ret; + + ret = qca8k_mdio_read(priv, port, regnum); + + if (ret < 0) + return 0xffff; + + return ret; +} + +static int +qca8k_setup_mdio_bus(struct qca8k_priv *priv) +{ + u32 internal_mdio_mask = 0, external_mdio_mask = 0, reg; + struct device_node *ports, *port; + int err; + + ports = of_get_child_by_name(priv->dev->of_node, "ports"); + if (!ports) + return -EINVAL; + + for_each_available_child_of_node(ports, port) { + err = of_property_read_u32(port, "reg", ®); + if (err) + return err; + + if (!dsa_is_user_port(priv->ds, reg)) + continue; + + if (of_property_read_bool(port, "phy-handle")) + external_mdio_mask |= BIT(reg); + else + internal_mdio_mask |= BIT(reg); + } + + if (!external_mdio_mask && !internal_mdio_mask) { + dev_err(priv->dev, "no PHYs are defined.\n"); + return -EINVAL; + } + + /* The QCA8K_MDIO_MASTER_EN Bit, which grants access to PHYs through + * the MDIO_MASTER register also _disconnects_ the external MDC + * passthrough to the internal PHYs. It's not possible to use both + * configurations at the same time! + * + * Because this came up during the review process: + * If the external mdio-bus driver is capable magically disabling + * the QCA8K_MDIO_MASTER_EN and mutex/spin-locking out the qca8k's + * accessors for the time being, it would be possible to pull this + * off. + */ + if (!!external_mdio_mask && !!internal_mdio_mask) { + dev_err(priv->dev, "either internal or external mdio bus configuration is supported.\n"); + return -EINVAL; + } + + if (external_mdio_mask) { + /* Make sure to disable the internal mdio bus in cases + * a dt-overlay and driver reload changed the configuration + */ + + qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL, + QCA8K_MDIO_MASTER_EN); + return 0; + } + + priv->ops.phy_read = qca8k_phy_read; + priv->ops.phy_write = qca8k_phy_write; + return 0; +} + static int qca8k_setup(struct dsa_switch *ds) { @@ -502,6 +651,10 @@ qca8k_setup(struct dsa_switch *ds) if (IS_ERR(priv->regmap)) pr_warn("regmap initialization failed"); + ret = qca8k_setup_mdio_bus(priv); + if (ret) + return ret; + /* Initialize CPU port pad mode (xMII type, delays...) */ phy_mode = of_get_phy_mode(ds->ports[QCA8K_CPU_PORT].dn); if (phy_mode < 0) { @@ -905,7 +1058,8 @@ qca8k_sw_probe(struct mdio_device *mdiodev) return -ENOMEM; priv->ds->priv = priv; - priv->ds->ops = &qca8k_switch_ops; + priv->ops = qca8k_switch_ops; + priv->ds->ops = &priv->ops; mutex_init(&priv->reg_mutex); dev_set_drvdata(&mdiodev->dev, priv); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index d146e54c8a6c..249fd62268e5 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -49,6 +49,18 @@ #define QCA8K_MIB_FLUSH BIT(24) #define QCA8K_MIB_CPU_KEEP BIT(20) #define QCA8K_MIB_BUSY BIT(17) +#define QCA8K_MDIO_MASTER_CTRL 0x3c +#define QCA8K_MDIO_MASTER_BUSY BIT(31) +#define QCA8K_MDIO_MASTER_EN BIT(30) +#define QCA8K_MDIO_MASTER_READ BIT(27) +#define QCA8K_MDIO_MASTER_WRITE 0 +#define QCA8K_MDIO_MASTER_SUP_PRE BIT(26) +#define QCA8K_MDIO_MASTER_PHY_ADDR(x) ((x) << 21) +#define QCA8K_MDIO_MASTER_REG_ADDR(x) ((x) << 16) +#define QCA8K_MDIO_MASTER_DATA(x) (x) +#define QCA8K_MDIO_MASTER_DATA_MASK GENMASK(15, 0) +#define QCA8K_MDIO_MASTER_MAX_PORTS 5 +#define QCA8K_MDIO_MASTER_MAX_REG 32 #define QCA8K_GOL_MAC_ADDR0 0x60 #define QCA8K_GOL_MAC_ADDR1 0x64 #define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4) @@ -169,6 +181,7 @@ struct qca8k_priv { struct dsa_switch *ds; struct mutex reg_mutex; struct device *dev; + struct dsa_switch_ops ops; }; struct qca8k_mib_desc { -- cgit v1.2.3 From 1f8389bf63aec99218c62490869ca38d1a38ce46 Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Sat, 23 Mar 2019 19:11:33 +0530 Subject: net: sched: Kconfig: update reference link for PIE RFC 8033 replaces the IETF draft for PIE Signed-off-by: Leslie Monis Signed-off-by: David S. Miller --- net/sched/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1b9afdee5ba9..5c02ad97ef23 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -358,8 +358,7 @@ config NET_SCH_PIE help Say Y here if you want to use the Proportional Integral controller Enhanced scheduler packet scheduling algorithm. - For more information, please see - http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + For more information, please see https://tools.ietf.org/html/rfc8033 To compile this driver as a module, choose M here: the module will be called sch_pie. -- cgit v1.2.3 From b7ebee2f95fb0fa2862d5ed2de707f872c311393 Mon Sep 17 00:00:00 2001 From: Dmitry Bezrukov Date: Sat, 23 Mar 2019 13:59:53 +0000 Subject: net: usb: aqc111: Extend HWID table by QNAP device New device of QNAP based on aqc111u Add this ID to blacklist of cdc_ether driver as well Signed-off-by: Dmitry Bezrukov Signed-off-by: David S. Miller --- drivers/net/usb/aqc111.c | 15 +++++++++++++++ drivers/net/usb/cdc_ether.c | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 820a2fe7d027..aff995be2a31 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1301,6 +1301,20 @@ static const struct driver_info trendnet_info = { .tx_fixup = aqc111_tx_fixup, }; +static const struct driver_info qnap_info = { + .description = "QNAP QNA-UC5G1T USB to 5GbE Adapter", + .bind = aqc111_bind, + .unbind = aqc111_unbind, + .status = aqc111_status, + .link_reset = aqc111_link_reset, + .reset = aqc111_reset, + .stop = aqc111_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | + FLAG_AVOID_UNLINK_URBS | FLAG_MULTI_PACKET, + .rx_fixup = aqc111_rx_fixup, + .tx_fixup = aqc111_tx_fixup, +}; + static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); @@ -1455,6 +1469,7 @@ static const struct usb_device_id products[] = { {AQC111_USB_ETH_DEV(0x0b95, 0x2790, asix111_info)}, {AQC111_USB_ETH_DEV(0x0b95, 0x2791, asix112_info)}, {AQC111_USB_ETH_DEV(0x20f4, 0xe05a, trendnet_info)}, + {AQC111_USB_ETH_DEV(0x1c04, 0x0015, qnap_info)}, { },/* END */ }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 5512a1038721..3e9b2c319e45 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -851,6 +851,14 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* QNAP QNA-UC5G1T USB to 5GbE Adapter (based on AQC111U) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(0x1c04, 0x0015, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. -- cgit v1.2.3 From 9926cb5f8b0f0aea535735185600d74db7608550 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 24 Mar 2019 00:48:22 +0800 Subject: tipc: change to check tipc_own_id to return in tipc_net_stop When running a syz script, a panic occurred: [ 156.088228] BUG: KASAN: use-after-free in tipc_disc_timeout+0x9c9/0xb20 [tipc] [ 156.094315] Call Trace: [ 156.094844] [ 156.095306] dump_stack+0x7c/0xc0 [ 156.097346] print_address_description+0x65/0x22e [ 156.100445] kasan_report.cold.3+0x37/0x7a [ 156.102402] tipc_disc_timeout+0x9c9/0xb20 [tipc] [ 156.106517] call_timer_fn+0x19a/0x610 [ 156.112749] run_timer_softirq+0xb51/0x1090 It was caused by the netns freed without deleting the discoverer timer, while later on the netns would be accessed in the timer handler. The timer should have been deleted by tipc_net_stop() when cleaning up a netns. However, tipc has been able to enable a bearer and start d->timer without the local node_addr set since Commit 52dfae5c85a4 ("tipc: obtain node identity from interface by default"), which caused the timer not to be deleted in tipc_net_stop() then. So fix it in tipc_net_stop() by changing to check local node_id instead of local node_addr, as Jon suggested. While at it, remove the calling of tipc_nametbl_withdraw() there, since tipc_nametbl_stop() will take of the nametbl's freeing after. Fixes: 52dfae5c85a4 ("tipc: obtain node identity from interface by default") Reported-by: syzbot+a25307ad099309f1c2b9@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/net.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/tipc/net.c b/net/tipc/net.c index f076edb74338..7ce1e86b024f 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -163,12 +163,9 @@ void tipc_sched_net_finalize(struct net *net, u32 addr) void tipc_net_stop(struct net *net) { - u32 self = tipc_own_addr(net); - - if (!self) + if (!tipc_own_id(net)) return; - tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self); rtnl_lock(); tipc_bearer_stop(net); tipc_node_stop(net); -- cgit v1.2.3 From 450895d04ba13a96886eddfeddb11556ae8624f1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 24 Mar 2019 00:18:46 +0200 Subject: net: phy: bcm54xx: Encode link speed and activity into LEDs Previously the green and amber LEDs on this quad PHY were solid, to indicate an encoding of the link speed (10/100/1000). This keeps the LEDs always on just as before, but now they flash on Rx/Tx activity. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 13 +++++++++++++ include/linux/brcmphy.h | 16 ++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9605d4fe540b..cb86a3e90c7d 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -323,6 +323,19 @@ static int bcm54xx_config_init(struct phy_device *phydev) bcm54xx_phydsp_config(phydev); + /* Encode link speed into LED1 and LED3 pair (green/amber). + * Also flash these two LEDs on activity. This means configuring + * them for MULTICOLOR and encoding link/activity into them. + */ + val = BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_MULTICOLOR1) | + BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_MULTICOLOR1); + bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1, val); + + val = BCM_LED_MULTICOLOR_IN_PHASE | + BCM5482_SHD_LEDS1_LED1(BCM_LED_MULTICOLOR_LINK_ACT) | + BCM5482_SHD_LEDS1_LED3(BCM_LED_MULTICOLOR_LINK_ACT); + bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val); + return 0; } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 9cd00a37b8d3..6db2d9a6e503 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -148,6 +148,22 @@ #define BCM_LED_SRC_OFF 0xe /* Tied high */ #define BCM_LED_SRC_ON 0xf /* Tied low */ +/* + * Broadcom Multicolor LED configurations (expansion register 4) + */ +#define BCM_EXP_MULTICOLOR (MII_BCM54XX_EXP_SEL_ER + 0x04) +#define BCM_LED_MULTICOLOR_IN_PHASE BIT(8) +#define BCM_LED_MULTICOLOR_LINK_ACT 0x0 +#define BCM_LED_MULTICOLOR_SPEED 0x1 +#define BCM_LED_MULTICOLOR_ACT_FLASH 0x2 +#define BCM_LED_MULTICOLOR_FDX 0x3 +#define BCM_LED_MULTICOLOR_OFF 0x4 +#define BCM_LED_MULTICOLOR_ON 0x5 +#define BCM_LED_MULTICOLOR_ALT 0x6 +#define BCM_LED_MULTICOLOR_FLASH 0x7 +#define BCM_LED_MULTICOLOR_LINK 0x8 +#define BCM_LED_MULTICOLOR_ACT 0x9 +#define BCM_LED_MULTICOLOR_PROGRAM 0xa /* * BCM5482: Shadow registers -- cgit v1.2.3 From c493b09b2792336f471d2206be180a4b4c1fc9ba Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 24 Mar 2019 00:21:03 +0100 Subject: net: devlink: skip info_get op call if it is not defined in dumpit In dumpit, unlike doit, the check for info_get op being defined is missing. Add it and avoid null pointer dereference in case driver does not define this op. Fixes: f9cf22882c60 ("devlink: add device information API") Reported-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/devlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/core/devlink.c b/net/core/devlink.c index 78e22cea4cc7..da0a29f30885 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3897,6 +3897,11 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, continue; } + if (!devlink->ops->info_get) { + idx++; + continue; + } + mutex_lock(&devlink->lock); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, -- cgit v1.2.3 From 047a013f8d0af8299ce2d02af152de6a30165ccc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 13:49:16 +0100 Subject: chelsio: use BUG() instead of BUG_ON(1) clang warns about possible bugs in a dead code branch after BUG_ON(1) when CONFIG_PROFILE_ALL_BRANCHES is enabled: drivers/net/ethernet/chelsio/cxgb4/sge.c:479:3: error: variable 'buf_size' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] BUG_ON(1); ^~~~~~~~~ include/asm-generic/bug.h:61:36: note: expanded from macro 'BUG_ON' #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0) ^~~~~~~~~~~~~~~~~~~ include/linux/compiler.h:48:23: note: expanded from macro 'unlikely' # define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x))) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/sge.c:482:9: note: uninitialized use occurs here return buf_size; ^~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/sge.c:479:3: note: remove the 'if' if its condition is always true BUG_ON(1); ^ include/asm-generic/bug.h:61:32: note: expanded from macro 'BUG_ON' #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0) ^ drivers/net/ethernet/chelsio/cxgb4/sge.c:459:14: note: initialize the variable 'buf_size' to silence this warning int buf_size; ^ = 0 Use BUG() here to create simpler code that clang understands correctly. Signed-off-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 3130b43bba52..02959035ed3f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2620,7 +2620,7 @@ static inline struct port_info *ethqset2pinfo(struct adapter *adap, int qset) } /* should never happen! */ - BUG_ON(1); + BUG(); return NULL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 88773ca58e6b..b3da81e90132 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -476,7 +476,7 @@ static inline int get_buf_size(struct adapter *adapter, break; default: - BUG_ON(1); + BUG(); } return buf_size; -- cgit v1.2.3 From 8c838f53e149871561a9261ac768a9c7071b43d0 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 25 Mar 2019 13:06:22 +0000 Subject: dpaa2-eth: fix race condition with bql frame accounting It might happen that Tx conf acknowledges a frame before it was subscribed in bql, as subscribing was previously done after the enqueue operation. This patch moves the netdev_tx_sent_queue call before the actual frame enqueue, so that this can never happen. Fixes: 569dac6a5a0d ("dpaa2-eth: bql support") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 1a68052abb94..dc339dc1adb2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -815,6 +815,14 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) */ queue_mapping = skb_get_queue_mapping(skb); fq = &priv->fq[queue_mapping]; + + fd_len = dpaa2_fd_get_len(&fd); + nq = netdev_get_tx_queue(net_dev, queue_mapping); + netdev_tx_sent_queue(nq, fd_len); + + /* Everything that happens after this enqueues might race with + * the Tx confirmation callback for this frame + */ for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { err = priv->enqueue(priv, fq, &fd, 0); if (err != -EBUSY) @@ -825,13 +833,10 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ free_tx_fd(priv, fq, &fd, false); + netdev_tx_completed_queue(nq, 1, fd_len); } else { - fd_len = dpaa2_fd_get_len(&fd); percpu_stats->tx_packets++; percpu_stats->tx_bytes += fd_len; - - nq = netdev_get_tx_queue(net_dev, queue_mapping); - netdev_tx_sent_queue(nq, fd_len); } return NETDEV_TX_OK; -- cgit v1.2.3