diff options
Diffstat (limited to 'net')
194 files changed, 6207 insertions, 1856 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9accde339601..a79365574531 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -663,7 +663,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index a7a462e6fee5..dc1816e9d53b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -704,7 +704,6 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_softif_destroy_sysfs(hard_iface->soft_iface); } - hard_iface->soft_iface = NULL; batadv_hardif_put(hard_iface); out: diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index ab47acf2eb01..3284a7b0325d 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -63,7 +63,7 @@ enum batadv_dbg_level { BATADV_DBG_NC = BIT(5), BATADV_DBG_MCAST = BIT(6), BATADV_DBG_TP_METER = BIT(7), - BATADV_DBG_ALL = 127, + BATADV_DBG_ALL = 255, }; #ifdef CONFIG_BATMAN_ADV_DEBUG diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ad26559160fa..8f3b2969cc4e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -547,7 +547,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, if (bat_priv->algo_ops->neigh.hardif_init) bat_priv->algo_ops->neigh.hardif_init(hardif_neigh); - hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); + hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list); out: spin_unlock_bh(&hard_iface->neigh_list_lock); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e2288421fe6b..1015d9c8d97d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -969,41 +969,38 @@ void 
__hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - size_t complete_len; size_t short_len; - int max_len; - - max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - complete_len = strlen(hdev->dev_name); - short_len = strlen(hdev->short_name); - - /* no space left for name */ - if (max_len < 1) - return ad_len; + size_t complete_len; - /* no name set */ - if (!complete_len) + /* no space left for name (+ NULL + type + len) */ + if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) return ad_len; - /* complete name fits and is eq to max short name len or smaller */ - if (complete_len <= max_len && - complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { + /* use complete name if present and fits */ + complete_len = strlen(hdev->dev_name); + if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len); - } + hdev->dev_name, complete_len + 1); - /* short name set and fits */ - if (short_len && short_len <= max_len) { + /* use short name if present */ + short_len = strlen(hdev->short_name); + if (short_len) return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->short_name, short_len); - } + hdev->short_name, short_len + 1); - /* no short name set so shorten complete name */ - if (!short_len) { - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->dev_name, max_len); + /* use shortened full name if present, we already know that name + * is longer then HCI_MAX_SHORT_NAME_LENGTH + */ + if (complete_len) { + u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; + + memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); + name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; + + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name, + sizeof(name)); } return ad_len; diff --git 
a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6b06629245a8..dde77bd59f91 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -106,6 +106,8 @@ static inline void hci_update_background_scan(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len); + static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 736038085feb..1fba2a03f8ae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6017,7 +6017,15 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) +static u8 calculate_name_len(struct hci_dev *hdev) +{ + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; + + return append_local_name(hdev, buf, 0); +} + +static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, + bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; @@ -6030,9 +6038,8 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; } else { - /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) - max_len -= 3; + max_len -= calculate_name_len(hdev); if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) max_len -= 4; @@ -6063,12 +6070,13 @@ static bool appearance_managed(u32 adv_flags) return adv_flags & MGMT_ADV_FLAG_APPEARANCE; } -static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, + u8 len, bool is_adv_data) { int i, cur_len; u8 max_len; - max_len = tlv_data_max_len(adv_flags, is_adv_data); + max_len = tlv_data_max_len(hdev, adv_flags, is_adv_data); if (len > max_len) return false; @@ -6215,8 +6223,8 @@ static int add_advertising(struct sock *sk, struct hci_dev 
*hdev, goto unlock; } - if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) || - !tlv_data_is_valid(flags, cp->data + cp->adv_data_len, + if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -6429,8 +6437,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, rp.instance = cp->instance; rp.flags = cp->flags; - rp.max_adv_data_len = tlv_data_max_len(flags, true); - rp.max_scan_rsp_len = tlv_data_max_len(flags, false); + rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c5fea9393946..073d54afa056 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/timer.h> #include <linux/inetdevice.h> +#include <linux/mroute.h> #include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -972,13 +973,12 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query) mod_timer(&query->timer, jiffies); } -void br_multicast_enable_port(struct net_bridge_port *port) +static void __br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; - spin_lock(&br->multicast_lock); if (br->multicast_disabled || !netif_running(br->dev)) - goto out; + return; br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) @@ -987,8 +987,14 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (port->multicast_router == MDB_RTR_TYPE_PERM && hlist_unhashed(&port->rlist)) br_multicast_add_router(br, port); +} -out: +void br_multicast_enable_port(struct net_bridge_port *port) +{ + 
struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + __br_multicast_enable_port(port); spin_unlock(&br->multicast_lock); } @@ -1633,6 +1639,21 @@ static void br_multicast_err_count(const struct net_bridge *br, u64_stats_update_end(&pstats->syncp); } +static void br_multicast_pim(struct net_bridge *br, + struct net_bridge_port *port, + const struct sk_buff *skb) +{ + unsigned int offset = skb_transport_offset(skb); + struct pimhdr *pimhdr, _pimhdr; + + pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr); + if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION || + pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) + return; + + br_multicast_mark_router(br, port); +} + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, @@ -1645,8 +1666,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = ip_mc_check_igmp(skb, &skb_trimmed); if (err == -ENOMSG) { - if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) { BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { + if (ip_hdr(skb)->protocol == IPPROTO_PIM) + br_multicast_pim(br, port, skb); + } return 0; } else if (err < 0) { br_multicast_err_count(br, port, skb->protocol); @@ -1994,8 +2019,9 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { - int err = 0; struct net_bridge_mdb_htable *mdb; + struct net_bridge_port *port; + int err = 0; spin_lock_bh(&br->multicast_lock); if (br->multicast_disabled == !val) @@ -2023,10 +2049,9 @@ rollback: goto rollback; } - br_multicast_start_querier(br, &br->ip4_own_query); -#if IS_ENABLED(CONFIG_IPV6) - br_multicast_start_querier(br, &br->ip6_own_query); -#endif + br_multicast_open(br); + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port(port); unlock: spin_unlock_bh(&br->multicast_lock); diff 
--git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 9cebf47ac840..e7ef1a1ef3a6 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT config NF_LOG_BRIDGE tristate "Bridge packet logging" + select NF_LOG_COMMON endif # NF_TABLES_BRIDGE diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 1663df598545..c197b1f844ee 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -24,21 +24,7 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_IPV6): - nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_ARP): - case htons(ETH_P_RARP): - nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - } + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); } static struct nf_logger nf_bridge_logger __read_mostly = { diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 00d2601407c5..1a7c9a79a53c 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -26,7 +26,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data, while (got < num_pages) { rc = get_user_pages_unlocked( (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, 0, pages + got); + num_pages - got, pages + got, write_page ? 
FOLL_WRITE : 0); if (rc < 0) break; BUG_ON(rc == 0); diff --git a/net/core/datagram.c b/net/core/datagram.c index bfb973aebb5b..49816af8586b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -165,6 +165,7 @@ done: * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @destructor: invoked under the receive lock on successful dequeue * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts @@ -197,6 +198,8 @@ done: * the standard around please. */ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last) { @@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } atomic_inc(&skb->users); - } else + } else { __skb_unlink(skb, queue); - + if (destructor) + destructor(sk, skb); + } spin_unlock_irqrestore(&queue->lock, cpu_flags); *off = _off; return skb; @@ -262,6 +267,8 @@ no_packet: EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err) { struct sk_buff *skb, *last; @@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, - &last); + skb = __skb_try_recv_datagram(sk, flags, destructor, peeked, + off, err, &last); if (skb) return skb; @@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - &peeked, &off, err); + NULL, &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 6aa43cd8cbb5..7385c1a152fd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1948,37 +1948,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) } } +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) +{ + if (dev->num_tc) { + struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; + int i; + + for (i = 0; i < TC_MAX_QUEUE; i++, tc++) { + if ((txq - tc->offset) < tc->count) + return i; + } + + return -1; + } + + return 0; +} + #ifdef CONFIG_XPS static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) -static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, - int cpu, u16 index) +static bool remove_xps_queue(struct xps_dev_maps *dev_maps, + int tci, u16 index) { struct xps_map *map = NULL; int pos; if (dev_maps) - map = xmap_dereference(dev_maps->cpu_map[cpu]); + map = xmap_dereference(dev_maps->cpu_map[tci]); + if (!map) + return false; - for (pos = 0; map && pos < map->len; pos++) { - if (map->queues[pos] == index) { - if (map->len > 1) { - map->queues[pos] = map->queues[--map->len]; - } else { - RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); - kfree_rcu(map, rcu); - map = NULL; - } + for (pos = map->len; pos--;) { + if (map->queues[pos] != index) + continue; + + if (map->len > 1) { + map->queues[pos] = map->queues[--map->len]; break; } + + RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL); + kfree_rcu(map, rcu); + return false; } - return map; + return true; } -static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +static bool remove_xps_queue_cpu(struct net_device *dev, + struct xps_dev_maps *dev_maps, + int cpu, u16 offset, u16 count) +{ + int num_tc = dev->num_tc ? 
: 1; + bool active = false; + int tci; + + for (tci = cpu * num_tc; num_tc--; tci++) { + int i, j; + + for (i = count, j = offset; i--; j++) { + if (!remove_xps_queue(dev_maps, cpu, j)) + break; + } + + active |= i < 0; + } + + return active; +} + +static void netif_reset_xps_queues(struct net_device *dev, u16 offset, + u16 count) { struct xps_dev_maps *dev_maps; int cpu, i; @@ -1990,21 +2033,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) if (!dev_maps) goto out_no_maps; - for_each_possible_cpu(cpu) { - for (i = index; i < dev->num_tx_queues; i++) { - if (!remove_xps_queue(dev_maps, cpu, i)) - break; - } - if (i == dev->num_tx_queues) - active = true; - } + for_each_possible_cpu(cpu) + active |= remove_xps_queue_cpu(dev, dev_maps, cpu, + offset, count); if (!active) { RCU_INIT_POINTER(dev->xps_maps, NULL); kfree_rcu(dev_maps, rcu); } - for (i = index; i < dev->num_tx_queues; i++) + for (i = offset + (count - 1); count--; i--) netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), NUMA_NO_NODE); @@ -2012,6 +2050,11 @@ out_no_maps: mutex_unlock(&xps_map_mutex); } +static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +{ + netif_reset_xps_queues(dev, index, dev->num_tx_queues - index); +} + static struct xps_map *expand_xps_map(struct xps_map *map, int cpu, u16 index) { @@ -2051,20 +2094,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + int i, cpu, tci, numa_node_id = -2; + int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; - int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); - int cpu, numa_node_id = -2; bool active = false; + if (dev->num_tc) { + num_tc = dev->num_tc; + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) + return -EINVAL; + } + + maps_sz = XPS_DEV_MAPS_SIZE(num_tc); + if (maps_sz < L1_CACHE_BYTES) + maps_sz = L1_CACHE_BYTES; + mutex_lock(&xps_map_mutex); dev_maps = 
xmap_dereference(dev->xps_maps); /* allocate memory for queue storage */ - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, mask)) - continue; - + for_each_cpu_and(cpu, cpu_online_mask, mask) { if (!new_dev_maps) new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { @@ -2072,25 +2123,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, return -ENOMEM; } - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : + tci = cpu * num_tc + tc; + map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) : NULL; map = expand_xps_map(map, cpu, index); if (!map) goto error; - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); } if (!new_dev_maps) goto out_no_new_maps; for_each_possible_cpu(cpu) { + /* copy maps belonging to foreign traffic classes */ + for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + } + + /* We need to explicitly update tci as prevous loop + * could break out early if dev_maps is NULL. 
+ */ + tci = cpu * num_tc + tc; + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { /* add queue to CPU maps */ int pos = 0; - map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = xmap_dereference(new_dev_maps->cpu_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; @@ -2104,26 +2168,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, #endif } else if (dev_maps) { /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->cpu_map[cpu]); - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); } + /* copy maps belonging to foreign traffic classes */ + for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + } } rcu_assign_pointer(dev->xps_maps, new_dev_maps); /* Cleanup old maps */ - if (dev_maps) { - for_each_possible_cpu(cpu) { - new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); - map = xmap_dereference(dev_maps->cpu_map[cpu]); + if (!dev_maps) + goto out_no_old_maps; + + for_each_possible_cpu(cpu) { + for (i = num_tc, tci = cpu * num_tc; i--; tci++) { + new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); + map = xmap_dereference(dev_maps->cpu_map[tci]); if (map && map != new_map) kfree_rcu(map, rcu); } - - kfree_rcu(dev_maps, rcu); } + kfree_rcu(dev_maps, rcu); + +out_no_old_maps: dev_maps = new_dev_maps; active = true; @@ -2138,11 +2212,12 @@ out_no_new_maps: /* removes queue from unused CPUs */ for_each_possible_cpu(cpu) { - if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) - continue; - - if (remove_xps_queue(dev_maps, cpu, index)) - active = true; + for (i = tc, tci = cpu * num_tc; i--; tci++) + active |= remove_xps_queue(dev_maps, tci, index); + if (!cpumask_test_cpu(cpu, mask) || 
!cpu_online(cpu)) + active |= remove_xps_queue(dev_maps, tci, index); + for (i = num_tc - tc, tci++; --i; tci++) + active |= remove_xps_queue(dev_maps, tci, index); } /* free map if not active */ @@ -2158,11 +2233,14 @@ out_no_maps: error: /* remove any maps that we added */ for_each_possible_cpu(cpu) { - new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : - NULL; - if (new_map && new_map != map) - kfree(new_map); + for (i = num_tc, tci = cpu * num_tc; i--; tci++) { + new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[tci]) : + NULL; + if (new_map && new_map != map) + kfree(new_map); + } } mutex_unlock(&xps_map_mutex); @@ -2173,6 +2251,44 @@ error: EXPORT_SYMBOL(netif_set_xps_queue); #endif +void netdev_reset_tc(struct net_device *dev) +{ +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, 0); +#endif + dev->num_tc = 0; + memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); + memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); +} +EXPORT_SYMBOL(netdev_reset_tc); + +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) +{ + if (tc >= dev->num_tc) + return -EINVAL; + +#ifdef CONFIG_XPS + netif_reset_xps_queues(dev, offset, count); +#endif + dev->tc_to_txq[tc].count = count; + dev->tc_to_txq[tc].offset = offset; + return 0; +} +EXPORT_SYMBOL(netdev_set_tc_queue); + +int netdev_set_num_tc(struct net_device *dev, u8 num_tc) +{ + if (num_tc > TC_MAX_QUEUE) + return -EINVAL; + +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, 0); +#endif + dev->num_tc = num_tc; + return 0; +} +EXPORT_SYMBOL(netdev_set_num_tc); + /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 
@@ -2899,6 +3015,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } return head; } +EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -3084,8 +3201,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[skb->sender_cpu - 1]); + unsigned int tci = skb->sender_cpu - 1; + + if (dev->num_tc) { + tci *= dev->num_tc; + tci += netdev_get_prio_tc_map(dev, skb->priority); + } + + map = rcu_dereference(dev_maps->cpu_map[tci]); if (map) { if (map->len == 1) queue_index = map->queues[0]; @@ -3709,7 +3832,7 @@ int netif_rx_ni(struct sk_buff *skb) } EXPORT_SYMBOL(netif_rx_ni); -static void net_tx_action(struct softirq_action *h) +static __latent_entropy void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -4359,7 +4482,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) + if (skb->csum_bad) goto normal; gro_list_prepare(napi, skb); @@ -4372,9 +4495,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_set_network_header(skb, skb_gro_offset(skb)); skb_reset_mac_len(skb); NAPI_GRO_CB(skb)->same_flow = 0; - NAPI_GRO_CB(skb)->flush = 0; + NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->recursion_counter = 0; NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; @@ -4893,7 +5017,7 @@ EXPORT_SYMBOL(sk_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ -void napi_hash_add(struct napi_struct *napi) +static void napi_hash_add(struct napi_struct *napi) { if 
(test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) @@ -4913,7 +5037,6 @@ void napi_hash_add(struct napi_struct *napi) spin_unlock(&napi_hash_lock); } -EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi @@ -5062,7 +5185,7 @@ out_unlock: return work; } -static void net_rx_action(struct softirq_action *h) +static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + 2; @@ -7527,7 +7650,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->tx_queue_len) { dev->priv_flags |= IFF_NO_QUEUE; - dev->tx_queue_len = 1; + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; } dev->num_tx_queues = txqs; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index be4629c344a6..b6791d94841d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,11 @@ #include <net/fib_rules.h> #include <net/ip_tunnels.h> +static const struct fib_kuid_range fib_kuid_range_unset = { + KUIDT_INIT(0), + KUIDT_INIT(~0), +}; + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; + r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int uid_range_set(struct fib_kuid_range *range) +{ + return uid_valid(range->start) && uid_valid(range->end); +} + +static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) +{ + struct fib_rule_uid_range *in; + struct fib_kuid_range out; + + in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); + + out.start = make_kuid(current_user_ns(), 
in->start); + out.end = make_kuid(current_user_ns(), in->end); + + return out; +} + +static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) +{ + struct fib_rule_uid_range out = { + from_kuid_munged(current_user_ns(), range->start), + from_kuid_munged(current_user_ns(), range->end) + }; + + return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; + if (uid_lt(fl->flowi_uid, rule->uid_range.start) || + uid_gt(fl->flowi_uid, rule->uid_range.end)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -305,6 +343,10 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, if (r->l3mdev != rule->l3mdev) continue; + if (!uid_eq(r->uid_range.start, rule->uid_range.start) || + !uid_eq(r->uid_range.end, rule->uid_range.end)) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -429,6 +471,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if (tb[FRA_UID_RANGE]) { + if (current_user_ns() != net->user_ns) { + err = -EPERM; + goto errout_free; + } + + rule->uid_range = nla_get_kuid_range(tb); + + if (!uid_range_set(&rule->uid_range) || + !uid_lte(rule->uid_range.start, rule->uid_range.end)) + goto errout_free; + } else { + rule->uid_range = fib_kuid_range_unset; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -497,6 +554,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; + struct fib_kuid_range range; int err = -EINVAL; if 
(nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) @@ -516,6 +574,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; + if (tb[FRA_UID_RANGE]) { + range = nla_get_kuid_range(tb); + if (!uid_range_set(&range)) + goto errout; + } else { + range = fib_kuid_range_unset; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -552,6 +618,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) continue; + if (uid_range_set(&range) && + (!uid_eq(rule->uid_range.start, range.start) || + !uid_eq(rule->uid_range.end, range.end))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -619,7 +690,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size_64bit(8); /* FRA_TUN_ID */ + + nla_total_size_64bit(8) /* FRA_TUN_ID */ + + nla_total_size(sizeof(struct fib_kuid_range)); if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -679,7 +751,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->tun_id && nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && - nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))) + nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || + (uid_range_set(&rule->uid_range) && + nla_put_uid_range(skb, &rule->uid_range))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0cc607d05fc8..87e01815ec85 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -246,15 +246,13 @@ ipv6: case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; + struct vlan_hdr _vlan; + bool vlan_tag_present = skb && skb_vlan_tag_present(skb); - if (skb_vlan_tag_present(skb)) + if (vlan_tag_present) 
proto = skb->protocol; - if (!skb_vlan_tag_present(skb) || - proto == cpu_to_be16(ETH_P_8021Q) || - proto == cpu_to_be16(ETH_P_8021AD)) { - struct vlan_hdr _vlan; - + if (!vlan_tag_present || eth_type_vlan(skb->protocol)) { vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) @@ -272,7 +270,7 @@ ipv6: FLOW_DISSECTOR_KEY_VLAN, target_container); - if (skb_vlan_tag_present(skb)) { + if (vlan_tag_present) { key_vlan->vlan_id = skb_vlan_tag_get_id(skb); key_vlan->vlan_priority = (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 88fd64250b02..03976e939818 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -39,6 +39,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "MPLS"; case LWTUNNEL_ENCAP_ILA: return "ILA"; + case LWTUNNEL_ENCAP_SEG6: + return "SEG6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d4fe28606ff5..b0c04cf4851d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1024,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } -#ifdef CONFIG_XPS static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; @@ -1036,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } +static ssize_t show_traffic_class(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + struct net_device *dev = queue->dev; + int index = get_netdev_queue_index(queue); + int tc = netdev_txq_to_tc(dev, index); + + if (tc < 0) + return -EINVAL; + + return sprintf(buf, "%u\n", tc); +} + +#ifdef CONFIG_XPS static ssize_t show_tx_maxrate(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) @@ -1078,6 +1092,9 @@ static struct 
netdev_queue_attribute queue_tx_maxrate = static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); +static struct netdev_queue_attribute queue_traffic_class = + __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL); + #ifdef CONFIG_BQL /* * Byte queue limits sysfs structures and functions. @@ -1193,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { struct net_device *dev = queue->dev; + int cpu, len, num_tc = 1, tc = 0; struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; - int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; index = get_netdev_queue_index(queue); + if (dev->num_tc) { + num_tc = dev->num_tc; + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) + return -EINVAL; + } + rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { - for_each_possible_cpu(i) { - struct xps_map *map = - rcu_dereference(dev_maps->cpu_map[i]); - if (map) { - int j; - for (j = 0; j < map->len; j++) { - if (map->queues[j] == index) { - cpumask_set_cpu(i, mask); - break; - } + for_each_possible_cpu(cpu) { + int i, tci = cpu * num_tc + tc; + struct xps_map *map; + + map = rcu_dereference(dev_maps->cpu_map[tci]); + if (!map) + continue; + + for (i = map->len; i--;) { + if (map->queues[i] == index) { + cpumask_set_cpu(cpu, mask); + break; } } } @@ -1263,6 +1289,7 @@ static struct netdev_queue_attribute xps_cpus_attribute = static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, + &queue_traffic_class.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, &queue_tx_maxrate.attr, diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b9243b14af17..1309d78e2a64 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -215,13 +215,14 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, 
struct net *peer) { + unsigned long flags; bool alloc; int id; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; @@ -230,11 +231,12 @@ int peernet2id_alloc(struct net *net, struct net *peer) /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { + unsigned long flags; int id; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); id = __peernet2id(net, peer); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); return id; } EXPORT_SYMBOL(peernet2id); @@ -249,17 +251,18 @@ bool peernet_has_id(struct net *net, struct net *peer) struct net *get_net_ns_by_id(struct net *net, int id) { + unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); return peer; @@ -429,17 +432,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_bh(&tmp->nsid_lock); + spin_lock_irq(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); + spin_unlock_irq(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_bh(&net->nsid_lock); + spin_lock_irq(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irq(&net->nsid_lock); } rtnl_unlock(); @@ -568,6 +571,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = 
sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + unsigned long flags; struct net *peer; int nsid, err; @@ -588,15 +592,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); if (__peernet2id(net, peer) >= 0) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -718,10 +722,11 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; + unsigned long flags; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); cb->args[0] = net_cb.idx; return skb->len; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 5219a9e2127a..306b8f0e03c1 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -216,8 +216,8 @@ #define M_QUEUE_XMIT 2 /* Inject packet into qdisc */ /* If lock -- protects updating of if_list */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); +#define if_lock(t) mutex_lock(&(t->if_lock)); +#define if_unlock(t) mutex_unlock(&(t->if_lock)); /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 @@ -423,7 +423,7 @@ struct pktgen_net { }; struct pktgen_thread { - spinlock_t if_lock; /* for list of devices */ + struct mutex if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ struct list_head th_list; struct task_struct *tsk; @@ -2010,11 +2010,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d { struct 
pktgen_thread *t; + mutex_lock(&pktgen_thread_lock); + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - rcu_read_lock(); - list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { + if_lock(t); + list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -2029,8 +2031,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } - rcu_read_unlock(); + if_unlock(t); } + mutex_unlock(&pktgen_thread_lock); } static int pktgen_device_event(struct notifier_block *unused, @@ -3762,7 +3765,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) return -ENOMEM; } - spin_lock_init(&t->if_lock); + mutex_init(&t->if_lock); t->cpu = cpu; INIT_LIST_HEAD(&t->if_list); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e3e0087245b..0b2a6e94af2d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3725,7 +3725,6 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) err = SKB_EXT_ERR(skb_next)->ee.ee_errno; spin_unlock_irqrestore(&q->lock, flags); - sk->sk_err = err; if (err) sk->sk_error_report(sk); diff --git a/net/core/sock.c b/net/core/sock.c index d8e4532e89e7..40dbc13453f9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2460,8 +2460,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index e92b759d906c..9a1a352fd1eb 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -129,7 +129,6 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) return 0; } -EXPORT_SYMBOL(reuseport_add_sock); static void reuseport_free_rcu(struct rcu_head *head) { diff 
--git a/net/ethernet/eth.c b/net/ethernet/eth.c index f983c102ebe3..8c5a479681ca 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,6 +62,7 @@ #include <net/dsa.h> #include <net/flow_dissector.h> #include <linux/uaccess.h> +#include <net/pkt_sched.h> __setup("ether=", netdev_boot_setup); @@ -359,7 +360,7 @@ void ether_setup(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; - dev->tx_queue_len = 1000; /* Ethernet wants good queues */ + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; @@ -440,7 +441,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 5ee1d43f1310..4ebe2aa3e7d3 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -300,10 +300,6 @@ static void hsr_forward_do(struct hsr_frame_info *frame) static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, struct hsr_frame_info *frame) { - struct net_device *master_dev; - - master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev; - if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) { frame->is_local_exclusive = true; skb->pkt_type = PACKET_HOST; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1effc986739e..9648c97e541f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1391,7 +1391,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/fib_frontend.c 
b/net/ipv4/fib_frontend.c index c3b80478226e..d93eea8e2409 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -610,6 +610,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 6cb57bb8692d..805f6607f8d9 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -249,7 +249,7 @@ static struct sk_buff **fou_gro_receive(struct sock *sk, if (!ops || !ops->callbacks.gro_receive) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -441,7 +441,7 @@ next_proto: if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 96e0efecefa6..d5cac99170b1 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -229,7 +229,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ skb_gro_postpull_rcsum(skb, greh, grehlen); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 38abe70e595f..53a890b605fc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); @@ 
-473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61a9deec2993..d5d3ead0a6c3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -415,7 +415,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -452,7 +452,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3b34024202d8..4dea33e5f295 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -861,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); - int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; - bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + int i, num, s_i, s_num; + struct sock *sk; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -877,7 +878,6 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; - struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; @@ -922,13 +922,14 @@ skip_listen_ht: if (!(idiag_states & ~TCPF_LISTEN)) goto out; +#define SKARR_SZ 16 for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct hlist_nulls_node *node; - struct sock *sk; - - num = 0; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; if (hlist_nulls_empty(&head->chain)) continue; @@ -936,9 +937,12 @@ skip_listen_ht: if (i > s_i) s_num = 0; +next_chunk: + num = 0; + accum = 0; spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - int state, res; + int state; if (!net_eq(sock_net(sk), net)) continue; @@ -962,21 +966,35 @@ skip_listen_ht: if (!inet_diag_bc_sk(bc, sk)) goto next_normal; - res = sk_diag_fill(sk, skb, r, + sock_hold(sk); + num_arr[accum] 
= num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + break; +next_normal: + ++num; + } + spin_unlock_bh(lock); + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = sk_diag_fill(sk_arr[idx], skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); - if (res < 0) { - spin_unlock_bh(lock); - goto done; + if (res < 0) + num = num_arr[idx]; } -next_normal: - ++num; + sock_gen_put(sk_arr[idx]); } - - spin_unlock_bh(lock); + if (res < 0) + break; cond_resched(); + if (accum == SKARR_SZ) { + s_num = num + 1; + goto next_chunk; + } } done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 77c20a489218..ca97835bfec4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -25,6 +25,7 @@ #include <net/inet_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> +#include <net/tcp.h> #include <net/sock_reuseport.h> static u32 inet_ehashfn(const struct net *net, const __be32 laddr, @@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; struct inet_sock *inet = inet_sk(sk); @@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score += 4; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score += 4; @@ -215,11 +216,12 @@ struct sock *__inet_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, 
net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 05d105832bdb..37dfacd340af 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -538,7 +538,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; int ptr; - struct net_device *dev; struct sk_buff *skb2; unsigned int mtu, hlen, left, len, ll_rs; int offset; @@ -546,8 +545,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rtable *rt = skb_rtable(skb); int err = 0; - dev = rt->dst.dev; - /* for offloaded checksums cleanup checksum before fragmentation */ if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) @@ -1590,7 +1587,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index af4919792b6a..8b13881ed064 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -97,8 +97,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb) +{ + int val; + + if (IPCB(skb)->frag_max_size == 0) + return; + + val = IPCB(skb)->frag_max_size; + put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val); +} + static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { __wsum csum = skb->csum; @@ -106,8 +117,9 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, return; if (offset != 0) - csum 
= csum_sub(csum, csum_partial(skb_transport_header(skb), - offset, 0)); + csum = csum_sub(csum, + csum_partial(skb_transport_header(skb) + tlen, + offset, 0)); put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); } @@ -152,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int offset) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset) { - struct inet_sock *inet = inet_sk(skb->sk); + struct inet_sock *inet = inet_sk(sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ @@ -216,7 +228,10 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, } if (flags & IP_CMSG_CHECKSUM) - ip_cmsg_recv_checksum(msg, skb, offset); + ip_cmsg_recv_checksum(msg, skb, tlen, offset); + + if (flags & IP_CMSG_RECVFRAGSIZE) + ip_cmsg_recv_fragsize(msg, skb); } EXPORT_SYMBOL(ip_cmsg_recv_offset); @@ -613,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: case IP_CHECKSUM: + case IP_RECVFRAGSIZE: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -725,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, } } break; + case IP_RECVFRAGSIZE: + if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) + goto e_inval; + if (val) + inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE; + else + inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1356,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_CHECKSUM: val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; break; + case IP_RECVFRAGSIZE: + val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; + break; case IP_TOS: val = 
inet->tos; break; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f006e13de56..f2fd13b07273 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -137,6 +137,9 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi4_to_flowi(flp4)); + err = fib_rules_lookup(net->ipv4.mr_rules_ops, flowi4_to_flowi(flp4), 0, &arg); if (err < 0) @@ -163,7 +166,9 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, return -EINVAL; } - mrt = ipmr_get_table(rule->fr_net, rule->table); + arg->table = fib_rule_get_table(rule, arg); + + mrt = ipmr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; @@ -1809,6 +1814,12 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { + struct net_device *mdev; + + mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev); + if (mdev == skb->dev) + goto forward; + if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -2053,7 +2064,7 @@ static int pim_rcv(struct sk_buff *skb) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) || + if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || (pim->flags & PIM_NULL_REGISTER) || (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d613309e3e5d..c11eb1744ab1 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. 
+config NF_SOCKET_IPV4 + tristate "IPv4 socket lookup support" + help + This option enables the IPv4 socket lookup infrastructure. This is + required by the iptables socket match. + if NF_TABLES config NF_TABLES_IPV4 @@ -54,6 +60,14 @@ config NFT_DUP_IPV4 help This module enables IPv4 packet duplication support for nf_tables. +config NFT_FIB_IPV4 + select NFT_FIB + tristate "nf_tables fib / ip route lookup support" + help + This module enables IPv4 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV4 config NF_TABLES_ARP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 853328f8fd05..f462fee66ac8 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o +obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o + # logging obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o @@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c new file mode 100644 index 000000000000..a83d558e1aae --- /dev/null +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. 
+ * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <net/netfilter/nf_socket.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static int +extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, + __be32 *raddr, __be32 *laddr, + __be16 *rport, __be16 *lport) +{ + unsigned int outside_hdrlen = ip_hdrlen(skb); + struct iphdr *inside_iph, _inside_iph; + struct icmphdr *icmph, _icmph; + __be16 *ports, _ports[2]; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_REDIRECT: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return 1; + } + + inside_iph = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr), + sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + + if (inside_iph->protocol != IPPROTO_TCP && + inside_iph->protocol != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr) + + (inside_iph->ihl << 2), + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_iph->protocol; + *laddr = inside_iph->saddr; + *lport = ports[0]; + *raddr = inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, + const u8 protocol, + const __be32 saddr, 
const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + return NULL; +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be32 uninitialized_var(daddr), uninitialized_var(saddr); + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct iphdr *iph = ip_hdr(skb); + struct sk_buff *data_skb = NULL; + u8 uninitialized_var(protocol); +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn const *ct; +#endif + int doff = 0; + + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + protocol = iph->protocol; + saddr = iph->saddr; + sport = hp->source; + daddr = iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = iph->protocol == IPPROTO_TCP ? + ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : + ip_hdrlen(skb) + sizeof(*hp); + + } else if (iph->protocol == IPPROTO_ICMP) { + if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, + &sport, &dport)) + return NULL; + } else { + return NULL; + } + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. 
+ */ + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct) && + ((iph->protocol != IPPROTO_ICMP && + ctinfo == IP_CT_ESTABLISHED_REPLY) || + (iph->protocol == IPPROTO_ICMP && + ctinfo == IP_CT_RELATED_REPLY)) && + (ct->status & IPS_SRC_NAT_DONE)) { + + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, + daddr, sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure"); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c new file mode 100644 index 000000000000..db91fd42db67 --- /dev/null +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -0,0 +1,238 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +#include <net/ip_fib.h> +#include <net/route.h> + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool fib4_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + + return rt && (rt->rt_flags & RTCF_LOCAL); +} + +#define DSCP_BITS 0xfc + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dst = &regs->data[priv->dreg]; + const struct net_device *dev = NULL; + const struct iphdr *iph; + __be32 addr; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = pkt->in; + else if (priv->flags & NFTA_FIB_F_OIF) + dev = pkt->out; + + iph = ip_hdr(pkt->skb); + if (priv->flags & NFTA_FIB_F_DADDR) + addr = iph->daddr; + else + addr = iph->saddr; + + *dst = inet_dev_addr_type(pkt->net, dev, addr); +} +EXPORT_SYMBOL_GPL(nft_fib4_eval_type); + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? 
dev->ifindex : 0; +} + +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = &regs->data[priv->dreg]; + const struct iphdr *iph; + struct fib_result res; + struct flowi4 fl4 = { + .flowi4_scope = RT_SCOPE_UNIVERSE, + .flowi4_iif = LOOPBACK_IFINDEX, + }; + const struct net_device *oif; + struct net_device *found; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int i; +#endif + + /* + * Do not set flowi4_oif, it restricts results (for example, asking + * for oif 3 will get RTN_UNICAST result even if the daddr exists + * on another interface). + * + * Search results for the desired outinterface instead. + */ + if (priv->flags & NFTA_FIB_F_OIF) + oif = pkt->out; + else if (priv->flags & NFTA_FIB_F_IIF) + oif = pkt->in; + else + oif = NULL; + + if (pkt->hook == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + return; + } + + iph = ip_hdr(pkt->skb); + if (ipv4_is_multicast(iph->daddr) && + ipv4_is_zeronet(iph->saddr) && + ipv4_is_local_multicast(iph->daddr)) { + nft_fib_store_result(dest, priv->result, pkt, + get_ifindex(pkt->skb->dev)); + return; + } + + if (priv->flags & NFTA_FIB_F_MARK) + fl4.flowi4_mark = pkt->skb->mark; + + fl4.flowi4_tos = iph->tos & DSCP_BITS; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl4.daddr = iph->daddr; + fl4.saddr = get_saddr(iph->saddr); + } else { + fl4.daddr = iph->saddr; + fl4.saddr = get_saddr(iph->daddr); + } + + if (fib_lookup(pkt->net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) + return; + + switch (res.type) { + case RTN_UNICAST: + break; + case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */ + return; + default: + break; + } + + if (!oif) { + found = FIB_RES_DEV(res); + goto ok; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (i = 0; i < res.fi->fib_nhs; i++) { + struct fib_nh *nh = &res.fi->fib_nh[i]; + + if (nh->nh_dev == oif) { + found = 
nh->nh_dev; + goto ok; + } + } + return; +#else + found = FIB_RES_DEV(res); + if (found != oif) + return; +#endif +ok: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = found->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, found->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib4_eval); + +static struct nft_expr_type nft_fib4_type; + +static const struct nft_expr_ops nft_fib4_type_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib4_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib4_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib4_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib4_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib4_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib4_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib4_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, +}; + +static int __init nft_fib4_module_init(void) +{ + return nft_register_expr(&nft_fib4_type); +} + +static void __exit nft_fib4_module_exit(void) +{ + nft_unregister_expr(&nft_fib4_type); +} + +module_init(nft_fib4_module_init); +module_exit(nft_fib4_module_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); 
+MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7cf7d6e380c2..d11129f1178d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -789,7 +789,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); @@ -994,7 +995,7 @@ struct proto ping_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, .sendmsg = ping_v4_sendmsg, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 03618ed03532..2300fae11b22 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,7 +606,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, sk->sk_uid); if (!inet->hdrincl) { rfv.msg = msg; @@ -695,12 +695,20 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + u32 tb_id = RT_TABLE_LOCAL; int ret = -EINVAL; int chk_addr_ret; if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); + + if (sk->sk_bound_dev_if) + tb_id = l3mdev_fib_table_by_index(sock_net(sk), + sk->sk_bound_dev_if) ? 
: tb_id; + + chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr, + tb_id); + ret = -EADDRNOTAVAIL; if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -920,7 +928,7 @@ int raw_abort(struct sock *sk, int err) sk->sk_err = err; sk->sk_error_report(sk); - udp_disconnect(sk, 0); + __udp_disconnect(sk, 0); release_sock(sk); @@ -934,7 +942,7 @@ struct proto raw_prot = { .close = raw_close, .destroy = raw_destroy, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = raw_ioctl, .init = raw_init, .setsockopt = raw_setsockopt, diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index be930908bcf9..e1a51ca68d23 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -79,10 +79,11 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 * hashinfo->lock here. */ sock_hold(sk); - break; + goto out_unlock; } } } +out_unlock: read_unlock(&hashinfo->lock); return sk ? 
sk : ERR_PTR(-ENOENT); @@ -205,11 +206,14 @@ static int raw_diag_destroy(struct sk_buff *in_skb, { struct net *net = sock_net(in_skb->sk); struct sock *sk; + int err; sk = raw_sock_get(net, r); if (IS_ERR(sk)) return PTR_ERR(sk); - return sock_diag_destroy(sk, ECONNABORTED); + err = sock_diag_destroy(sk, ECONNABORTED); + sock_put(sk); + return err; } #endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 62d4d90c1389..2355883e1025 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -523,7 +524,8 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, @@ -535,7 +537,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) @@ -552,7 +554,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -800,7 +802,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1018,7 +1020,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1034,7 +1036,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1053,6 +1055,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1066,7 +1069,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1106,7 +1109,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1121,9 +1124,10 @@ void ipv4_sk_redirect(struct 
sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); @@ -1980,25 +1984,35 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, */ if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); + int our = 0; + + if (in_dev) + our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); + + /* check l3 master if no match yet */ + if ((!in_dev || !our) && netif_is_l3_slave(dev)) { + struct in_device *l3_in_dev; + + l3_in_dev = __in_dev_get_rcu(skb->dev); + if (l3_in_dev) + our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, + ip_hdr(skb)->protocol); + } - if (in_dev) { - int our = ip_check_mc_rcu(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); - if (our + res = -EINVAL; + if (our #ifdef CONFIG_IP_MROUTE - || - (!ipv4_is_local_multicast(daddr) && - IN_DEV_MFORWARD(in_dev)) + || + (!ipv4_is_local_multicast(daddr) && + IN_DEV_MFORWARD(in_dev)) #endif - ) { - int res = ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); - rcu_read_unlock(); - return res; - } + ) { + res = ip_route_input_mc(skb, daddr, saddr, + tos, dev, our); } rcu_read_unlock(); - return -EINVAL; + return res; } res = ip_route_input_slow(skb, daddr, saddr, tos, dev); rcu_read_unlock(); @@ -2266,7 +2280,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif && - !netif_index_is_l3_master(net, fl4->flowi4_oif)) { + (ipv4_is_multicast(fl4->daddr) || + !netif_index_is_l3_master(net, fl4->flowi4_oif))) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. 
@@ -2493,6 +2508,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2545,6 +2565,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2572,6 +2593,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2579,6 +2604,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3c4043c27de..0dc6286272aa 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -372,7 +372,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? 
opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest); + ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1cb67de106fe..80bc36b25de2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -96,11 +96,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } /* Update system visible IP port range */ @@ -109,10 +109,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock(&net->ipv4.ping_group_range.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock(&net->ipv4.ping_group_range.lock); } /* Validate changes from /proc interface. 
*/ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3251fe71f39f..a7d54cbcdabb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -405,7 +405,6 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - u64_stats_init(&tp->syncp); tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2710,9 +2709,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp, intv; - unsigned int start; - int notsent_bytes; u64 rate64; + bool slow; u32 rate; memset(info, 0, sizeof(*info)); @@ -2721,6 +2719,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_state = sk_state_load(sk); + /* Report meaningful fields for all TCP states, including listeners */ + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); + + rate = READ_ONCE(sk->sk_max_pacing_rate); + rate64 = rate != ~0U ? 
rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); + + info->tcpi_reordering = tp->reordering; + info->tcpi_snd_cwnd = tp->snd_cwnd; + + if (info->tcpi_state == TCP_LISTEN) { + /* listeners aliased fields : + * tcpi_unacked -> Number of children ready for accept() + * tcpi_sacked -> max backlog + */ + info->tcpi_unacked = sk->sk_ack_backlog; + info->tcpi_sacked = sk->sk_max_ack_backlog; + return; + } info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; @@ -2748,13 +2767,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - if (info->tcpi_state == TCP_LISTEN) { - info->tcpi_unacked = sk->sk_ack_backlog; - info->tcpi_sacked = sk->sk_max_ack_backlog; - } else { - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; - } + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; @@ -2768,34 +2783,24 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = tp->srtt_us >> 3; info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; - info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; - info->tcpi_reordering = tp->reordering; info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; - rate = READ_ONCE(sk->sk_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_pacing_rate); + slow = lock_sock_fast(sk); - rate = READ_ONCE(sk->sk_max_pacing_rate); - rate64 = rate != ~0U ? 
rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_max_pacing_rate); + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); + info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt); + + unlock_sock_fast(sk, slow); - do { - start = u64_stats_fetch_begin_irq(&tp->syncp); - put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); - put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); - } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; - notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); - info->tcpi_notsent_bytes = max(0, notsent_bytes); - info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 0ea66c2c9344..b89bce4c721e 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -14,6 +14,36 @@ * observed, or adjust the sending rate if it estimates there is a * traffic policer, in order to keep the drop rate reasonable. * + * Here is a state transition diagram for BBR: + * + * | + * V + * +---> STARTUP ----+ + * | | | + * | V | + * | DRAIN ----+ + * | | | + * | V | + * +---> PROBE_BW ----+ + * | ^ | | + * | | | | + * | +----+ | + * | | + * +---- PROBE_RTT <--+ + * + * A BBR flow starts in STARTUP, and ramps up its sending rate quickly. + * When it estimates the pipe is full, it enters DRAIN to drain the queue. + * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT. + * A long-lived BBR flow spends the vast majority of its time remaining + * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth + * in a fair manner, with a small, bounded queue. 
*If* a flow has been + * continuously sending for the entire min_rtt window, and hasn't seen an RTT + * sample that matches or decreases its min_rtt estimate for 10 seconds, then + * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe + * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if + * we estimated that we reached the full bw of the pipe then we enter PROBE_BW; + * otherwise we enter STARTUP to try to fill the pipe. + * * BBR is described in detail in: * "BBR: Congestion-Based Congestion Control", * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, @@ -51,7 +81,7 @@ enum bbr_mode { BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ BBR_DRAIN, /* drain any queue created during startup */ BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ - BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */ + BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ }; /* BBR congestion control block */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f2c59c8e57ff..a70046fea0e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3351,9 +3351,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) u32 delta = ack - tp->snd_una; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_acked += delta; - u64_stats_update_end_raw(&tp->syncp); tp->snd_una = ack; } @@ -3363,9 +3361,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) u32 delta = seq - tp->rcv_nxt; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_received += delta; - u64_stats_update_end_raw(&tp->syncp); tp->rcv_nxt = seq; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 83b3d0b8c481..6491b7c1f975 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -86,7 +86,6 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_low_latency); 
#ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, @@ -692,6 +691,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -712,7 +712,7 @@ out: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -727,6 +727,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -776,6 +777,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? 
sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -791,7 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -819,7 +821,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, @@ -1887,7 +1889,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; - struct inet_connection_sock *icsk; struct sock *sk = cur; if (!sk) { @@ -1909,7 +1910,6 @@ get_sk: continue; if (sk->sk_family == st->family) return sk; - icsk = inet_csk(sk); } spin_unlock(&ilb->lock); st->offset = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 896e9dfbdb5c..f57b5aa51b59 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2529,8 +2529,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) tcp_unlink_write_queue(next_skb, sk); - skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), - next_skb_size); + if (next_skb_size) + skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), + next_skb_size); if (next_skb->ip_summed == CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_PARTIAL; @@ -2567,14 +2568,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_pcount(skb) > 1) return false; - /* TODO: SACK collapsing could be used to remove 
this condition */ - if (skb_shinfo(skb)->nr_frags != 0) - return false; if (skb_cloned(skb)) return false; if (skb == tcp_send_head(sk)) return false; - /* Some heurestics for collapsing over SACK'd could be invented */ + /* Some heuristics for collapsing over SACK'd could be invented */ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) return false; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c8332715ee2d..097b70628631 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1019,7 +1019,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); @@ -1172,26 +1173,26 @@ out: return ret; } +/* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial) { int amt; atomic_sub(size, &sk->sk_rmem_alloc); - - spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_forward_alloc += size; amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); sk->sk_forward_alloc -= amt; - spin_unlock_bh(&sk->sk_receive_queue.lock); if (amt) __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); } -static void udp_rmem_free(struct sk_buff *skb) +/* Note: called with sk_receive_queue.lock held */ +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) { - udp_rmem_release(skb->sk, skb->truesize, 1); + udp_rmem_release(sk, skb->truesize, 1); } +EXPORT_SYMBOL(udp_skb_destructor); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { @@ -1228,9 +1229,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) sk->sk_forward_alloc -= size; - /* the skb owner in now the udp socket */ - skb->sk = sk; - skb->destructor = udp_rmem_free; + /* no need to setup a destructor, we will explicitly 
release the + * forward allocated memory on dequeue + */ skb->dev = NULL; sock_skb_set_dropcount(sk, skb); @@ -1254,8 +1255,15 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); static void udp_destruct_sock(struct sock *sk) { /* reclaim completely the forward allocated memory */ - __skb_queue_purge(&sk->sk_receive_queue); - udp_rmem_release(sk, 0, 0); + unsigned int total = 0; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + total += skb->truesize; + kfree_skb(skb); + } + udp_rmem_release(sk, total, 0); + inet_sock_destruct(sk); } @@ -1287,12 +1295,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp); */ static int first_packet_length(struct sock *sk) { - struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff_head *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; + int total = 0; int res; - __skb_queue_head_init(&list_kill); - spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { @@ -1302,12 +1309,13 @@ static int first_packet_length(struct sock *sk) IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); - __skb_queue_tail(&list_kill, skb); + total += skb->truesize; + kfree_skb(skb); } res = skb ? skb->len : -1; + if (total) + udp_rmem_release(sk, total, 1); spin_unlock_bh(&rcvq->lock); - - __skb_queue_purge(&list_kill); return res; } @@ -1362,8 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -1420,7 +1427,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off); + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) @@ -1442,7 +1449,7 @@ csum_copy_err: goto try_again; } -int udp_disconnect(struct sock *sk, int flags) +int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); /* @@ -1464,6 +1471,15 @@ int udp_disconnect(struct sock *sk, int flags) sk_dst_reset(sk); return 0; } +EXPORT_SYMBOL(__udp_disconnect); + +int udp_disconnect(struct sock *sk, int flags) +{ + lock_sock(sk); + __udp_disconnect(sk, flags); + release_sock(sk); + return 0; +} EXPORT_SYMBOL(udp_disconnect); void udp_lib_unhash(struct sock *sk) @@ -2272,7 +2288,7 @@ int udp_abort(struct sock *sk, int err) sk->sk_err = err; sk->sk_error_report(sk); - udp_disconnect(sk, 0); + __udp_disconnect(sk, 0); release_sock(sk); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9333c963607..b2be1d9757ef 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -295,7 +295,7 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - pp = udp_sk(sk)->gro_receive(sk, head, skb); + pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2343e4f2e0bf..0f00811a785f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -289,4 +289,28 @@ config IPV6_PIMSM_V2 Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. 
+config IPV6_SEG6_INLINE + bool "IPv6: direct Segment Routing Header insertion " + depends on IPV6 + ---help--- + Support for direct insertion of the Segment Routing Header, + also known as inline mode. Be aware that direct insertion of + extension headers (as opposed to encapsulation) may break + multiple mechanisms such as PMTUD or IPSec AH. Use this feature + only if you know exactly what you are doing. + + If unsure, say N. + +config IPV6_SEG6_HMAC + bool "IPv6: Segment Routing HMAC support" + depends on IPV6 + select CRYPTO_HMAC + select CRYPTO_SHA1 + select CRYPTO_SHA256 + ---help--- + Support for HMAC signature generation and verification + of SR-enabled packets. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c174ccb340a1..129cad2ba960 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o + udp_offload.o seg6.o seg6_iptunnel.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -44,6 +44,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-$(CONFIG_IPV6_FOU) += fou6.o +obj-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d8983e15f859..86219c0a0104 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -147,9 +147,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -static void __ipv6_regen_rndid(struct inet6_dev *idev); -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); -static void 
ipv6_regen_rndid(unsigned long data); +static void ipv6_regen_rndid(struct inet6_dev *idev); +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); @@ -239,6 +238,10 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -285,6 +288,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; /* Check if a valid qdisc is available */ @@ -409,9 +416,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) goto err_release; } - /* One reference from device. We must do this before - * we invoke __ipv6_regen_rndid(). - */ + /* One reference from device. 
*/ in6_dev_hold(ndev); if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -425,17 +430,15 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) #endif INIT_LIST_HEAD(&ndev->tempaddr_list); - setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); + ndev->desync_factor = U32_MAX; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { ndev->cnf.use_tempaddr = -1; - } else { - in6_dev_hold(ndev); - ipv6_regen_rndid((unsigned long) ndev); - } + } else + ipv6_regen_rndid(ndev); ndev->token = in6addr_any; @@ -447,7 +450,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); - del_timer(&ndev->regen_timer); snmp6_unregister_dev(ndev); goto err_release; } @@ -1190,6 +1192,8 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i int ret = 0; u32 addr_flags; unsigned long now = jiffies; + long max_desync_factor; + s32 cnf_temp_preferred_lft; write_lock_bh(&idev->lock); if (ift) { @@ -1222,23 +1226,42 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - __ipv6_try_regen_rndid(idev, tmpaddr); + ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; + + regen_advance = idev->cnf.regen_max_retry * + idev->cnf.dad_transmits * + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + + /* recalculate max_desync_factor each time and update + * idev->desync_factor if it's larger + */ + cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); + max_desync_factor = min_t(__u32, + idev->cnf.max_desync_factor, + cnf_temp_preferred_lft - regen_advance); + + if (unlikely(idev->desync_factor > max_desync_factor)) { + if (max_desync_factor > 0) { + get_random_bytes(&idev->desync_factor, + sizeof(idev->desync_factor)); + idev->desync_factor %= max_desync_factor; + } else { + 
idev->desync_factor = 0; + } + } + tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); - tmp_prefered_lft = min_t(__u32, - ifp->prefered_lft, - idev->cnf.temp_prefered_lft + age - - idev->cnf.max_desync_factor); + tmp_prefered_lft = cnf_temp_preferred_lft + age - + idev->desync_factor; + tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft); tmp_plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); - regen_advance = idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred @@ -2150,7 +2173,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) } /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static void __ipv6_regen_rndid(struct inet6_dev *idev) +static void ipv6_regen_rndid(struct inet6_dev *idev) { regen: get_random_bytes(idev->rndid, sizeof(idev->rndid)); @@ -2179,43 +2202,10 @@ regen: } } -static void ipv6_regen_rndid(unsigned long data) -{ - struct inet6_dev *idev = (struct inet6_dev *) data; - unsigned long expires; - - rcu_read_lock_bh(); - write_lock_bh(&idev->lock); - - if (idev->dead) - goto out; - - __ipv6_regen_rndid(idev); - - expires = jiffies + - idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) - - idev->cnf.max_desync_factor * HZ; - if (time_before(expires, jiffies)) { - pr_warn("%s: too short regeneration interval; timer disabled for %s\n", - __func__, idev->dev->name); - goto out; - } - - if (!mod_timer(&idev->regen_timer, expires)) - in6_dev_hold(idev); - -out: - write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); - in6_dev_put(idev); -} - -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { if 
(tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - __ipv6_regen_rndid(idev); + ipv6_regen_rndid(idev); } /* @@ -2356,7 +2346,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, max_valid = 0; max_prefered = idev->cnf.temp_prefered_lft - - idev->cnf.max_desync_factor - age; + idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; @@ -3018,7 +3008,7 @@ static void init_loopback(struct net_device *dev) * lo device down, release this obsolete dst and * reallocate a new router for ifa. */ - if (sp_ifa->rt->dst.obsolete > 0) { + if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { ip6_rt_put(sp_ifa->rt); sp_ifa->rt = NULL; } else { @@ -3594,9 +3584,6 @@ restart: if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - if (how && del_timer(&idev->regen_timer)) - in6_dev_put(idev); - /* Step 3: clear tempaddr list */ while (!list_empty(&idev->tempaddr_list)) { ifa = list_first_entry(&idev->tempaddr_list, @@ -4965,6 +4952,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; + array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; +#endif } static inline size_t inet6_ifla6_size(void) @@ -6057,6 +6048,22 @@ static const struct ctl_table addrconf_sysctl[] = { }, { + .procname = "seg6_enabled", + .data = &ipv6_devconf.seg6_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IPV6_SEG6_HMAC + { + .procname = "seg6_require_hmac", + .data = &ipv6_devconf.seg6_require_hmac, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..d424f3a3737a 100644 --- a/net/ipv6/af_inet6.c 
+++ b/net/ipv6/af_inet6.c @@ -61,6 +61,7 @@ #include <net/ip6_tunnel.h> #endif #include <net/calipso.h> +#include <net/seg6.h> #include <asm/uaccess.h> #include <linux/mroute6.h> @@ -678,6 +679,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); @@ -990,6 +992,10 @@ static int __init inet6_init(void) if (err) goto calipso_fail; + err = seg6_init(); + if (err) + goto seg6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1000,8 +1006,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - calipso_exit(); + seg6_exit(); #endif +seg6_fail: + calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0630a4d5daaa..189eb10b742d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2f30ed..c5d76d2edd26 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = np->flow_label; + fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; @@ -715,6 +716,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } + if (np->rxopt.bits.recvfragsize && 
opt->frag_max_size) { + int val = opt->frag_max_size; + + put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val); + } } void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 060a60b2f8a6..218f0cba231c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 139ceb68bd37..926818c331e5 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,6 +47,11 @@ #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif +#include <linux/seg6.h> +#include <net/seg6.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif #include <linux/uaccess.h> @@ -286,6 +291,182 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } +static void seg6_update_csum(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *hdr; + struct in6_addr *addr; + __be32 from, to; + + /* srh is at transport offset and seg_left is already decremented + * but daddr is not yet updated with next segment + */ + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + addr = hdr->segments + hdr->segments_left; + + hdr->segments_left++; + from = *(__be32 *)hdr; + + hdr->segments_left--; + to = *(__be32 *)hdr; + + /* update skb csum with diff resulting from seg_left decrement */ + + update_csum_diff4(skb, from, to); + + /* compute csum diff between current and next segment and update */ + + update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), + (__be32 *)addr); +} + +static int ipv6_srh_rcv(struct sk_buff *skb) +{ + struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = 
dev_net(skb->dev); + struct ipv6_sr_hdr *hdr; + struct inet6_dev *idev; + struct in6_addr *addr; + bool cleanup = false; + int accept_seg6; + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + idev = __in6_dev_get(skb->dev); + + accept_seg6 = net->ipv6.devconf_all->seg6_enabled; + if (accept_seg6 > idev->cnf.seg6_enabled) + accept_seg6 = idev->cnf.seg6_enabled; + + if (!accept_seg6) { + kfree_skb(skb); + return -1; + } + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (!seg6_hmac_validate_skb(skb)) { + kfree_skb(skb); + return -1; + } +#endif + +looped_back: + if (hdr->segments_left > 0) { + if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && + sr_has_cleanup(hdr)) + cleanup = true; + } else { + if (hdr->nexthdr == NEXTHDR_IPV6) { + int offset = (hdr->hdrlen + 1) << 3; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); + + if (!pskb_pull(skb, offset)) { + kfree_skb(skb); + return -1; + } + skb_postpull_rcsum(skb, skb_transport_header(skb), + offset); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + __skb_tunnel_rx(skb, skb->dev, net); + + netif_rx(skb); + return -1; + } + + opt->srcrt = skb_network_header_len(skb); + opt->lastopt = opt->srcrt; + skb->transport_header += (hdr->hdrlen + 1) << 3; + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); + + return 1; + } + + if (hdr->segments_left >= (hdr->hdrlen >> 1)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); + kfree_skb(skb); + return -1; + } + + if (skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -1; + } + } + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + hdr->segments_left--; + addr = hdr->segments + hdr->segments_left; + + skb_push(skb, 
sizeof(struct ipv6hdr)); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + seg6_update_csum(skb); + + ipv6_hdr(skb)->daddr = *addr; + + if (cleanup) { + int srhlen = (hdr->hdrlen + 1) << 3; + int nh = hdr->nexthdr; + + skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); + memmove(skb_network_header(skb) + srhlen, + skb_network_header(skb), + (unsigned char *)hdr - skb_network_header(skb)); + skb->network_header += srhlen; + ipv6_hdr(skb)->nexthdr = nh; + ipv6_hdr(skb)->payload_len = htons(skb->len - + sizeof(struct ipv6hdr)); + skb_push_rcsum(skb, sizeof(struct ipv6hdr)); + } + + skb_dst_drop(skb); + + ip6_route_input(skb); + + if (skb_dst(skb)->error) { + dst_input(skb); + return -1; + } + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (ipv6_hdr(skb)->hop_limit <= 1) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + kfree_skb(skb); + return -1; + } + ipv6_hdr(skb)->hop_limit--; + + /* be sure that srh is still present before reinjecting */ + if (!cleanup) { + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; + } + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + } + + dst_input(skb); + + return -1; +} + /******************************** Routing header. ********************************/ @@ -326,6 +507,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) return -1; } + /* segment routing */ + if (hdr->type == IPV6_SRCRT_TYPE_4) + return ipv6_srh_rcv(skb); + looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { @@ -679,9 +864,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb) * for headers. 
*/ -static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, - struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p) +static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) { struct rt0_hdr *phdr, *ihdr; int hops; @@ -704,6 +889,62 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, *proto = NEXTHDR_ROUTING; } +static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + struct ipv6_sr_hdr *sr_phdr, *sr_ihdr; + int plen, hops; + + sr_ihdr = (struct ipv6_sr_hdr *)opt; + plen = (sr_ihdr->hdrlen + 1) << 3; + + sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen); + memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr)); + + hops = sr_ihdr->first_segment + 1; + memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1, + (hops - 1) * sizeof(struct in6_addr)); + + sr_phdr->segments[0] = **addr_p; + *addr_p = &sr_ihdr->segments[hops - 1]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(sr_phdr)) { + struct net *net = NULL; + + if (skb->dev) + net = dev_net(skb->dev); + else if (skb->sk) + net = sock_net(skb->sk); + + WARN_ON(!net); + + if (net) + seg6_push_hmac(net, saddr, sr_phdr); + } +#endif + + sr_phdr->nexthdr = *proto; + *proto = NEXTHDR_ROUTING; +} + +static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + switch (opt->type) { + case IPV6_SRCRT_TYPE_0: + ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); + break; + case IPV6_SRCRT_TYPE_4: + ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr); + break; + default: + break; + } +} + static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) { struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); @@ -715,10 +956,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv 
void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, - struct in6_addr **daddr) + struct in6_addr **daddr, struct in6_addr *saddr) { if (opt->srcrt) { - ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); + ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); /* * IPV6_RTHDRDSTOPTS is ignored * unless IPV6_RTHDR is set (RFC3542). @@ -945,7 +1186,22 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, return NULL; *orig = fl6->daddr; - fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + + switch (opt->srcrt->type) { + case IPV6_SRCRT_TYPE_0: + fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + break; + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; + + fl6->daddr = srh->segments[srh->first_segment]; + break; + } + default: + return NULL; + } + return orig; } EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bd59c343d35f..ab249fee616b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -484,6 +485,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -658,6 +660,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + 
fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 628ae6d85b59..af8f52ee7180 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -474,7 +474,15 @@ static int ila_nl_dump_start(struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, GFP_KERNEL); @@ -482,16 +490,18 @@ static int ila_nl_dump_start(struct netlink_callback *cb) static int ila_nl_dump_done(struct netlink_callback *cb) { - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; rhashtable_walk_exit(&iter->rhiter); + kfree(iter); + return 0; } static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; struct rhashtable_iter *rhiter = &iter->rhiter; struct ila_map *ila; int ret; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 532c3ef282c5..1c86c478f578 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -88,6 +88,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -136,6 +137,7 @@ static struct dst_entry 
*inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 00cf28ad4565..02761c9fe43e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; @@ -109,7 +109,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score++; @@ -131,11 +131,12 @@ struct sock *inet6_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -263,13 +264,15 @@ EXPORT_SYMBOL_GPL(inet6_hash_connect); int inet6_hash(struct sock *sk) { + int err = 0; + if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); + err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); local_bh_enable(); } - return 0; + return err; } EXPORT_SYMBOL_GPL(inet6_hash); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d7d6d3ae0b3b..710bc79f9113 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -548,6 +548,8 @@ static inline int 
ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) return -1; @@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e7bfd55899a3..1fcf61f1cbc3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -246,7 +246,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, iph, nlen); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6001e781164e..ddc878d2cc6d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -203,7 +203,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); + ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, + &fl6->saddr); } skb_push(skb, sizeof(struct ipv6hdr)); @@ -1672,7 +1673,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); + ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3a70567846aa..259e8507d2cd 100644 --- a/net/ipv6/ip6_tunnel.c +++ 
b/net/ipv6/ip6_tunnel.c @@ -157,6 +157,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(&any, local); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_any(&t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } @@ -164,6 +165,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(remote, &any); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.laddr) && (t->dev->flags & IFF_UP)) return t; } @@ -1155,7 +1157,7 @@ route_lookup: if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL); } /* Calculate max headroom for all the headers and adjust @@ -1170,6 +1172,7 @@ route_lookup: if (err) return err; + skb->protocol = htons(ETH_P_IPV6); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1237,6 +1240,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1315,6 +1320,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 35c5b2d8c401..af3f0e011265 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, 
net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7f4265b1649b..52101b37ad6e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || + if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, sizeof(*pim), IPPROTO_PIM, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1b9316e1386a..54d165b9845a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5330262ab673..3ba530373560 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -52,6 +52,7 @@ #include <net/udplite.h> #include <net/xfrm.h> #include <net/compat.h> +#include <net/seg6.h> #include <asm/uaccess.h> @@ -120,6 +121,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, static bool setsockopt_needs_rtnl(int optname) { switch (optname) { + case IPV6_ADDRFORM: case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: case IPV6_JOIN_ANYCAST: @@ -198,7 +200,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } fl6_free_socklist(sk); - ipv6_sock_mc_close(sk); + __ipv6_sock_mc_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so @@ -429,6 +431,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; #endif + 
case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *) + opt->srcrt; + + if (!seg6_validate_srh(srh, optlen)) + goto sticky_done; + break; + } default: goto sticky_done; } @@ -867,6 +878,10 @@ pref_skip_coa: np->autoflowlabel = valbool; retv = 0; break; + case IPV6_RECVFRAGSIZE: + np->rxopt.bits.recvfragsize = valbool; + retv = 0; + break; } release_sock(sk); @@ -1309,6 +1324,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->autoflowlabel; break; + case IPV6_RECVFRAGSIZE: + val = np->rxopt.bits.recvfragsize; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 75c1fc54f188..14a3903f1c82 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -276,16 +276,14 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, return idev; } -void ipv6_sock_mc_close(struct sock *sk) +void __ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - if (!rcu_access_pointer(np->ipv6_mc_list)) - return; + ASSERT_RTNL(); - rtnl_lock(); while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) { struct net_device *dev; @@ -303,8 +301,17 @@ void ipv6_sock_mc_close(struct sock *sk) atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); - } +} + +void ipv6_sock_mc_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!rcu_access_pointer(np->ipv6_mc_list)) + return; + rtnl_lock(); + __ipv6_sock_mc_close(sk); rtnl_unlock(); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d61..39970e212ad5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? 
skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index e10a04c9cdc7..6acb2eecd986 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV6 + tristate "IPv6 socket lookup support" + help + This option enables the IPv6 socket lookup infrastructure. This + is used by the ip6tables socket match. + if NF_TABLES config NF_TABLES_IPV6 @@ -54,6 +60,14 @@ config NFT_DUP_IPV6 help This module enables IPv6 packet duplication support for nf_tables. +config NFT_FIB_IPV6 + tristate "nf_tables fib / ipv6 route lookup support" + select NFT_FIB + help + This module enables IPv6 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. 
+ endif # NF_TABLES_IPV6 endif # NF_TABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b4f7d0b4e2af..fe180c96040e 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o + # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o @@ -40,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o +obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c new file mode 100644 index 000000000000..ebb2bf84232a --- /dev/null +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/inet6_hashtables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_socket.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/* Pull the quoted (inner) addresses and ports out of an ICMPv6 error.
+ *
+ * @outside_hdrlen is the offset of the ICMPv6 header inside @skb and
+ * @ipv6_var is caller-provided storage used when the inner IPv6 header has
+ * to be copied out of a non-linear skb; *laddr / *raddr may point into it.
+ *
+ * Returns 0 and fills the out parameters on success, 1 when the packet is
+ * an ICMPv6 informational message, is truncated, or quotes something other
+ * than TCP or UDP.
+ */
+static int
+extract_icmp6_fields(const struct sk_buff *skb,
+		     unsigned int outside_hdrlen,
+		     int *protocol,
+		     const struct in6_addr **raddr,
+		     const struct in6_addr **laddr,
+		     __be16 *rport,
+		     __be16 *lport,
+		     struct ipv6hdr *ipv6_var)
+{
+	const struct ipv6hdr *inside_iph;
+	struct icmp6hdr *icmph, _icmph;
+	__be16 *ports, _ports[2];
+	u8 inside_nexthdr;
+	__be16 inside_fragoff;
+	int inside_hdrlen;
+
+	icmph = skb_header_pointer(skb, outside_hdrlen,
+				   sizeof(_icmph), &_icmph);
+	if (icmph == NULL)
+		return 1;
+
+	if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
+		return 1;
+
+	inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
+					sizeof(*ipv6_var), ipv6_var);
+	if (inside_iph == NULL)
+		return 1;
+	inside_nexthdr = inside_iph->nexthdr;
+
+	inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
+					      sizeof(*ipv6_var),
+					 &inside_nexthdr, &inside_fragoff);
+	if (inside_hdrlen < 0)
+		return 1; /* hjm: Packet has no/incomplete transport layer headers. */
+
+	if (inside_nexthdr != IPPROTO_TCP &&
+	    inside_nexthdr != IPPROTO_UDP)
+		return 1;
+
+	ports = skb_header_pointer(skb, inside_hdrlen,
+				   sizeof(_ports), &_ports);
+	if (ports == NULL)
+		return 1;
+
+	/* the inside IP packet is the one quoted from our side, thus
+	 * its saddr is the local address */
+	*protocol = inside_nexthdr;
+	*laddr = &inside_iph->saddr;
+	*lport = ports[0];
+	*raddr = &inside_iph->daddr;
+	*rport = ports[1];
+
+	return 0;
+}
+
+/* Dispatch the socket lookup to the TCP or UDP hash tables; any other
+ * protocol yields NULL.
+ */
+static struct sock *
+nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+		      const u8 protocol,
+		      const struct in6_addr *saddr, const struct in6_addr *daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in)
+{
+	switch (protocol) {
+	case IPPROTO_TCP:
+		return inet6_lookup(net, &tcp_hashinfo, skb, doff,
+				    saddr, sport, daddr, dport,
+				    in->ifindex);
+	case IPPROTO_UDP:
+		return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+				       in->ifindex);
+	}
+
+	return NULL;
+}
+
+/* Find the socket that @skb belongs to.
+ *
+ * TCP and UDP packets are looked up by their own addresses/ports; ICMPv6
+ * errors are looked up by the addresses/ports of the packet they quote.
+ * Returns the socket handed back by the protocol lookup, or NULL when no
+ * transport header can be parsed or the protocol is not handled.
+ */
+struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
+				  const struct net_device *indev)
+{
+	__be16 uninitialized_var(dport), uninitialized_var(sport);
+	const struct in6_addr *daddr = NULL, *saddr = NULL;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct sk_buff *data_skb = NULL;
+	int doff = 0;
+	int thoff = 0, tproto;
+
+	tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+	if (tproto < 0) {
+		pr_debug("unable to find transport header in IPv6 packet, dropping\n");
+		return NULL;
+	}
+
+	if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
+		struct tcphdr _hdr;
+		struct udphdr *hp;
+
+		/* The ports are read through a UDP header view for both
+		 * protocols, but the bounce buffer must be a full TCP header:
+		 * __tcp_hdrlen() below reads the TCP data-offset field, which
+		 * lies beyond sizeof(struct udphdr).  Only request as many
+		 * bytes as the protocol in use actually guarantees.
+		 */
+		hp = skb_header_pointer(skb, thoff, tproto == IPPROTO_UDP ?
+					sizeof(*hp) : sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return NULL;
+
+		saddr = &iph->saddr;
+		sport = hp->source;
+		daddr = &iph->daddr;
+		dport = hp->dest;
+		data_skb = (struct sk_buff *)skb;
+		doff = tproto == IPPROTO_TCP ?
+			thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+			thoff + sizeof(*hp);
+
+	} else if (tproto == IPPROTO_ICMPV6) {
+		struct ipv6hdr ipv6_var;
+
+		if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
+					 &sport, &dport, &ipv6_var))
+			return NULL;
+	} else {
+		return NULL;
+	}
+
+	return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
+				     sport, dport, indev);
+}
+EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure");
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
new file mode 100644
index 000000000000..ff1f1b6b4a4a
--- /dev/null
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -0,0 +1,275 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+/* True when the skb already carries a route flagged RTF_LOCAL. */
+static bool fib6_is_local(const struct sk_buff *skb)
+{
+	const struct rt6_info *rt = (const void *)skb_dst(skb);
+
+	return rt && (rt->rt6i_flags & RTF_LOCAL);
+}
+
+/* NULL-tolerant accessor: a missing device maps to ifindex 0. */
+static int get_ifindex(const struct net_device *dev)
+{
+	return dev ? dev->ifindex : 0;
+}
+
+/* Fill @fl6 from the packet's IPv6 header according to the expression flags
+ * and return the RT6_LOOKUP_F_* flags the route lookup should use.
+ *
+ * With NFTA_FIB_F_DADDR the flow mirrors the packet; otherwise source and
+ * destination are swapped so the lookup is done on the packet's source.
+ */
+static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+			       const struct nft_pktinfo *pkt,
+			       const struct net_device *dev)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
+	int lookup_flags = 0;
+
+	if (priv->flags & NFTA_FIB_F_DADDR) {
+		fl6->daddr = iph->daddr;
+		fl6->saddr = iph->saddr;
+	} else {
+		fl6->daddr = iph->saddr;
+		fl6->saddr = iph->daddr;
+	}
+
+	if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+		lookup_flags |= RT6_LOOKUP_F_IFACE;
+		fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+	}
+
+	if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+		lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+	if (priv->flags & NFTA_FIB_F_MARK)
+		fl6->flowi6_mark = pkt->skb->mark;
+
+	fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+
+	return lookup_flags;
+}
+
+/* Classify the selected address as an RTN_* route type.
+ *
+ * Route lookup failures and reject routes are mapped to RTN_BLACKHOLE,
+ * RTN_PROHIBIT, RTN_THROW or RTN_UNREACHABLE depending on the error code.
+ */
+static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
+				const struct nft_pktinfo *pkt)
+{
+	const struct net_device *dev = NULL;
+	const struct nf_ipv6_ops *v6ops;
+	const struct nf_afinfo *afinfo;
+	int route_err, addrtype;
+	struct rt6_info *rt;
+	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.flowi6_proto = pkt->tprot,
+	};
+	u32 ret = 0;
+
+	afinfo = nf_get_afinfo(NFPROTO_IPV6);
+	if (!afinfo)
+		return RTN_UNREACHABLE;
+
+	if (priv->flags & NFTA_FIB_F_IIF)
+		dev = pkt->in;
+	else if (priv->flags & NFTA_FIB_F_OIF)
+		dev = pkt->out;
+
+	nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+
+	v6ops = nf_get_ipv6_ops();
+	if (dev && v6ops && v6ops->chk_addr(pkt->net, &fl6.daddr, dev, true))
+		ret = RTN_LOCAL;
+
+	route_err = afinfo->route(pkt->net, (struct dst_entry **)&rt,
+				  flowi6_to_flowi(&fl6), false);
+	if (route_err)
+		goto err;
+
+	if (rt->rt6i_flags & RTF_REJECT) {
+		route_err = rt->dst.error;
+		dst_release(&rt->dst);
+		goto err;
+	}
+
+	if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
+		ret = RTN_ANYCAST;
+	else if (!dev && rt->rt6i_flags & RTF_LOCAL)
+		ret = RTN_LOCAL;
+
+	dst_release(&rt->dst);
+
+	if (ret)
+		return ret;
+
+	addrtype = ipv6_addr_type(&fl6.daddr);
+
+	if (addrtype & IPV6_ADDR_MULTICAST)
+		return RTN_MULTICAST;
+	if (addrtype & IPV6_ADDR_UNICAST)
+		return RTN_UNICAST;
+
+	return RTN_UNSPEC;
+ err:
+	switch (route_err) {
+	case -EINVAL:
+		return RTN_BLACKHOLE;
+	case -EACCES:
+		return RTN_PROHIBIT;
+	case -EAGAIN:
+		return RTN_THROW;
+	default:
+		break;
+	}
+
+	return RTN_UNREACHABLE;
+}
+
+/* Expression ->eval for NFT_FIB_RESULT_ADDRTYPE: store the RTN_* type in
+ * the destination register.
+ */
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+			const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+
+	*dest = __nft_fib6_eval_type(priv, pkt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+
+/* Expression ->eval for NFT_FIB_RESULT_OIF / NFT_FIB_RESULT_OIFNAME: look up
+ * the route for the selected address and store the interface index or name
+ * of the route's device.  The first register word is zeroed beforehand and
+ * left that way when the route is an error, reject, anycast or local route.
+ */
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		   const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	const struct net_device *oif = NULL;
+	u32 *dest = &regs->data[priv->dreg];
+	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.flowi6_proto = pkt->tprot,
+	};
+	struct rt6_info *rt;
+	int lookup_flags;
+
+	if (priv->flags & NFTA_FIB_F_IIF)
+		oif = pkt->in;
+	else if (priv->flags & NFTA_FIB_F_OIF)
+		oif = pkt->out;
+
+	lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+
+	if (pkt->hook == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
+		nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+		return;
+	}
+
+	*dest = 0;
+ again:
+	rt = (void *)ip6_route_lookup(pkt->net, &fl6, lookup_flags);
+	if (rt->dst.error)
+		goto put_rt_err;
+
+	/* Should not see RTF_LOCAL here */
+	if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
+		goto put_rt_err;
+
+	if (oif && oif != rt->rt6i_idev->dev) {
+		/* multipath route? Try again with F_IFACE */
+		if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
+			lookup_flags |= RT6_LOOKUP_F_IFACE;
+			fl6.flowi6_oif = oif->ifindex;
+			ip6_rt_put(rt);
+			goto again;
+		}
+	}
+
+	switch (priv->result) {
+	case NFT_FIB_RESULT_OIF:
+		*dest = rt->rt6i_idev->dev->ifindex;
+		break;
+	case NFT_FIB_RESULT_OIFNAME:
+		strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+ put_rt_err:
+	ip6_rt_put(rt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval);
+
+static struct nft_expr_type nft_fib6_type;
+
+static const struct nft_expr_ops nft_fib6_type_ops = {
+	.type		= &nft_fib6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib6_eval_type,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib6_ops = {
+	.type		= &nft_fib6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib6_eval,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+/* Pick the ops matching the requested NFTA_FIB_RESULT.
+ *
+ * Returns ERR_PTR(-EINVAL) when the attribute is missing and
+ * ERR_PTR(-EOPNOTSUPP) for an unknown result type.
+ */
+static const struct nft_expr_ops *
+nft_fib6_select_ops(const struct nft_ctx *ctx,
+		    const struct nlattr * const tb[])
+{
+	enum nft_fib_result result;
+
+	if (!tb[NFTA_FIB_RESULT])
+		return ERR_PTR(-EINVAL);
+
+	/* the attribute is big-endian on the wire: convert to host order */
+	result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+	switch (result) {
+	case NFT_FIB_RESULT_OIF:
+		return &nft_fib6_ops;
+	case NFT_FIB_RESULT_OIFNAME:
+		return &nft_fib6_ops;
+	case NFT_FIB_RESULT_ADDRTYPE:
+		return &nft_fib6_type_ops;
+	default:
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+}
+
+static struct nft_expr_type nft_fib6_type __read_mostly = {
+	.name		= "fib",
+	.select_ops	= &nft_fib6_select_ops,
+	.policy		= nft_fib_policy,
+	.maxattr	= NFTA_FIB_MAX,
+	.family		= NFPROTO_IPV6,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_fib6_module_init(void)
+{
+	return nft_register_expr(&nft_fib6_type);
+}
+
+static void __exit nft_fib6_module_exit(void)
+{
+	nft_unregister_expr(&nft_fib6_type);
+}
+module_init(nft_fib6_module_init);
+module_exit(nft_fib6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 0e983b694ee8..e1f8b34d7a2e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -180,7 +181,7 @@ struct proto pingv6_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .sendmsg = ping_v6_sendmsg, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d7e8b955ade8..291ebc260e70 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -776,6 +776,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; @@ -1243,7 +1244,7 @@ struct proto rawv6_prot = { .close = rawv6_close, .destroy = raw6_destroy, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, .setsockopt = rawv6_setsockopt, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2160d5d009cb..e1da5b888cc4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, { struct sk_buff *prev, *next; struct net_device *dev; - int offset, end; + int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); u8 ecn; @@ -336,6 +336,10 @@ found: fq->ecn |= ecn; 
add_frag_mem_limit(fq->q.net, skb->truesize); + fragsize = -skb_network_offset(skb) + skb->len; + if (fragsize > fq->q.max_size) + fq->q.max_size = fragsize; + /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ @@ -456,7 +460,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; memmove(head->head + sizeof(struct frag_hdr), head->head, (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); + if (skb_mac_header_was_set(head)) + head->mac_header += sizeof(struct frag_hdr); head->network_header += sizeof(struct frag_hdr); skb_reset_transport_header(head); @@ -494,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; IP6CB(head)->flags |= IP6SKB_FRAGMENTED; + IP6CB(head)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 
8) */ skb_postpush_rcsum(head, skb_network_header(head), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bdbc38e8bf29..6aa014eedccd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -102,11 +102,13 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref); static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr, + struct net_device *dev); #endif struct uncached_list { @@ -656,7 +658,8 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, struct net_device *dev = rt->dst.dev; if (dev && !netif_carrier_ok(dev) && - idev->cnf.ignore_routes_with_linkdown) + idev->cnf.ignore_routes_with_linkdown && + !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; if (rt6_check_expired(rt)) @@ -803,7 +806,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_dflt_router(gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, - gwaddr, dev->ifindex); + gwaddr, dev); if (rt && !lifetime) { ip6_del_rt(rt); @@ -811,8 +814,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -1050,6 +1053,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; if (net->ipv6.devconf_all->forwarding == 
0) strict |= RT6_LOOKUP_F_REACHABLE; @@ -1401,7 +1405,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1413,6 +1417,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1426,7 +1431,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) struct dst_entry *dst; ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -1518,7 +1523,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1531,6 +1537,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1552,6 +1559,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1560,7 +1568,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct 
sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); @@ -1789,7 +1798,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = RT6_LOOKUP_F_IFACE; + int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE; table = fib6_get_table(net, cfg->fc_table); if (!table) @@ -2325,13 +2334,16 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr, + struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; + int ifindex = dev->ifindex; struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(net, tb_id); if (!table) return NULL; @@ -2357,12 +2369,13 @@ out: static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref) { struct fib6_config cfg = { .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), @@ -2371,7 +2384,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; + cfg.fc_table = l3mdev_fib_table(dev) ? 
: RT6_TABLE_INFO, cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2381,16 +2394,17 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2424,20 +2438,20 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, cfg.fc_gateway = *gwaddr; - ip6_route_add(&cfg); + if (!ip6_route_add(&cfg)) { + struct fib6_table *table; + + table = fib6_get_table(dev_net(dev), cfg.fc_table); + if (table) + table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; + } return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(struct net *net) +static void __rt6_purge_dflt_routers(struct fib6_table *table) { struct rt6_info *rt; - struct fib6_table *table; - - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (!table) - return; restart: read_lock_bh(&table->tb6_lock); @@ -2451,6 +2465,27 @@ restart: } } read_unlock_bh(&table->tb6_lock); + + table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; +} + +void rt6_purge_dflt_routers(struct net *net) +{ + struct fib6_table *table; + struct hlist_head *head; + unsigned int h; + + rcu_read_lock(); + + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, head, tb6_hlist) { + if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) + __rt6_purge_dflt_routers(table); + } + } + + rcu_read_unlock(); } static void rtmsg_to_fib6_config(struct net *net, @@ -2767,6 +2802,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP_TYPE] 
= { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3341,6 +3377,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c new file mode 100644 index 000000000000..50f6e0663d1d --- /dev/null +++ b/net/ipv6/seg6.c @@ -0,0 +1,487 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/slab.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+
+#include <net/seg6.h>
+#include <net/genetlink.h>
+#include <linux/seg6.h>
+#include <linux/seg6_genl.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+
+/* Sanity-check a segment routing header of @len bytes.
+ *
+ * Returns true only when the type is IPV6_SRCRT_TYPE_4, hdrlen matches
+ * @len, segments_left equals first_segment, the segment list fits, and the
+ * trailing TLVs add up to exactly the remaining bytes.
+ */
+bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
+{
+	int trailing;
+	unsigned int tlv_offset;
+
+	if (srh->type != IPV6_SRCRT_TYPE_4)
+		return false;
+
+	if (((srh->hdrlen + 1) << 3) != len)
+		return false;
+
+	if (srh->segments_left != srh->first_segment)
+		return false;
+
+	tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
+
+	trailing = len - tlv_offset;
+	if (trailing < 0)
+		return false;
+
+	while (trailing) {
+		struct sr6_tlv *tlv;
+		unsigned int tlv_len;
+
+		/* the TLV header itself must fit before tlv->len is read,
+		 * otherwise the read lands past the end of the SRH
+		 */
+		if (trailing < (int)sizeof(*tlv))
+			return false;
+
+		tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset);
+		tlv_len = sizeof(*tlv) + tlv->len;
+
+		trailing -= tlv_len;
+		if (trailing < 0)
+			return false;
+
+		tlv_offset += tlv_len;
+	}
+
+	return true;
+}
+
+static struct genl_family seg6_genl_family;
+
+static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = {
+	[SEG6_ATTR_DST]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct in6_addr) },
+	[SEG6_ATTR_DSTLEN]	= { .type = NLA_S32, },
+	[SEG6_ATTR_HMACKEYID]	= { .type = NLA_U32, },
+	[SEG6_ATTR_SECRET]	= { .type = NLA_BINARY, },
+	[SEG6_ATTR_SECRETLEN]	= { .type = NLA_U8, },
+	[SEG6_ATTR_ALGID]	= { .type = NLA_U8, },
+	[SEG6_ATTR_HMACINFO]	= { .type = NLA_NESTED, },
+};
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+
+/* SEG6_CMD_SETHMAC handler: add, replace or delete an HMAC key.
+ *
+ * A zero SEG6_ATTR_SECRETLEN deletes the key; otherwise any existing entry
+ * for the key id is removed and a new one is inserted.  Returns 0 or a
+ * negative errno.
+ */
+static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct seg6_pernet_data *sdata;
+	struct seg6_hmac_info *hinfo;
+	u32 hmackeyid;
+	char *secret;
+	int err = 0;
+	u8 algid;
+	u8 slen;
+
+	sdata = seg6_pernet(net);
+
+	if (!info->attrs[SEG6_ATTR_HMACKEYID] ||
+	    !info->attrs[SEG6_ATTR_SECRETLEN] ||
+	    !info->attrs[SEG6_ATTR_ALGID])
+		return -EINVAL;
+
+	hmackeyid = nla_get_u32(info->attrs[SEG6_ATTR_HMACKEYID]);
+	slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]);
+	algid = nla_get_u8(info->attrs[SEG6_ATTR_ALGID]);
+
+	if (hmackeyid == 0)
+		return -EINVAL;
+
+	if (slen > SEG6_HMAC_SECRET_LEN)
+		return -EINVAL;
+
+	mutex_lock(&sdata->lock);
+	hinfo = seg6_hmac_info_lookup(net, hmackeyid);
+
+	if (!slen) {
+		/* nothing to delete: report it instead of letting the
+		 * error code be overwritten by a pointless second lookup
+		 */
+		if (!hinfo) {
+			err = -ENOENT;
+			goto out_unlock;
+		}
+
+		err = seg6_hmac_info_del(net, hmackeyid);
+
+		goto out_unlock;
+	}
+
+	if (!info->attrs[SEG6_ATTR_SECRET]) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* SECRETLEN is user-supplied and independent of the real payload
+	 * size; never copy more than the attribute actually carries
+	 */
+	if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (hinfo) {
+		err = seg6_hmac_info_del(net, hmackeyid);
+		if (err)
+			goto out_unlock;
+	}
+
+	secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]);
+
+	hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL);
+	if (!hinfo) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	memcpy(hinfo->secret, secret, slen);
+	hinfo->slen = slen;
+	hinfo->alg_id = algid;
+	hinfo->hmackeyid = hmackeyid;
+
+	err = seg6_hmac_info_add(net, hmackeyid, hinfo);
+	if (err)
+		kfree(hinfo);
+
+out_unlock:
+	mutex_unlock(&sdata->lock);
+	return err;
+}
+
+#else
+
+static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+{
+	return -ENOTSUPP;
+}
+
+#endif
+
+/* SEG6_CMD_SET_TUNSRC handler: replace the per-netns tunnel source address
+ * with the one carried in SEG6_ATTR_DST.
+ *
+ * Returns 0 on success, -EINVAL when the attribute is missing or shorter
+ * than an IPv6 address, -ENOMEM when the copy cannot be allocated.
+ */
+static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct in6_addr *val, *t_old, *t_new;
+	struct seg6_pernet_data *sdata;
+
+	sdata = seg6_pernet(net);
+
+	if (!info->attrs[SEG6_ATTR_DST])
+		return -EINVAL;
+
+	/* the policy only caps the length of an NLA_BINARY attribute, so a
+	 * short payload has to be rejected here before it is duplicated
+	 */
+	if (nla_len(info->attrs[SEG6_ATTR_DST]) < (int)sizeof(*val))
+		return -EINVAL;
+
+	val = nla_data(info->attrs[SEG6_ATTR_DST]);
+	t_new = kmemdup(val, sizeof(*val), GFP_KERNEL);
+	if (!t_new)
+		return -ENOMEM;	/* never publish a NULL tun_src */
+
+	mutex_lock(&sdata->lock);
+
+	t_old = sdata->tun_src;
+	rcu_assign_pointer(sdata->tun_src, t_new);
+
+	mutex_unlock(&sdata->lock);
+
+	/* wait for readers of the old pointer before freeing it */
+	synchronize_net();
+	kfree(t_old);
+
+	return 0;
+}
+
+/* SEG6_CMD_GET_TUNSRC handler: reply with the current tunnel source address
+ * in SEG6_ATTR_DST.
+ *
+ * Returns the result of genlmsg_reply(), or -ENOMEM when the reply cannot
+ * be allocated or filled.
+ */
+static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct in6_addr *tun_src;
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC);
+	if (!hdr)
+		goto free_msg;
+
+	rcu_read_lock();
+	tun_src = rcu_dereference(seg6_pernet(net)->tun_src);
+
+	if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src))
+		goto nla_put_failure;
+
+	rcu_read_unlock();
+
+	genlmsg_end(msg, hdr);
+
+	/* propagate delivery failures instead of reporting success */
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	rcu_read_unlock();
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+
+/* Append the key id, secret length, secret and algorithm id of @hinfo to
+ * @msg.  Returns 0 on success, -1 when any attribute does not fit.
+ */
+static int __seg6_hmac_fill_info(struct seg6_hmac_info *hinfo,
+				 struct sk_buff *msg)
+{
+	if (nla_put_u32(msg, SEG6_ATTR_HMACKEYID, hinfo->hmackeyid) ||
+	    nla_put_u8(msg, SEG6_ATTR_SECRETLEN, hinfo->slen) ||
+	    nla_put(msg, SEG6_ATTR_SECRET, hinfo->slen, hinfo->secret) ||
+	    nla_put_u8(msg, SEG6_ATTR_ALGID, hinfo->alg_id))
+		return -1;
+
+	return 0;
+}
+
+/* Emit one dump message describing @hinfo into @skb.
+ *
+ * Returns 0, -ENOMEM when the message header does not fit, or -EMSGSIZE
+ * when the attributes do not fit (the partial message is cancelled).
+ */
+static int __seg6_genl_dumphmac_element(struct seg6_hmac_info *hinfo,
+					u32 portid, u32 seq, u32 flags,
+					struct sk_buff *skb, u8 cmd)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, portid, seq, &seg6_genl_family, flags, cmd);
+	if (!hdr)
+		return -ENOMEM;
+
+	if (__seg6_hmac_fill_info(hinfo, skb) < 0)
+		goto nla_put_failure;
+
+	genlmsg_end(skb, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/* Dump ->start: allocate the rhashtable iterator (kept in cb->args[0]) if
+ * there is none yet and enter the walk over the per-netns HMAC table.
+ */
+static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct seg6_pernet_data *sdata;
+	struct rhashtable_iter *iter;
+
+	sdata = seg6_pernet(net);
+	iter = (struct rhashtable_iter *)cb->args[0];
+
+	if (!iter) {
+		iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+		if (!iter)
+			return -ENOMEM;
+
+		cb->args[0] = (long)iter;
+	}
+
+	rhashtable_walk_enter(&sdata->hmac_infos, iter);
+
+	return 0;
+}
+
+/* Dump ->done: leave the walk and free the iterator from cb->args[0]. */
+static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
+{
+	struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+	rhashtable_walk_exit(iter);
+
+	kfree(iter);
+
+	return 0;
+}
+
+/* Dump ->dumpit: emit one message per HMAC entry until the table is
+ * exhausted or an element no longer fits.  Returns skb->len after a full
+ * pass, otherwise the error that stopped the walk.
+ */
+static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+	struct net *net = sock_net(skb->sk);
+	struct seg6_pernet_data *sdata;
+	struct seg6_hmac_info *hinfo;
+	int ret;
+
+	sdata = seg6_pernet(net);
+
+	ret = rhashtable_walk_start(iter);
+	if (ret && ret != -EAGAIN)
+		goto done;
+
+	for (;;) {
+		hinfo = rhashtable_walk_next(iter);
+
+		if (IS_ERR(hinfo)) {
+			if (PTR_ERR(hinfo) == -EAGAIN)
+				continue;
+			ret = PTR_ERR(hinfo);
+			goto done;
+		} else if (!hinfo) {
+			break;
+		}
+
+		ret = __seg6_genl_dumphmac_element(hinfo,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq,
+						   NLM_F_MULTI,
+						   skb, SEG6_CMD_DUMPHMAC);
+		if (ret)
+			goto done;
+	}
+
+	ret = skb->len;
+
+done:
+	rhashtable_walk_stop(iter);
+	return ret;
+}
+
+#else
+
+static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return -ENOTSUPP;
+}
+
+#endif
+
+/* Per-netns setup: allocate the seg6 state and a zeroed tunnel source
+ * address, then attach it to @net.  Returns 0 or -ENOMEM.
+ */
+static int __net_init seg6_net_init(struct net *net)
+{
+	struct seg6_pernet_data *sdata;
+
+	sdata = kzalloc(sizeof(*sdata), GFP_KERNEL);
+	if (!sdata)
+		return -ENOMEM;
+
+	mutex_init(&sdata->lock);
+
+	sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL);
+	if (!sdata->tun_src) {
+		kfree(sdata);
+		return -ENOMEM;
+	}
+
+	net->ipv6.seg6_data = sdata;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	seg6_hmac_net_init(net);
+#endif
+
+	return 0;
+}
+
+/* Per-netns teardown: release what seg6_net_init() allocated. */
+static void __net_exit seg6_net_exit(struct net *net)
+{
+	struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	seg6_hmac_net_exit(net);
+#endif
+
+	kfree(sdata->tun_src);
+	kfree(sdata);
+}
+
+static struct pernet_operations ip6_segments_ops = {
+	.init = seg6_net_init,
+	.exit = seg6_net_exit,
+};
+
+static const struct genl_ops seg6_genl_ops[] = {
+	{
+		.cmd	= SEG6_CMD_SETHMAC,
+		.doit	= seg6_genl_sethmac,
+		.policy	= seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+	{
+		.cmd	= SEG6_CMD_DUMPHMAC,
+		.start	= seg6_genl_dumphmac_start,
+		.dumpit	= seg6_genl_dumphmac,
+		.done	= seg6_genl_dumphmac_done,
+		.policy	= seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+	{
+		.cmd	= SEG6_CMD_SET_TUNSRC,
+		.doit	= seg6_genl_set_tunsrc,
+		.policy	= seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+	{
+		.cmd	= SEG6_CMD_GET_TUNSRC,
+		.doit	= seg6_genl_get_tunsrc,
+		.policy	= seg6_genl_policy,
+		.flags	= GENL_ADMIN_PERM,
+	},
+};
+
+static struct genl_family seg6_genl_family __ro_after_init = {
+	.hdrsize	= 0,
+	.name		= SEG6_GENL_NAME,
+	.version	= SEG6_GENL_VERSION,
+	.maxattr	= SEG6_ATTR_MAX,
+	.netnsok	= true,
+	.parallel_ops	= true,
+	.ops		= seg6_genl_ops,
+	.n_ops		= ARRAY_SIZE(seg6_genl_ops),
+	.module		= THIS_MODULE,
+};
+
+/* Register the genetlink family, the pernet ops, the iptunnel encap and
+ * (when configured) the HMAC support; on failure, undo the steps already
+ * taken in reverse order and return the error.
+ */
+int __init seg6_init(void)
+{
+	int err = -ENOMEM;
+
+	err = genl_register_family(&seg6_genl_family);
+	if (err)
+		goto out;
+
+	err = register_pernet_subsys(&ip6_segments_ops);
+	if (err)
+		goto out_unregister_genl;
+
+	err = seg6_iptunnel_init();
+	if (err)
+		goto out_unregister_pernet;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	err = seg6_hmac_init();
+	if (err)
+		goto out_unregister_iptun;
+#endif
+
+	pr_info("Segment Routing with IPv6\n");
+
+out:
+	return err;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+out_unregister_iptun:
+	seg6_iptunnel_exit();
+#endif
+out_unregister_pernet:
+	unregister_pernet_subsys(&ip6_segments_ops);
+out_unregister_genl:
+	genl_unregister_family(&seg6_genl_family);
+	goto out;
+}
+
+/* Undo seg6_init() in reverse registration order. */
+void seg6_exit(void)
+{
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	seg6_hmac_exit();
+#endif
+	seg6_iptunnel_exit();
+	unregister_pernet_subsys(&ip6_segments_ops);
+	genl_unregister_family(&seg6_genl_family);
+}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
new file mode 100644
index 000000000000..ef1c8a46e7ac
--- /dev/null
+++ 
b/net/ipv6/seg6_hmac.c @@ -0,0 +1,484 @@ +/* + * SR-IPv6 implementation -- HMAC functions + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/icmpv6.h> +#include <linux/mroute6.h> +#include <linux/slab.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/rawv6.h> +#include <net/ndisc.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/xfrm.h> + +#include <linux/cryptohash.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <net/seg6.h> +#include <net/genetlink.h> +#include <net/seg6_hmac.h> +#include <linux/random.h> + +static char * __percpu *hmac_ring; + +static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct seg6_hmac_info *hinfo = obj; + + return (hinfo->hmackeyid != *(__u32 *)arg->key); +} + +static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo) +{ + kfree_rcu(hinfo, rcu); +} + +static void seg6_free_hi(void *ptr, void *arg) +{ + struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr; + + if (hinfo) + seg6_hinfo_release(hinfo); +} + +static const struct rhashtable_params rht_params = { + .head_offset = offsetof(struct seg6_hmac_info, node), + .key_offset = offsetof(struct seg6_hmac_info, hmackeyid), + .key_len = sizeof(u32), + .automatic_shrinking = true, + .obj_cmpfn = seg6_hmac_cmpfn, +}; + +static struct seg6_hmac_algo hmac_algos[] = 
{ + { + .alg_id = SEG6_HMAC_ALGO_SHA1, + .name = "hmac(sha1)", + }, + { + .alg_id = SEG6_HMAC_ALGO_SHA256, + .name = "hmac(sha256)", + }, +}; + +static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) +{ + struct sr6_tlv_hmac *tlv; + + if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5) + return NULL; + + if (!sr_has_hmac(srh)) + return NULL; + + tlv = (struct sr6_tlv_hmac *) + ((char *)srh + ((srh->hdrlen + 1) << 3) - 40); + + if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38) + return NULL; + + return tlv; +} + +static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) +{ + struct seg6_hmac_algo *algo; + int i, alg_count; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + if (algo->alg_id == alg_id) + return algo; + } + + return NULL; +} + +static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, + u8 *output, int outlen) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int ret, dgsize; + + algo = __hmac_get_algo(hinfo->alg_id); + if (!algo) + return -ENOENT; + + tfm = *this_cpu_ptr(algo->tfms); + + dgsize = crypto_shash_digestsize(tfm); + if (dgsize > outlen) { + pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", + dgsize, outlen); + return -ENOMEM; + } + + ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); + goto failed; + } + + shash = *this_cpu_ptr(algo->shashs); + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, text, psize, output); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); + goto failed; + } + + return dgsize; + +failed: + return ret; +} + +int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, + struct in6_addr *saddr, u8 *output) +{ + __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); + u8 
tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; + int plen, i, dgsize, wrsize; + char *ring, *off; + + /* a 160-byte buffer for digest output allows to store highest known + * hash function (RadioGatun) with up to 1216 bits + */ + + /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */ + plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; + + /* this limit allows for 14 segments */ + if (plen >= SEG6_HMAC_RING_SIZE) + return -EMSGSIZE; + + /* Let's build the HMAC text on the ring buffer. The text is composed + * as follows, in order: + * + * 1. Source IPv6 address (128 bits) + * 2. first_segment value (8 bits) + * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0) + * 4. HMAC Key ID (32 bits) + * 5. All segments in the segments list (n * 128 bits) + */ + + local_bh_disable(); + ring = *this_cpu_ptr(hmac_ring); + off = ring; + + /* source address */ + memcpy(off, saddr, 16); + off += 16; + + /* first_segment value */ + *off++ = hdr->first_segment; + + /* cleanup flag */ + *off++ = !!(sr_has_cleanup(hdr)) << 7; + + /* HMAC Key ID */ + memcpy(off, &hmackeyid, 4); + off += 4; + + /* all segments in the list */ + for (i = 0; i < hdr->first_segment + 1; i++) { + memcpy(off, hdr->segments + i, 16); + off += 16; + } + + dgsize = __do_hmac(hinfo, ring, plen, tmp_out, + SEG6_HMAC_MAX_DIGESTSIZE); + local_bh_enable(); + + if (dgsize < 0) + return dgsize; + + wrsize = SEG6_HMAC_FIELD_LEN; + if (wrsize > dgsize) + wrsize = dgsize; + + memset(output, 0, SEG6_HMAC_FIELD_LEN); + memcpy(output, tmp_out, wrsize); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_compute); + +/* checks if an incoming SR-enabled packet's HMAC status matches + * the incoming policy. 
+ * + * called with rcu_read_lock() + */ +bool seg6_hmac_validate_skb(struct sk_buff *skb) +{ + u8 hmac_output[SEG6_HMAC_FIELD_LEN]; + struct net *net = dev_net(skb->dev); + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + struct ipv6_sr_hdr *srh; + struct inet6_dev *idev; + + idev = __in6_dev_get(skb->dev); + + srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + tlv = seg6_get_tlv_hmac(srh); + + /* mandatory check but no tlv */ + if (idev->cnf.seg6_require_hmac > 0 && !tlv) + return false; + + /* no check */ + if (idev->cnf.seg6_require_hmac < 0) + return true; + + /* check only if present */ + if (idev->cnf.seg6_require_hmac == 0 && !tlv) + return true; + + /* now, seg6_require_hmac >= 0 && tlv */ + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + return false; + + if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) + return false; + + if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + return false; + + return true; +} +EXPORT_SYMBOL(seg6_hmac_validate_skb); + +/* called with rcu_read_lock() */ +struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + + return hinfo; +} +EXPORT_SYMBOL(seg6_hmac_info_lookup); + +int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + int err; + + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_add); + +int seg6_hmac_info_del(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + int err = -ENOENT; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + if (!hinfo) + goto out; + + err = rhashtable_remove_fast(&sdata->hmac_infos, 
&hinfo->node, + rht_params); + if (err) + goto out; + + seg6_hinfo_release(hinfo); + +out: + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_del); + +int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh) +{ + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + int err = -ENOENT; + + tlv = seg6_get_tlv_hmac(srh); + if (!tlv) + return -EINVAL; + + rcu_read_lock(); + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + goto out; + + memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN); + err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(seg6_push_hmac); + +static int seg6_hmac_init_ring(void) +{ + int i; + + hmac_ring = alloc_percpu(char *); + + if (!hmac_ring) + return -ENOMEM; + + for_each_possible_cpu(i) { + char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL); + + if (!ring) + return -ENOMEM; + + *per_cpu_ptr(hmac_ring, i) = ring; + } + + return 0; +} + +static int seg6_hmac_init_algo(void) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int i, alg_count, cpu; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + + for (i = 0; i < alg_count; i++) { + struct crypto_shash **p_tfm; + int shsize; + + algo = &hmac_algos[i]; + algo->tfms = alloc_percpu(struct crypto_shash *); + if (!algo->tfms) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + p_tfm = per_cpu_ptr(algo->tfms, cpu); + *p_tfm = tfm; + } + + p_tfm = this_cpu_ptr(algo->tfms); + tfm = *p_tfm; + + shsize = sizeof(*shash) + crypto_shash_descsize(tfm); + + algo->shashs = alloc_percpu(struct shash_desc *); + if (!algo->shashs) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + shash = kzalloc(shsize, GFP_KERNEL); + if (!shash) + return -ENOMEM; + *per_cpu_ptr(algo->shashs, cpu) = shash; + } + } + + return 0; +} + +int __init 
seg6_hmac_init(void) +{ + int ret; + + ret = seg6_hmac_init_ring(); + if (ret < 0) + goto out; + + ret = seg6_hmac_init_algo(); + +out: + return ret; +} +EXPORT_SYMBOL(seg6_hmac_init); + +int __net_init seg6_hmac_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_init(&sdata->hmac_infos, &rht_params); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_net_init); + +void seg6_hmac_exit(void) +{ + struct seg6_hmac_algo *algo = NULL; + int i, alg_count, cpu; + + for_each_possible_cpu(i) { + char *ring = *per_cpu_ptr(hmac_ring, i); + + kfree(ring); + } + free_percpu(hmac_ring); + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + for_each_possible_cpu(cpu) { + struct crypto_shash *tfm; + struct shash_desc *shash; + + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); + free_percpu(algo->shashs); + } +} +EXPORT_SYMBOL(seg6_hmac_exit); + +void __net_exit seg6_hmac_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL); +} +EXPORT_SYMBOL(seg6_hmac_net_exit); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c new file mode 100644 index 000000000000..bbfca22c34ae --- /dev/null +++ b/net/ipv6/seg6_iptunnel.c @@ -0,0 +1,431 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/lwtunnel.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include <net/ip6_fib.h> +#include <net/route.h> +#include <net/seg6.h> +#include <linux/seg6.h> +#include <linux/seg6_iptunnel.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> +#ifdef CONFIG_DST_CACHE +#include <net/dst_cache.h> +#endif +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +struct seg6_lwt { +#ifdef CONFIG_DST_CACHE + struct dst_cache cache; +#endif + struct seg6_iptunnel_encap tuninfo[0]; +}; + +static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_lwt *)lwt->data; +} + +static inline struct seg6_iptunnel_encap * +seg6_encap_lwtunnel(struct lwtunnel_state *lwt) +{ + return seg6_lwt_lwtunnel(lwt)->tuninfo; +} + +static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { + [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, +}; + +int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) +{ + struct seg6_iptunnel_encap *data; + struct nlattr *nla; + int len; + + len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, tuninfo, len); + + return 0; +} + +static void set_tun_src(struct net *net, struct net_device *dev, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct in6_addr *tun_src; + + rcu_read_lock(); + + tun_src = rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, + saddr); + } + + rcu_read_unlock(); +} + +/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +static int 
seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + struct ipv6hdr *hdr, *inner_hdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, tot_len, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + + err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, tot_len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + hdr = ipv6_hdr(skb); + + /* inherit tc, flowlabel and hlim + * hlim will be decremented in ip6_forward() afterwards and + * decapsulation will overwrite inner hlim with outer hlim + */ + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = NEXTHDR_IPV6; + + hdr->daddr = isrh->segments[isrh->first_segment]; + set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, tot_len); + + return 0; +} + +/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +#ifdef CONFIG_IPV6_SEG6_INLINE +static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct ipv6hdr *hdr, *oldhdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + + err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), + sizeof(struct ipv6hdr)); + + skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + + memmove(hdr, oldhdr, sizeof(*hdr)); + + 
isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = hdr->nexthdr; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh->segments[0] = hdr->daddr; + hdr->daddr = isrh->segments[isrh->first_segment]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + struct net *net = dev_net(skb_dst(skb)->dev); + + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); + + return 0; +} +#endif + +static int seg6_do_srh(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct seg6_iptunnel_encap *tinfo; + int err = 0; + + tinfo = seg6_encap_lwtunnel(dst->lwtstate); + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + switch (tinfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + err = seg6_do_srh_inline(skb, tinfo->srh); + skb_reset_inner_headers(skb); + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + err = seg6_do_srh_encap(skb, tinfo->srh); + break; + } + + if (err) + return err; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_set_inner_protocol(skb, skb->protocol); + + return 0; +} + +int seg6_input(struct sk_buff *skb) +{ + int err; + + err = seg6_do_srh(skb); + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); +} + +int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; + int err = -EINVAL; + + err = seg6_do_srh(skb); + if (unlikely(err)) + goto drop; + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +#ifdef CONFIG_DST_CACHE + dst = dst_cache_get(&slwt->cache); +#endif + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + 
fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + +#ifdef CONFIG_DST_CACHE + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); +#endif + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); +drop: + kfree_skb(skb); + return err; +} + +static int seg6_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; + struct seg6_iptunnel_encap *tuninfo; + struct lwtunnel_state *newts; + int tuninfo_len, min_size; + struct seg6_lwt *slwt; + int err; + + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy); + + if (err < 0) + return err; + + if (!tb[SEG6_IPTUNNEL_SRH]) + return -EINVAL; + + tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); + tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); + + /* tuninfo must contain at least the iptunnel encap structure, + * the SRH and one segment + */ + min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + + sizeof(struct in6_addr); + if (tuninfo_len < min_size) + return -EINVAL; + + switch (tuninfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + break; + default: + return -EINVAL; + } + + /* verify that SRH is consistent */ + if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo))) + return -EINVAL; + + newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_lwt_lwtunnel(newts); + +#ifdef CONFIG_DST_CACHE + err = dst_cache_init(&slwt->cache, GFP_KERNEL); + if (err) { + kfree(newts); + return err; + } +#endif + + memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + + newts->type = LWTUNNEL_ENCAP_SEG6; + newts->flags |= 
LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); + + *ts = newts; + + return 0; +} + +#ifdef CONFIG_DST_CACHE +static void seg6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); +} +#endif + +static int seg6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) + return -EMSGSIZE; + + return 0; +} + +static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); +} + +static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); + struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); + + if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) + return 1; + + return memcmp(a_hdr, b_hdr, len); +} + +static const struct lwtunnel_encap_ops seg6_iptun_ops = { + .build_state = seg6_build_state, +#ifdef CONFIG_DST_CACHE + .destroy_state = seg6_destroy_state, +#endif + .output = seg6_output, + .input = seg6_input, + .fill_encap = seg6_fill_encap_info, + .get_encap_size = seg6_encap_nlsize, + .cmp_encap = seg6_encap_cmp, +}; + +int __init seg6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} + +void seg6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 59c483937aec..97830a6a9cbb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -227,6 +227,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = 
inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a27ab4eab39..aece1b15e744 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -233,6 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -824,6 +825,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 71963b23d5a5..5313818b7485 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -343,8 +343,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -425,7 +424,8 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, sk, skb, + sizeof(struct udphdr), off); } else { if (np->rxopt.all) ip6_datagram_recv_specific_ctl(sk, msg, skb); @@ -1137,6 +1137,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a2ed3bda4ddc..85948c69b236 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -715,7 +715,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_info(session, L2TP_MSG_SEQ, "%s: requested to enable seq numbers by LNS\n", session->name); - session->send_seq = -1; + session->send_seq = 1; l2tp_session_set_header_len(session, tunnel->version); } } else { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 42de4ccd159f..fce25afb652a 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -338,7 +338,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ea2ae6664cc8..1cea54feab27 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,7 +410,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, @@ -519,6 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); 
fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 59aa2d204e4a..3620fba31786 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -220,14 +220,14 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]); if (info->attrs[L2TP_ATTR_UDP_DPORT]) cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); - if (info->attrs[L2TP_ATTR_UDP_CSUM]) - cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); + cfg.use_udp_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_CSUM]); #if IS_ENABLED(CONFIG_IPV6) - if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]) - cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); - if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]) - cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); + cfg.udp6_zero_tx_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + cfg.udp6_zero_rx_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); #endif } @@ -379,9 +379,24 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: + switch (sk->sk_family) { + case AF_INET: + if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) + goto nla_put_failure; + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (udp_get_no_check6_tx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX)) + goto nla_put_failure; + if (udp_get_no_check6_rx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX)) + goto nla_put_failure; + break; +#endif + } if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || - nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) || - nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) + 
nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) goto nla_put_failure; /* NOBREAK */ case L2TP_ENCAPTYPE_IP: diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 41d47bfda15c..2ddfec1e4acf 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1272,7 +1272,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->recv_seq = val ? -1 : 0; + session->recv_seq = !!val; l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set recv_seq=%d\n", session->name, session->recv_seq); @@ -1283,7 +1283,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->send_seq = val ? -1 : 0; + session->send_seq = !!val; { struct sock *ssk = ps->sock; struct pppox_sock *po = pppox_sk(ssk); @@ -1301,7 +1301,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->lns_mode = val ? -1 : 0; + session->lns_mode = !!val; l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set lns_mode=%d\n", session->name, session->lns_mode); diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index f9137a8341f4..0b202b343fd4 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -19,6 +19,7 @@ mac80211-y := \ aes_gcm.o \ aes_cmac.o \ aes_gmac.o \ + fils_aead.o \ cfg.o \ ethtool.o \ rx.o \ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 7663c28ba353..a4e0d59a40dd 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -18,21 +18,24 @@ #include "key.h" #include "aes_ccm.h" -void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len) +int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist sg[3]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; - char aead_req_data[sizeof(struct aead_request) + - 
crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *) aead_req_data; + aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; - memset(aead_req, 0, sizeof(aead_req_data)); + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, CCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -41,6 +44,9 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, aead_request_set_ad(aead_req, sg[0].length); crypto_aead_encrypt(aead_req); + kzfree(aead_req); + + return 0; } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, @@ -48,18 +54,23 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t mic_len) { struct scatterlist sg[3]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *) aead_req_data; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; + int err; if (data_len == 0) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, CCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -67,7 +78,10 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, aead_request_set_crypt(aead_req, sg, sg, data_len + mic_len, b_0); aead_request_set_ad(aead_req, sg[0].length); - return crypto_aead_decrypt(aead_req); + err = 
crypto_aead_decrypt(aead_req); + kzfree(aead_req); + + return err; } struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 6a73d1e4d186..fcd3254c5cf0 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -12,12 +12,14 @@ #include <linux/crypto.h> +#define CCM_AAD_LEN 32 + struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len); -void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len); +int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic, size_t mic_len); diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index bdf0790d89cc..d0bd5fff5f0a 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -23,7 +23,7 @@ #define AAD_LEN 20 -static void gf_mulx(u8 *pad) +void gf_mulx(u8 *pad) { int i, carry; @@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad) pad[AES_BLOCK_SIZE - 1] ^= 0x87; } -static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac, - size_t mac_len) +void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len) { u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 3702041f44fd..c827e1d5de8b 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,6 +11,10 @@ #include <linux/crypto.h> +void gf_mulx(u8 *pad); +void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len); struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], size_t key_len); 
void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c index 3afe361fd27c..8a4397cc1b08 100644 --- a/net/mac80211/aes_gcm.c +++ b/net/mac80211/aes_gcm.c @@ -15,20 +15,23 @@ #include "key.h" #include "aes_gcm.h" -void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) +int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[3]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; + aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; - memset(aead_req, 0, sizeof(aead_req_data)); + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, GCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); @@ -37,24 +40,31 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, aead_request_set_ad(aead_req, sg[0].length); crypto_aead_encrypt(aead_req); + kzfree(aead_req); + return 0; } int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[3]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; + int err; if (data_len == 0) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + 
aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, GCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); @@ -63,7 +73,10 @@ int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, data_len + IEEE80211_GCMP_MIC_LEN, j_0); aead_request_set_ad(aead_req, sg[0].length); - return crypto_aead_decrypt(aead_req); + err = crypto_aead_decrypt(aead_req); + kzfree(aead_req); + + return err; } struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h index 1347fda6b76a..55aed5352494 100644 --- a/net/mac80211/aes_gcm.h +++ b/net/mac80211/aes_gcm.h @@ -11,8 +11,10 @@ #include <linux/crypto.h> -void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); +#define GCM_AAD_LEN 32 + +int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, u8 *data, size_t data_len, u8 *mic); struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c index 3ddd927aaf30..bd72a862ddb7 100644 --- a/net/mac80211/aes_gmac.c +++ b/net/mac80211/aes_gmac.c @@ -17,28 +17,27 @@ #include "key.h" #include "aes_gmac.h" -#define GMAC_MIC_LEN 16 -#define GMAC_NONCE_LEN 12 -#define AAD_LEN 20 - int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, const u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[4]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = 
(void *)aead_req_data; - u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE]; + u8 *zero, *__aad, iv[AES_BLOCK_SIZE]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); if (data_len < GMAC_MIC_LEN) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + GMAC_MIC_LEN + GMAC_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + zero = (u8 *)aead_req + reqsize; + __aad = zero + GMAC_MIC_LEN; + memcpy(__aad, aad, GMAC_AAD_LEN); - memset(zero, 0, GMAC_MIC_LEN); sg_init_table(sg, 4); - sg_set_buf(&sg[0], aad, AAD_LEN); + sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN); sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); sg_set_buf(&sg[3], mic, GMAC_MIC_LEN); @@ -49,9 +48,10 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, aead_request_set_tfm(aead_req, tfm); aead_request_set_crypt(aead_req, sg, sg, 0, iv); - aead_request_set_ad(aead_req, AAD_LEN + data_len); + aead_request_set_ad(aead_req, GMAC_AAD_LEN + data_len); crypto_aead_encrypt(aead_req); + kzfree(aead_req); return 0; } diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h index d328204d73a8..32e6442c95be 100644 --- a/net/mac80211/aes_gmac.h +++ b/net/mac80211/aes_gmac.h @@ -11,6 +11,10 @@ #include <linux/crypto.h> +#define GMAC_AAD_LEN 20 +#define GMAC_MIC_LEN 16 +#define GMAC_NONCE_LEN 12 + struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], size_t key_len); int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f6749dced021..3b5fd4188f2a 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -315,11 +315,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, mutex_lock(&sta->ampdu_mlme.mtx); if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { - tid_agg_rx = rcu_dereference_protected( - sta->ampdu_mlme.tid_rx[tid], - 
lockdep_is_held(&sta->ampdu_mlme.mtx)); - - if (tid_agg_rx->dialog_token == dialog_token) { + if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { ht_dbg_ratelimited(sta->sdata, "updated AddBA Req from %pM on tid %u\n", sta->sta.addr, tid); @@ -396,7 +392,6 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } /* update data */ - tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; tid_agg_rx->buf_size = buf_size; @@ -418,6 +413,7 @@ end: if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); + sta->ampdu_mlme.tid_rx_token[tid] = dialog_token; } mutex_unlock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fd6541f3ade3..e91e503bf992 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -357,10 +357,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, mutex_lock(&local->sta_mtx); if (mac_addr) { - if (ieee80211_vif_is_mesh(&sdata->vif)) - sta = sta_info_get(sdata, mac_addr); - else - sta = sta_info_get_bss(sdata, mac_addr); + sta = sta_info_get_bss(sdata, mac_addr); /* * The ASSOC test makes sure the driver is ready to * receive the key. 
When wpa_supplicant has roamed @@ -867,6 +864,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, } sdata->needed_rx_chains = sdata->local->rx_chains; + sdata->vif.bss_conf.beacon_int = params->beacon_interval; + mutex_lock(&local->mtx); err = ieee80211_vif_use_channel(sdata, &params->chandef, IEEE80211_CHANCTX_SHARED); @@ -897,7 +896,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, vlan->vif.type); } - sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; @@ -1523,9 +1521,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, goto out_err; if (params->vlan && params->vlan != sta->sdata->dev) { - bool prev_4addr = false; - bool new_4addr = false; - vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { @@ -1535,26 +1530,21 @@ static int ieee80211_change_station(struct wiphy *wiphy, } rcu_assign_pointer(vlansdata->u.vlan.sta, sta); - new_4addr = true; __ieee80211_check_fast_rx_iface(vlansdata); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - prev_4addr = true; - } + + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; ieee80211_check_fast_xmit(sta); - if (sta->sta_state == IEEE80211_STA_AUTHORIZED && - prev_4addr != new_4addr) { - if (new_4addr) - atomic_dec(&sta->sdata->bss->num_mcast_sta); - else - atomic_inc(&sta->sdata->bss->num_mcast_sta); - } + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ieee80211_vif_inc_num_mcast(sta->sdata); ieee80211_send_layer2_update(sta); } @@ -2480,13 +2470,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, smps_mode == IEEE80211_SMPS_AUTOMATIC) return 0; - /* If no associated stations,
there's no need to do anything */ - if (!atomic_read(&sdata->u.ap.num_mcast_sta)) { - sdata->smps_mode = smps_mode; - ieee80211_queue_work(&sdata->local->hw, &sdata->recalc_smps); - return 0; - } - ht_dbg(sdata, "SMPS %d requested in AP mode, sending Action frame to %d stations\n", smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta)); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index f56e2f487d09..e02ba42ca827 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -210,6 +210,7 @@ static const char *hw_flag_names[] = { FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), FLAG(REPORTS_LOW_ACK), + FLAG(SUPPORTS_TX_FRAG), #undef FLAG }; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index bcec1240f41d..1a05f85cb1f0 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -477,6 +477,7 @@ IEEE80211_IF_FILE_RW(tdls_wider_bw); IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC); IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.ps.dtim_count, DEC); +IEEE80211_IF_FILE(num_mcast_sta_vlan, u.vlan.num_mcast_sta, ATOMIC); static ssize_t ieee80211_if_fmt_num_buffered_multicast( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -684,6 +685,13 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } +static void add_vlan_files(struct ieee80211_sub_if_data *sdata) +{ + /* add num_mcast_sta_vlan using name num_mcast_sta */ + debugfs_create_file("num_mcast_sta", 0400, sdata->vif.debugfs_dir, + sdata, &num_mcast_sta_vlan_ops); +} + static void add_ibss_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD_MODE(tsf, 0600); @@ -787,6 +795,9 @@ static void add_files(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_AP: add_ap_files(sdata); break; + case NL80211_IFTYPE_AP_VLAN: + add_vlan_files(sdata); + break; case NL80211_IFTYPE_WDS: add_wds_files(sdata); break; diff --git 
a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a2fcdb47a0e6..f6003b8c2c33 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -199,13 +199,18 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + bool tid_rx_valid; + tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); + tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid); p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); - p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx); + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", + tid_rx_valid); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - tid_rx ? tid_rx->dialog_token : 0); + tid_rx_valid ? + sta->ampdu_mlme.tid_rx_token[i] : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", tid_rx ? tid_rx->ssn : 0); diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c new file mode 100644 index 000000000000..ecfdd97758a3 --- /dev/null +++ b/net/mac80211/fils_aead.c @@ -0,0 +1,342 @@ +/* + * FILS AEAD for (Re)Association Request/Response frames + * Copyright 2016, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/skcipher.h> + +#include "ieee80211_i.h" +#include "aes_cmac.h" +#include "fils_aead.h" + +static int aes_s2v(struct crypto_cipher *tfm, + size_t num_elem, const u8 *addr[], size_t len[], u8 *v) +{ + u8 d[AES_BLOCK_SIZE], tmp[AES_BLOCK_SIZE]; + size_t i; + const u8 *data[2]; + size_t data_len[2], data_elems; + + /* D = AES-CMAC(K, <zero>) */ + memset(tmp, 0, AES_BLOCK_SIZE); + data[0] = tmp; + data_len[0] = AES_BLOCK_SIZE; + aes_cmac_vector(tfm, 1, data, data_len, d, AES_BLOCK_SIZE); + + for (i = 0; i < num_elem - 1; i++) { + /* D = dbl(D) xor AES_CMAC(K, Si) */ + gf_mulx(d); /* dbl */ + aes_cmac_vector(tfm, 1, &addr[i], &len[i], tmp, + AES_BLOCK_SIZE); + crypto_xor(d, tmp, AES_BLOCK_SIZE); + } + + if (len[i] >= AES_BLOCK_SIZE) { + /* len(Sn) >= 128 */ + size_t j; + const u8 *pos; + + /* T = Sn xorend D */ + + /* Use a temporary buffer to perform xorend on Sn (addr[i]) to + * avoid modifying the const input argument. + */ + data[0] = addr[i]; + data_len[0] = len[i] - AES_BLOCK_SIZE; + pos = addr[i] + data_len[0]; + for (j = 0; j < AES_BLOCK_SIZE; j++) + tmp[j] = pos[j] ^ d[j]; + data[1] = tmp; + data_len[1] = AES_BLOCK_SIZE; + data_elems = 2; + } else { + /* len(Sn) < 128 */ + /* T = dbl(D) xor pad(Sn) */ + gf_mulx(d); /* dbl */ + memset(tmp, 0, AES_BLOCK_SIZE); + memcpy(tmp, addr[i], len[i]); + tmp[len[i]] = 0x80; + crypto_xor(d, tmp, AES_BLOCK_SIZE); + data[0] = d; + data_len[0] = sizeof(d); + data_elems = 1; + } + /* V = AES-CMAC(K, T) */ + aes_cmac_vector(tfm, data_elems, data, data_len, v, AES_BLOCK_SIZE); + + return 0; +} + +/* Note: addr[] and len[] needs to have one extra slot at the end. 
*/ +static int aes_siv_encrypt(const u8 *key, size_t key_len, + const u8 *plain, size_t plain_len, + size_t num_elem, const u8 *addr[], + size_t len[], u8 *out) +{ + u8 v[AES_BLOCK_SIZE]; + struct crypto_cipher *tfm; + struct crypto_skcipher *tfm2; + struct skcipher_request *req; + int res; + struct scatterlist src[1], dst[1]; + u8 *tmp; + + key_len /= 2; /* S2V key || CTR key */ + + addr[num_elem] = plain; + len[num_elem] = plain_len; + num_elem++; + + /* S2V */ + + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + /* K1 for S2V */ + res = crypto_cipher_setkey(tfm, key, key_len); + if (!res) + res = aes_s2v(tfm, num_elem, addr, len, v); + crypto_free_cipher(tfm); + if (res) + return res; + + /* Use a temporary buffer of the plaintext to handle need for + * overwriting this during AES-CTR. + */ + tmp = kmemdup(plain, plain_len, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + /* IV for CTR before encrypted data */ + memcpy(out, v, AES_BLOCK_SIZE); + + /* Synthetic IV to be used as the initial counter in CTR: + * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31) + */ + v[8] &= 0x7f; + v[12] &= 0x7f; + + /* CTR */ + + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + if (IS_ERR(tfm2)) { + kfree(tmp); + return PTR_ERR(tfm2); + } + /* K2 for CTR */ + res = crypto_skcipher_setkey(tfm2, key + key_len, key_len); + if (res) + goto fail; + + req = skcipher_request_alloc(tfm2, GFP_KERNEL); + if (!req) { + res = -ENOMEM; + goto fail; + } + + sg_init_one(src, tmp, plain_len); + sg_init_one(dst, out + AES_BLOCK_SIZE, plain_len); + skcipher_request_set_crypt(req, src, dst, plain_len, v); + res = crypto_skcipher_encrypt(req); + skcipher_request_free(req); +fail: + kfree(tmp); + crypto_free_skcipher(tfm2); + return res; +} + +/* Note: addr[] and len[] needs to have one extra slot at the end. 
*/ +static int aes_siv_decrypt(const u8 *key, size_t key_len, + const u8 *iv_crypt, size_t iv_c_len, + size_t num_elem, const u8 *addr[], size_t len[], + u8 *out) +{ + struct crypto_cipher *tfm; + struct crypto_skcipher *tfm2; + struct skcipher_request *req; + struct scatterlist src[1], dst[1]; + size_t crypt_len; + int res; + u8 frame_iv[AES_BLOCK_SIZE], iv[AES_BLOCK_SIZE]; + u8 check[AES_BLOCK_SIZE]; + + crypt_len = iv_c_len - AES_BLOCK_SIZE; + key_len /= 2; /* S2V key || CTR key */ + addr[num_elem] = out; + len[num_elem] = crypt_len; + num_elem++; + + memcpy(iv, iv_crypt, AES_BLOCK_SIZE); + memcpy(frame_iv, iv_crypt, AES_BLOCK_SIZE); + + /* Synthetic IV to be used as the initial counter in CTR: + * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31) + */ + iv[8] &= 0x7f; + iv[12] &= 0x7f; + + /* CTR */ + + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + if (IS_ERR(tfm2)) + return PTR_ERR(tfm2); + /* K2 for CTR */ + res = crypto_skcipher_setkey(tfm2, key + key_len, key_len); + if (res) { + crypto_free_skcipher(tfm2); + return res; + } + + req = skcipher_request_alloc(tfm2, GFP_KERNEL); + if (!req) { + crypto_free_skcipher(tfm2); + return -ENOMEM; + } + + sg_init_one(src, iv_crypt + AES_BLOCK_SIZE, crypt_len); + sg_init_one(dst, out, crypt_len); + skcipher_request_set_crypt(req, src, dst, crypt_len, iv); + res = crypto_skcipher_decrypt(req); + skcipher_request_free(req); + crypto_free_skcipher(tfm2); + if (res) + return res; + + /* S2V */ + + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + /* K1 for S2V */ + res = crypto_cipher_setkey(tfm, key, key_len); + if (!res) + res = aes_s2v(tfm, num_elem, addr, len, check); + crypto_free_cipher(tfm); + if (res) + return res; + if (memcmp(check, frame_iv, AES_BLOCK_SIZE) != 0) + return -EINVAL; + return 0; +} + +int fils_encrypt_assoc_req(struct sk_buff *skb, + struct ieee80211_mgd_assoc_data *assoc_data) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + u8 *capab, *ies, *encr; + 
const u8 *addr[5 + 1], *session; + size_t len[5 + 1]; + size_t crypt_len; + + if (ieee80211_is_reassoc_req(mgmt->frame_control)) { + capab = (u8 *)&mgmt->u.reassoc_req.capab_info; + ies = mgmt->u.reassoc_req.variable; + } else { + capab = (u8 *)&mgmt->u.assoc_req.capab_info; + ies = mgmt->u.assoc_req.variable; + } + + session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, + ies, skb->data + skb->len - ies); + if (!session || session[1] != 1 + 8) + return -EINVAL; + /* encrypt after FILS Session element */ + encr = (u8 *)session + 2 + 1 + 8; + + /* AES-SIV AAD vectors */ + + /* The STA's MAC address */ + addr[0] = mgmt->sa; + len[0] = ETH_ALEN; + /* The AP's BSSID */ + addr[1] = mgmt->da; + len[1] = ETH_ALEN; + /* The STA's nonce */ + addr[2] = assoc_data->fils_nonces; + len[2] = FILS_NONCE_LEN; + /* The AP's nonce */ + addr[3] = &assoc_data->fils_nonces[FILS_NONCE_LEN]; + len[3] = FILS_NONCE_LEN; + /* The (Re)Association Request frame from the Capability Information + * field to the FILS Session element (both inclusive). 
+ */ + addr[4] = capab; + len[4] = encr - capab; + + crypt_len = skb->data + skb->len - encr; + skb_put(skb, AES_BLOCK_SIZE); + return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len, + encr, crypt_len, 1, addr, len, encr); +} + +int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, + u8 *frame, size_t *frame_len, + struct ieee80211_mgd_assoc_data *assoc_data) +{ + struct ieee80211_mgmt *mgmt = (void *)frame; + u8 *capab, *ies, *encr; + const u8 *addr[5 + 1], *session; + size_t len[5 + 1]; + int res; + size_t crypt_len; + + if (*frame_len < 24 + 6) + return -EINVAL; + + capab = (u8 *)&mgmt->u.assoc_resp.capab_info; + ies = mgmt->u.assoc_resp.variable; + session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, + ies, frame + *frame_len - ies); + if (!session || session[1] != 1 + 8) { + mlme_dbg(sdata, + "No (valid) FILS Session element in (Re)Association Response frame from %pM", + mgmt->sa); + return -EINVAL; + } + /* decrypt after FILS Session element */ + encr = (u8 *)session + 2 + 1 + 8; + + /* AES-SIV AAD vectors */ + + /* The AP's BSSID */ + addr[0] = mgmt->sa; + len[0] = ETH_ALEN; + /* The STA's MAC address */ + addr[1] = mgmt->da; + len[1] = ETH_ALEN; + /* The AP's nonce */ + addr[2] = &assoc_data->fils_nonces[FILS_NONCE_LEN]; + len[2] = FILS_NONCE_LEN; + /* The STA's nonce */ + addr[3] = assoc_data->fils_nonces; + len[3] = FILS_NONCE_LEN; + /* The (Re)Association Response frame from the Capability Information + * field to the FILS Session element (both inclusive). 
+ */ + addr[4] = capab; + len[4] = encr - capab; + + crypt_len = frame + *frame_len - encr; + if (crypt_len < AES_BLOCK_SIZE) { + mlme_dbg(sdata, + "Not enough room for AES-SIV data after FILS Session element in (Re)Association Response frame from %pM", + mgmt->sa); + return -EINVAL; + } + res = aes_siv_decrypt(assoc_data->fils_kek, assoc_data->fils_kek_len, + encr, crypt_len, 5, addr, len, encr); + if (res != 0) { + mlme_dbg(sdata, + "AES-SIV decryption of (Re)Association Response frame from %pM failed", + mgmt->sa); + return res; + } + *frame_len -= AES_BLOCK_SIZE; + return 0; +} diff --git a/net/mac80211/fils_aead.h b/net/mac80211/fils_aead.h new file mode 100644 index 000000000000..fbc65232f0b3 --- /dev/null +++ b/net/mac80211/fils_aead.h @@ -0,0 +1,19 @@ +/* + * FILS AEAD for (Re)Association Request/Response frames + * Copyright 2016, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef FILS_AEAD_H +#define FILS_AEAD_H + +int fils_encrypt_assoc_req(struct sk_buff *skb, + struct ieee80211_mgd_assoc_data *assoc_data); +int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, + u8 *frame, size_t *frame_len, + struct ieee80211_mgd_assoc_data *assoc_data); + +#endif /* FILS_AEAD_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 34c2add2c455..d37a577f63a1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -84,6 +84,8 @@ struct ieee80211_local; #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL +extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; + #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) #define IEEE80211_MAX_NAN_INSTANCE_ID 255 @@ -307,6 +309,7 @@ struct ieee80211_if_vlan { /* used for all tx if the VLAN is configured to 4-addr mode */ struct sta_info __rcu *sta; + atomic_t num_mcast_sta; /* number of stations receiving multicast */ }; struct mesh_stats { @@ -398,6 +401,10 @@ struct ieee80211_mgd_assoc_data { struct ieee80211_vht_cap ap_vht_cap; + u8 fils_nonces[2 * FILS_NONCE_LEN]; + u8 fils_kek[FILS_MAX_KEK_LEN]; + size_t fils_kek_len; + size_t ie_len; u8 ie[]; }; @@ -442,7 +449,7 @@ struct ieee80211_if_managed { struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; - u8 bssid[ETH_ALEN]; + u8 bssid[ETH_ALEN] __aligned(2); u16 aid; @@ -1527,6 +1534,23 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status) return false; } +void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata); + +/* This function returns the number of multicast stations connected to this + * interface. It returns -1 if that number is not tracked, that is for netdevs + * not in AP or AP_VLAN mode or when using 4addr. 
+ */ +static inline int +ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata) +{ + if (sdata->vif.type == NL80211_IFTYPE_AP) + return atomic_read(&sdata->u.ap.num_mcast_sta); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) + return atomic_read(&sdata->u.vlan.num_mcast_sta); + return -1; +} + u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, struct ieee80211_rx_status *status, unsigned int mpdu_len, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 73e6a8fd2845..41497b670e2b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1998,3 +1998,19 @@ void ieee80211_iface_exit(void) { unregister_netdevice_notifier(&mac80211_netdev_notifier); } + +void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata) +{ + if (sdata->vif.type == NL80211_IFTYPE_AP) + atomic_inc(&sdata->u.ap.num_mcast_sta); + else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + atomic_inc(&sdata->u.vlan.num_mcast_sta); +} + +void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata) +{ + if (sdata->vif.type == NL80211_IFTYPE_AP) + atomic_dec(&sdata->u.ap.num_mcast_sta); + else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + atomic_dec(&sdata->u.vlan.num_mcast_sta); +} diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1075ac24c8c5..1822c77f2b1c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -549,6 +549,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_MAC_ON_CREATE | NL80211_FEATURE_USERSPACE_MPM | NL80211_FEATURE_FULL_AP_CLIENT_STATE; + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA); if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -821,6 +822,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; + if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_TX_FRAG) && + !local->ops->set_frag_threshold)) + return -EINVAL; + if 
(WARN_ON(local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN) && (!local->ops->start_nan || !local->ops->stop_nan))) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7486f2dab4ba..d157b250ff77 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -30,6 +30,7 @@ #include "driver-ops.h" #include "rate.h" #include "led.h" +#include "fils_aead.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) @@ -652,6 +653,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) 2 + sizeof(struct ieee80211_ht_cap) + /* HT */ 2 + sizeof(struct ieee80211_vht_cap) + /* VHT */ assoc_data->ie_len + /* extra IEs */ + (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) + 9, /* WMM */ GFP_KERNEL); if (!skb) @@ -875,6 +877,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) memcpy(pos, assoc_data->ie + offset, noffset - offset); } + if (assoc_data->fils_kek_len && + fils_encrypt_assoc_req(skb, assoc_data) < 0) { + dev_kfree_skb(skb); + return; + } + drv_mgd_prepare_tx(local, sdata); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -2618,6 +2626,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, case WLAN_AUTH_LEAP: case WLAN_AUTH_FT: case WLAN_AUTH_SAE: + case WLAN_AUTH_FILS_SK: + case WLAN_AUTH_FILS_SK_PFS: + case WLAN_AUTH_FILS_PK: break; case WLAN_AUTH_SHARED_KEY: if (ifmgd->auth_data->expected_transaction != 4) { @@ -3143,6 +3154,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, reassoc ? 
"Rea" : "A", mgmt->sa, capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + if (assoc_data->fils_kek_len && + fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0) + return; + pos = mgmt->u.assoc_resp.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); @@ -3193,7 +3208,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, uapsd_queues = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) if (sdata->tx_conf[ac].uapsd) - uapsd_queues |= BIT(ac); + uapsd_queues |= ieee80211_ac_to_qos_mask[ac]; } cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues); @@ -4479,24 +4494,36 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, case NL80211_AUTHTYPE_SAE: auth_alg = WLAN_AUTH_SAE; break; + case NL80211_AUTHTYPE_FILS_SK: + auth_alg = WLAN_AUTH_FILS_SK; + break; + case NL80211_AUTHTYPE_FILS_SK_PFS: + auth_alg = WLAN_AUTH_FILS_SK_PFS; + break; + case NL80211_AUTHTYPE_FILS_PK: + auth_alg = WLAN_AUTH_FILS_PK; + break; default: return -EOPNOTSUPP; } - auth_data = kzalloc(sizeof(*auth_data) + req->sae_data_len + + auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len + req->ie_len, GFP_KERNEL); if (!auth_data) return -ENOMEM; auth_data->bss = req->bss; - if (req->sae_data_len >= 4) { - __le16 *pos = (__le16 *) req->sae_data; - auth_data->sae_trans = le16_to_cpu(pos[0]); - auth_data->sae_status = le16_to_cpu(pos[1]); - memcpy(auth_data->data, req->sae_data + 4, - req->sae_data_len - 4); - auth_data->data_len += req->sae_data_len - 4; + if (req->auth_data_len >= 4) { + if (req->auth_type == NL80211_AUTHTYPE_SAE) { + __le16 *pos = (__le16 *) req->auth_data; + + auth_data->sae_trans = le16_to_cpu(pos[0]); + auth_data->sae_status = le16_to_cpu(pos[1]); + } + memcpy(auth_data->data, req->auth_data + 4, + req->auth_data_len - 4); + auth_data->data_len += req->auth_data_len - 4; } if (req->ie && req->ie_len) { @@ -4692,6 +4719,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data 
*sdata, assoc_data->ie_len = req->ie_len; } + if (req->fils_kek) { + /* should already be checked in cfg80211 - so warn */ + if (WARN_ON(req->fils_kek_len > FILS_MAX_KEK_LEN)) { + err = -EINVAL; + goto err_free; + } + memcpy(assoc_data->fils_kek, req->fils_kek, + req->fils_kek_len); + assoc_data->fils_kek_len = req->fils_kek_len; + } + + if (req->fils_nonces) + memcpy(assoc_data->fils_nonces, req->fils_nonces, + 2 * FILS_NONCE_LEN); + assoc_data->bss = req->bss; if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index c3f610bba3fe..eede5c6db8d5 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -820,7 +820,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) break; rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->da); + sta = sta_info_get_bss(sdata, mgmt->da); rcu_read_unlock(); if (!sta) return -ENOLINK; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6175db385ba7..eeab7250f4b9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1394,13 +1394,15 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid) u8 ac = ieee802_1d_to_ac[tid & 7]; /* - * If this AC is not trigger-enabled do nothing. + * If this AC is not trigger-enabled do nothing unless the + * driver is calling us after it already checked. * * NB: This could/should check a separate bitmap of trigger- * enabled queues, but for now we only implement uAPSD w/o * TSPEC changes to the ACs, so they're always the same. 
*/ - if (!(sta->sta.uapsd_queues & BIT(ac))) + if (!(sta->sta.uapsd_queues & ieee80211_ac_to_qos_mask[ac]) && + tid != IEEE80211_NUM_TIDS) return; /* if we are in a service period, do nothing */ @@ -2215,7 +2217,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { - if (is_multicast_ether_addr(ehdr->h_dest)) { + if (is_multicast_ether_addr(ehdr->h_dest) && + ieee80211_vif_get_num_mcast_if(sdata) != 0) { /* * send multicast frames both to higher layers in * local net stack and back to the wireless medium @@ -2224,7 +2227,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if (!xmit_skb) net_info_ratelimited("%s: failed to clone multicast frame\n", dev->name); - } else { + } else if (!is_multicast_ether_addr(ehdr->h_dest)) { dsta = sta_info_get(sdata, skb->data); if (dsta) { /* @@ -2298,6 +2301,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + struct ethhdr ethhdr; + const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -2308,24 +2313,53 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (!(status->rx_flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; - if (ieee80211_has_a4(hdr->frame_control) && - rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !rx->sdata->u.vlan.sta) - return RX_DROP_UNUSABLE; + if (unlikely(ieee80211_has_a4(hdr->frame_control))) { + switch (rx->sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + if (!rx->sdata->u.vlan.sta) + return RX_DROP_UNUSABLE; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sdata->u.mgd.use_4addr) + return RX_DROP_UNUSABLE; + break; + default: + return RX_DROP_UNUSABLE; + } + check_da = NULL; + check_sa = NULL; + } else switch 
(rx->sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + check_da = NULL; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sta || + !test_sta_flag(rx->sta, WLAN_STA_TDLS_PEER)) + check_sa = NULL; + break; + case NL80211_IFTYPE_MESH_POINT: + check_sa = NULL; + break; + default: + break; + } - if (is_multicast_ether_addr(hdr->addr1) && - ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - rx->sdata->u.vlan.sta) || - (rx->sdata->vif.type == NL80211_IFTYPE_STATION && - rx->sdata->u.mgd.use_4addr))) + if (is_multicast_ether_addr(hdr->addr1)) return RX_DROP_UNUSABLE; skb->dev = dev; __skb_queue_head_init(&frame_list); + if (ieee80211_data_to_8023_exthdr(skb, &ethhdr, + rx->sdata->vif.addr, + rx->sdata->vif.type)) + return RX_DROP_UNUSABLE; + ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, - rx->local->hw.extra_tx_headroom, true); + rx->local->hw.extra_tx_headroom, + check_da, check_sa); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 78e9ecbc96e6..236d47e76ced 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -709,7 +709,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; - if (ignore_for_tim & BIT(ac)) + if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac]) continue; indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || @@ -1389,7 +1389,7 @@ ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, return true; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - if (ignored_acs & BIT(ac)) + if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; if (!skb_queue_empty(&sta->tx_filtered[ac]) || @@ -1414,7 +1414,7 @@ ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; - if (ignored_acs & BIT(ac)) + if
(ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; tids = ieee80211_tids_for_ac(ac); @@ -1482,7 +1482,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { - int tid; + int tid, ac; /* * For PS-Poll, this can only happen due to a race condition @@ -1500,7 +1500,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, */ /* This will evaluate to 1, 3, 5 or 7. */ - tid = 7 - ((ffs(~ignored_acs) - 1) << 1); + for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++) + if (ignored_acs & BIT(ac)) + continue; + tid = 7 - 2 * ac; ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { @@ -1871,10 +1874,7 @@ int sta_info_move_state(struct sta_info *sta, if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { - if (sta->sdata->vif.type == NL80211_IFTYPE_AP || - (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !sta->sdata->u.vlan.sta)) - atomic_dec(&sta->sdata->bss->num_mcast_sta); + ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_clear_fast_xmit(sta); ieee80211_clear_fast_rx(sta); @@ -1882,10 +1882,7 @@ int sta_info_move_state(struct sta_info *sta, break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state == IEEE80211_STA_ASSOC) { - if (sta->sdata->vif.type == NL80211_IFTYPE_AP || - (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !sta->sdata->u.vlan.sta)) - atomic_inc(&sta->sdata->bss->num_mcast_sta); + ieee80211_vif_inc_num_mcast(sta->sdata); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ed5fcb984a01..dd06ef0b8861 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -184,7 +184,6 @@ struct tid_ampdu_tx { * @ssn: Starting Sequence Number 
expected to be aggregated. * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). - * @dialog_token: dialog token for aggregation session * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and @@ -213,7 +212,6 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - u8 dialog_token; bool auto_seq; bool removed; }; @@ -225,6 +223,7 @@ struct tid_ampdu_rx { * to tid_tx[idx], which are protected by the sta spinlock) * tid_start_tx is also protected by sta->lock. * @tid_rx: aggregation info for Rx per TID -- RCU protected + * @tid_rx_token: dialog tokens for valid aggregation sessions * @tid_rx_timer_expired: bitmap indicating on which TIDs the * RX timer expired until the work for it runs * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the @@ -243,6 +242,7 @@ struct sta_ampdu_mlme { struct mutex mtx; /* rx */ struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; + u8 tid_rx_token[IEEE80211_NUM_TIDS]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1c56abc49627..62ccaf6f585d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -331,9 +331,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); return TX_DROP; } - } else if (unlikely(tx->sdata->vif.type == NL80211_IFTYPE_AP && - ieee80211_is_data(hdr->frame_control) && - !atomic_read(&tx->sdata->u.ap.num_mcast_sta))) { + } else if (unlikely(ieee80211_is_data(hdr->frame_control) && + ieee80211_vif_get_num_mcast_if(tx->sdata) == 0)) { /* * No associated STAs - no need to send multicast * frames. 
@@ -935,7 +934,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) if (info->flags & IEEE80211_TX_CTL_DONTFRAG) return TX_CONTINUE; - if (tx->local->ops->set_frag_threshold) + if (ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) return TX_CONTINUE; /* @@ -2801,7 +2800,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) /* fast-xmit doesn't handle fragmentation at all */ if (local->hw.wiphy->frag_threshold != (u32)-1 && - !local->ops->set_frag_threshold) + !ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG)) goto out; rcu_read_lock(); @@ -3060,11 +3059,12 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr; - struct ethhdr amsdu_hdr; + struct ethhdr *amsdu_hdr; int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header); int subframe_len = skb->len - hdr_len; void *data; - u8 *qc; + u8 *qc, *h_80211_src, *h_80211_dst; + const u8 *bssid; if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) return false; @@ -3072,19 +3072,44 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) return true; - if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr), + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr), &subframe_len)) return false; - amsdu_hdr.h_proto = cpu_to_be16(subframe_len); - memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN); - memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN); + data = skb_push(skb, sizeof(*amsdu_hdr)); + memmove(data, data + sizeof(*amsdu_hdr), hdr_len); + hdr = data; + amsdu_hdr = data + hdr_len; + /* h_80211_src/dst is addr* field within hdr */ + h_80211_src = data + fast_tx->sa_offs; + h_80211_dst = data + fast_tx->da_offs; + + amsdu_hdr->h_proto = cpu_to_be16(subframe_len); + ether_addr_copy(amsdu_hdr->h_source, h_80211_src); + 
ether_addr_copy(amsdu_hdr->h_dest, h_80211_dst); + + /* according to IEEE 802.11-2012 8.3.2 table 8-19, the outer SA/DA + * fields needs to be changed to BSSID for A-MSDU frames depending + * on FromDS/ToDS values. + */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + bssid = sdata->u.mgd.bssid; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + bssid = sdata->vif.addr; + break; + default: + bssid = NULL; + } - data = skb_push(skb, sizeof(amsdu_hdr)); - memmove(data, data + sizeof(amsdu_hdr), hdr_len); - memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); + if (bssid && ieee80211_has_fromds(hdr->frame_control)) + ether_addr_copy(h_80211_src, bssid); + + if (bssid && ieee80211_has_tods(hdr->frame_control)) + ether_addr_copy(h_80211_dst, bssid); - hdr = data; qc = ieee80211_get_qos_ctl(hdr); *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 545c79a42a77..ac59fbd280df 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3308,10 +3308,11 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *sdata_iter; enum nl80211_iftype iftype = sdata->wdev.iftype; - int num[NUM_NL80211_IFTYPES]; struct ieee80211_chanctx *ctx; - int num_different_channels = 0; int total = 1; + struct iface_combination_params params = { + .radar_detect = radar_detect, + }; lockdep_assert_held(&local->chanctx_mtx); @@ -3322,12 +3323,19 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, !chandef->chan)) return -EINVAL; - if (chandef) - num_different_channels = 1; - if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) return -EINVAL; + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) { + /* + * always passing this is harmless, since it'll be the + * same value that cfg80211 finds if it finds the same + * interface ... 
and that's always allowed + */ + params.new_beacon_int = sdata->vif.bss_conf.beacon_int; + } + /* Always allow software iftypes */ if (local->hw.wiphy->software_iftypes & BIT(iftype)) { if (radar_detect) @@ -3335,24 +3343,26 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, return 0; } - memset(num, 0, sizeof(num)); + if (chandef) + params.num_different_channels = 1; if (iftype != NL80211_IFTYPE_UNSPECIFIED) - num[iftype] = 1; + params.iftype_num[iftype] = 1; list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) continue; - radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + params.radar_detect |= + ieee80211_chanctx_radar_detect(local, ctx); if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { - num_different_channels++; + params.num_different_channels++; continue; } if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && cfg80211_chandef_compatible(chandef, &ctx->conf.def)) continue; - num_different_channels++; + params.num_different_channels++; } list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { @@ -3365,16 +3375,14 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) continue; - num[wdev_iter->iftype]++; + params.iftype_num[wdev_iter->iftype]++; total++; } - if (total == 1 && !radar_detect) + if (total == 1 && !params.radar_detect) return 0; - return cfg80211_check_combinations(local->hw.wiphy, - num_different_channels, - radar_detect, num); + return cfg80211_check_combinations(local->hw.wiphy, &params); } static void @@ -3390,12 +3398,10 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c, int ieee80211_max_num_channels(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int num[NUM_NL80211_IFTYPES] = {}; struct ieee80211_chanctx *ctx; - int num_different_channels = 0; - u8 radar_detect = 0; u32 max_num_different_channels = 1; int err; + struct
iface_combination_params params = {0}; lockdep_assert_held(&local->chanctx_mtx); @@ -3403,17 +3409,17 @@ int ieee80211_max_num_channels(struct ieee80211_local *local) if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) continue; - num_different_channels++; + params.num_different_channels++; - radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + params.radar_detect |= + ieee80211_chanctx_radar_detect(local, ctx); } list_for_each_entry_rcu(sdata, &local->interfaces, list) - num[sdata->wdev.iftype]++; + params.iftype_num[sdata->wdev.iftype]++; - err = cfg80211_iter_combinations(local->hw.wiphy, - num_different_channels, radar_detect, - num, ieee80211_iter_max_chans, + err = cfg80211_iter_combinations(local->hw.wiphy, &params, + ieee80211_iter_max_chans, &max_num_different_channels); if (err < 0) return err; @@ -3456,3 +3462,10 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq, *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; } EXPORT_SYMBOL(ieee80211_txq_get_depth); + +const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS] = { + IEEE80211_WMM_IE_STA_QOSINFO_AC_VO, + IEEE80211_WMM_IE_STA_QOSINFO_AC_VI, + IEEE80211_WMM_IE_STA_QOSINFO_AC_BE, + IEEE80211_WMM_IE_STA_QOSINFO_AC_BK +}; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 9eb0aee9105b..3e3d3014e9ab 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -236,26 +236,35 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, { struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + u8 flags; u8 *p; - u8 ack_policy, tid; if (!ieee80211_is_data_qos(hdr->frame_control)) return; p = ieee80211_get_qos_ctl(hdr); - tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - /* preserve EOSP bit */ - ack_policy = *p & IEEE80211_QOS_CTL_EOSP; + /* set up the first byte */ + + /* + * preserve everything but the TID and ACK policy + * (which we both write here) + */ + flags = *p &
~(IEEE80211_QOS_CTL_TID_MASK | + IEEE80211_QOS_CTL_ACK_POLICY_MASK); if (is_multicast_ether_addr(hdr->addr1) || sdata->noack_map & BIT(tid)) { - ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK; + flags |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK; info->flags |= IEEE80211_TX_CTL_NO_ACK; } - /* qos header is 2 bytes */ - *p++ = ack_policy | tid; + *p = flags | tid; + + /* set up the second byte */ + p++; + if (ieee80211_vif_is_mesh(&sdata->vif)) { /* preserve RSPI and Mesh PS Level bit */ *p &= ((IEEE80211_QOS_CTL_RSPI | diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b48c1e13e281..8af6dd388d11 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -57,7 +57,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) if (info->control.hw_key && (info->flags & IEEE80211_TX_CTL_DONTFRAG || - tx->local->ops->set_frag_threshold) && + ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { /* hwaccel - with no need for SW-generated MMIC */ return TX_CONTINUE; @@ -405,7 +405,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, u8 *pos; u8 pn[6]; u64 pn64; - u8 aad[2 * AES_BLOCK_SIZE]; + u8 aad[CCM_AAD_LEN]; u8 b_0[AES_BLOCK_SIZE]; if (info->control.hw_key && @@ -461,10 +461,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); - ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, mic_len), mic_len); - - return 0; + return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, + skb_put(skb, mic_len), mic_len); } @@ -639,7 +637,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) u8 *pos; u8 pn[6]; u64 pn64; - u8 aad[2 * AES_BLOCK_SIZE]; + u8 aad[GCM_AAD_LEN]; u8 j_0[AES_BLOCK_SIZE]; if (info->control.hw_key && @@ -696,10 +694,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff 
*skb) pos += IEEE80211_GCMP_HDR_LEN; gcmp_special_blocks(skb, pn, j_0, aad); - ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, - skb_put(skb, IEEE80211_GCMP_MIC_LEN)); - - return 0; + return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, + skb_put(skb, IEEE80211_GCMP_MIC_LEN)); } ieee80211_tx_result @@ -1123,9 +1119,9 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) struct ieee80211_key *key = tx->key; struct ieee80211_mmie_16 *mmie; struct ieee80211_hdr *hdr; - u8 aad[20]; + u8 aad[GMAC_AAD_LEN]; u64 pn64; - u8 nonce[12]; + u8 nonce[GMAC_NONCE_LEN]; if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) return TX_DROP; @@ -1171,7 +1167,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie_16 *mmie; - u8 aad[20], mic[16], ipn[6], nonce[12]; + u8 aad[GMAC_AAD_LEN], mic[GMAC_MIC_LEN], ipn[6], nonce[GMAC_NONCE_LEN]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!ieee80211_is_mgmt(hdr->frame_control)) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 13290a70fa71..1308a56f2591 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -246,6 +246,7 @@ enum { ncsi_dev_state_config_gls, ncsi_dev_state_config_done, ncsi_dev_state_suspend_select = 0x0401, + ncsi_dev_state_suspend_gls, ncsi_dev_state_suspend_dcnt, ncsi_dev_state_suspend_dc, ncsi_dev_state_suspend_deselect, @@ -264,6 +265,7 @@ struct ncsi_dev_priv { #endif unsigned int package_num; /* Number of packages */ struct list_head packages; /* List of packages */ + struct ncsi_channel *hot_channel; /* Channel was ever active */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ #define NCSI_REQ_START_IDX 1 diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index b41a6617d498..6898e7229285 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ 
-141,23 +141,35 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, return -ENODEV; /* If the channel is active one, we need reconfigure it */ + spin_lock_irqsave(&nc->lock, flags); ncm = &nc->modes[NCSI_MODE_LINK]; hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE || - (ncm->data[3] & 0x1)) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); return 0; + } - if (ndp->flags & NCSI_DEV_HWA) + spin_unlock_irqrestore(&nc->lock, flags); + if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1)) ndp->flags |= NCSI_DEV_RESHUFFLE; /* If this channel is the active one and the link doesn't * work, we have to choose another channel to be active one. * The logic here is exactly similar to what we do when link * is down on the active channel. + * + * On the other hand, we need configure it when host driver + * state on the active channel becomes ready. */ ncsi_stop_channel_monitor(nc); + + spin_lock_irqsave(&nc->lock, flags); + nc->state = (ncm->data[3] & 0x1) ? 
NCSI_CHANNEL_INACTIVE : + NCSI_CHANNEL_ACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 5e509e547c2d..a3bd5fa8ad09 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -540,42 +540,86 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_suspend_select; /* Fall through */ case ncsi_dev_state_suspend_select: - case ncsi_dev_state_suspend_dcnt: - case ncsi_dev_state_suspend_dc: - case ncsi_dev_state_suspend_deselect: ndp->pending_req_num = 1; - np = ndp->active_package; - nc = ndp->active_channel; + nca.type = NCSI_PKT_CMD_SP; nca.package = np->id; - if (nd->state == ncsi_dev_state_suspend_select) { - nca.type = NCSI_PKT_CMD_SP; - nca.channel = NCSI_RESERVED_CHANNEL; - if (ndp->flags & NCSI_DEV_HWA) - nca.bytes[0] = 0; - else - nca.bytes[0] = 1; + nca.channel = NCSI_RESERVED_CHANNEL; + if (ndp->flags & NCSI_DEV_HWA) + nca.bytes[0] = 0; + else + nca.bytes[0] = 1; + + /* To retrieve the last link states of channels in current + * package when current active channel needs fail over to + * another one. It means we will possibly select another + * channel as next active one. The link states of channels + * are most important factor of the selection. So we need + * accurate link states. Unfortunately, the link states on + * inactive channels can't be updated with LSC AEN in time. 
+ */ + if (ndp->flags & NCSI_DEV_RESHUFFLE) + nd->state = ncsi_dev_state_suspend_gls; + else nd->state = ncsi_dev_state_suspend_dcnt; - } else if (nd->state == ncsi_dev_state_suspend_dcnt) { - nca.type = NCSI_PKT_CMD_DCNT; - nca.channel = nc->id; - nd->state = ncsi_dev_state_suspend_dc; - } else if (nd->state == ncsi_dev_state_suspend_dc) { - nca.type = NCSI_PKT_CMD_DC; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; + case ncsi_dev_state_suspend_gls: + ndp->pending_req_num = np->channel_num; + + nca.type = NCSI_PKT_CMD_GLS; + nca.package = np->id; + + nd->state = ncsi_dev_state_suspend_dcnt; + NCSI_FOR_EACH_CHANNEL(np, nc) { nca.channel = nc->id; - nca.bytes[0] = 1; - nd->state = ncsi_dev_state_suspend_deselect; - } else if (nd->state == ncsi_dev_state_suspend_deselect) { - nca.type = NCSI_PKT_CMD_DP; - nca.channel = NCSI_RESERVED_CHANNEL; - nd->state = ncsi_dev_state_suspend_done; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; } + break; + case ncsi_dev_state_suspend_dcnt: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_DCNT; + nca.package = np->id; + nca.channel = nc->id; + + nd->state = ncsi_dev_state_suspend_dc; ret = ncsi_xmit_cmd(&nca); - if (ret) { - nd->state = ncsi_dev_state_functional; - return; - } + if (ret) + goto error; + + break; + case ncsi_dev_state_suspend_dc: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_DC; + nca.package = np->id; + nca.channel = nc->id; + nca.bytes[0] = 1; + + nd->state = ncsi_dev_state_suspend_deselect; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; + case ncsi_dev_state_suspend_deselect: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_DP; + nca.package = np->id; + nca.channel = NCSI_RESERVED_CHANNEL; + + nd->state = ncsi_dev_state_suspend_done; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; break; case ncsi_dev_state_suspend_done: @@ -589,6 +633,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) netdev_warn(nd->dev, "Wrong NCSI state 
0x%x in suspend\n", nd->state); } + + return; +error: + nd->state = ncsi_dev_state_functional; } static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) @@ -597,6 +645,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) struct net_device *dev = nd->dev; struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; + struct ncsi_channel *hot_nc = NULL; struct ncsi_cmd_arg nca; unsigned char index; unsigned long flags; @@ -702,12 +751,20 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_config_done: spin_lock_irqsave(&nc->lock, flags); - if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) + if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + hot_nc = nc; nc->state = NCSI_CHANNEL_ACTIVE; - else + } else { + hot_nc = NULL; nc->state = NCSI_CHANNEL_INACTIVE; + } spin_unlock_irqrestore(&nc->lock, flags); + /* Update the hot channel */ + spin_lock_irqsave(&ndp->lock, flags); + ndp->hot_channel = hot_nc; + spin_unlock_irqrestore(&ndp->lock, flags); + ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); break; @@ -725,10 +782,14 @@ error: static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) { struct ncsi_package *np; - struct ncsi_channel *nc, *found; + struct ncsi_channel *nc, *found, *hot_nc; struct ncsi_channel_mode *ncm; unsigned long flags; + spin_lock_irqsave(&ndp->lock, flags); + hot_nc = ndp->hot_channel; + spin_unlock_irqrestore(&ndp->lock, flags); + /* The search is done once an inactive channel with up * link is found. 
*/ @@ -746,6 +807,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) if (!found) found = nc; + if (nc == hot_nc) + found = nc; + ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { spin_unlock_irqrestore(&nc->lock, flags); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e8d56d9a4df2..44410d30d461 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -57,6 +57,10 @@ config NF_CONNTRACK config NF_LOG_COMMON tristate +config NF_LOG_NETDEV + tristate "Netdev packet logging" + select NF_LOG_COMMON + if NF_CONNTRACK config NF_CONNTRACK_MARK @@ -474,6 +478,12 @@ config NFT_META This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. +config NFT_RT + tristate "Netfilter nf_tables routing module" + help + This option adds the "rt" expression that you can use to match + packet routing information such as the packet nexthop. + config NFT_NUMGEN tristate "Netfilter nf_tables number generator module" help @@ -581,6 +591,19 @@ config NFT_HASH This option adds the "hash" expression that you can use to perform a hash operation on registers. +config NFT_FIB + tristate + +config NFT_FIB_INET + depends on NF_TABLES_INET + depends on NFT_FIB_IPV4 + depends on NFT_FIB_IPV6 + tristate "Netfilter nf_tables fib inet support" + help + This option allows using the FIB expression from the inet table. + The lookup will be delegated to the IPv4 or IPv6 FIB depending + on the protocol of the packet. 
+ if NF_TABLES_NETDEV config NF_DUP_NETDEV @@ -1409,9 +1432,10 @@ config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on !NF_CONNTRACK || NF_CONNTRACK depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n + depends on NF_SOCKET_IPV4 + depends on NF_SOCKET_IPV6 select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c23c3c84416f..5bbf767672ec 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -48,6 +48,9 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o +# packet logging for netdev family +obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o + obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o @@ -81,6 +84,7 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_RT) += nft_rt.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o @@ -96,6 +100,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_FIB) += nft_fib.o +obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fcb5d1df11e9..004af030ef1a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -361,16 +361,9 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err; - - RCU_INIT_POINTER(state->hook_entries, entry); - err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if 
(err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) - goto next_hook; - kfree_skb(skb); - } + ret = nf_queue(skb, state, &entry, verdict); + if (ret == 1 && entry) + goto next_hook; } return ret; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ba6a1d421222..df2f5a3901df 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -983,7 +983,7 @@ static void gc_worker(struct work_struct *work) return; ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) next_run = 0; gc_work->last_bucket = i; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index e0adb5959342..9fdb655f85bc 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -18,7 +18,7 @@ unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - unsigned int queuenum); + struct nf_hook_entry **entryp, unsigned int verdict); void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index 119fe1cb1ea9..ed9b80815fa0 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -175,6 +175,33 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); +/* bridge and netdev logging families share this code. 
*/ +void nf_log_l2packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_IPV6): + nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + } +} +EXPORT_SYMBOL_GPL(nf_log_l2packet); + static int __init nf_log_common_init(void) { return 0; diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c new file mode 100644 index 000000000000..1f645949f3d8 --- /dev/null +++ b/net/netfilter/nf_log_netdev.c @@ -0,0 +1,80 @@ +/* + * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_log.h> + +static void nf_log_netdev_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); +} + +static struct nf_logger nf_netdev_logger __read_mostly = { + .name = "nf_log_netdev", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_netdev_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_netdev_net_init(struct net *net) +{ + return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); +} + +static void __net_exit nf_log_netdev_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_netdev_logger); +} + +static struct pernet_operations nf_log_netdev_net_ops = { + .init = nf_log_netdev_net_init, + .exit = nf_log_netdev_net_exit, +}; + +static int __init nf_log_netdev_init(void) +{ + int ret; + + /* Request to load the real packet loggers. 
*/ + nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); + + ret = register_pernet_subsys(&nf_log_netdev_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); + return 0; +} + +static void __exit nf_log_netdev_exit(void) +{ + unregister_pernet_subsys(&nf_log_netdev_net_ops); + nf_log_unregister(&nf_netdev_logger); +} + +module_init(nf_log_netdev_init); +module_exit(nf_log_netdev_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter netdev packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 96964a0070e1..8f08d759844a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -107,13 +107,8 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) rcu_read_unlock(); } -/* - * Any packet that leaves via this function must come back - * through nf_reinject(). - */ -int nf_queue(struct sk_buff *skb, - struct nf_hook_state *state, - unsigned int queuenum) +static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, + unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -161,6 +156,27 @@ err: return status; } +/* Packets leaving via this function must come back through nf_reinject(). 
*/ +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp, unsigned int verdict) +{ + struct nf_hook_entry *entry = *entryp; + int ret; + + RCU_INIT_POINTER(state->hook_entries, entry); + ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + if (ret < 0) { + if (ret == -ESRCH && + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { + *entryp = rcu_dereference(entry->next); + return 1; + } + kfree_skb(skb); + } + + return 0; +} + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct nf_hook_entry *hook_entry; @@ -187,26 +203,26 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) entry->state.thresh = INT_MIN; if (verdict == NF_ACCEPT) { - next_hook: - verdict = nf_iterate(skb, &entry->state, &hook_entry); + hook_entry = rcu_dereference(hook_entry->next); + if (hook_entry) +next_hook: + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: +okfn: local_bh_disable(); entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: - RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); - err = nf_queue(skb, &entry->state, - verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if (err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) + err = nf_queue(skb, &entry->state, &hook_entry, verdict); + if (err == 1) { + if (hook_entry) goto next_hook; - kfree_skb(skb); + goto okfn; } break; case NF_STOLEN: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b70d3ea1430e..24db22257586 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4423,7 +4423,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, */ unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { - int val; + u32 val; val = ntohl(nla_get_be32(attr)); if (val > max) diff --git a/net/netfilter/nf_tables_core.c 
b/net/netfilter/nf_tables_core.c index 0dd5c695482f..70de32a6d5c0 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -178,6 +178,7 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: + case NF_STOLEN: nft_trace_packet(&info, chain, rule, rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index eb086a192c5a..7435505037b7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = alloc_skb(n, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index af832c526048..5379f788a372 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -69,7 +69,7 @@ struct nfqnl_instance { * Following fields are dirtied for each queued packet, * keep them in same cache line if possible. 
*/ - spinlock_t lock; + spinlock_t lock ____cacheline_aligned_in_smp; unsigned int queue_total; unsigned int id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */ diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2e53739812b1..e25b35d70e4d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d7b0d171172a..6837348c8993 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = { .owner = THIS_MODULE, }; +static void nft_notrack_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct sk_buff *skb = pkt->skb; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(pkt->skb, &ctinfo); + /* Previously seen (loopback or untracked)? Ignore. 
*/ + if (ct) + return; + + ct = nf_ct_untracked_get(); + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; +} + +static struct nft_expr_type nft_notrack_type; +static const struct nft_expr_ops nft_notrack_ops = { + .type = &nft_notrack_type, + .size = NFT_EXPR_SIZE(0), + .eval = nft_notrack_eval, +}; + +static struct nft_expr_type nft_notrack_type __read_mostly = { + .name = "notrack", + .ops = &nft_notrack_ops, + .owner = THIS_MODULE, +}; + static int __init nft_ct_module_init(void) { + int err; + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE); - return nft_register_expr(&nft_ct_type); + err = nft_register_expr(&nft_ct_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_notrack_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_expr(&nft_ct_type); + return err; } static void __exit nft_ct_module_exit(void) { + nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } @@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("ct"); +MODULE_ALIAS_NFT_EXPR("notrack"); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index e3b83c31da2e..517f08767a3c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -158,7 +158,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + tb[NFTA_DYNSET_TIMEOUT]))); } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -246,7 +247,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, 
cpu_to_be64(priv->timeout), + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, + cpu_to_be64(jiffies_to_msecs(priv->timeout)), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index a84cf3d66056..47beb3abcc9d 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len, err; + u32 offset, len; + int err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c new file mode 100644 index 000000000000..4944a8b7f7a7 --- /dev/null +++ b/net/netfilter/nft_fib.c @@ -0,0 +1,159 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Generic part shared by ipv4 and ipv6 backends. 
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { + [NFTA_FIB_DREG] = { .type = NLA_U32 }, + [NFTA_FIB_RESULT] = { .type = NLA_U32 }, + [NFTA_FIB_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL(nft_fib_policy); + +#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: /* fallthrough */ + case NFT_FIB_RESULT_OIFNAME: + hooks = (1 << NF_INET_PRE_ROUTING); + break; + case NFT_FIB_RESULT_ADDRTYPE: + if (priv->flags & NFTA_FIB_F_IIF) + hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + else if (priv->flags & NFTA_FIB_F_OIF) + hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_FORWARD); + else + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING); + + break; + default: + return -EINVAL; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} +EXPORT_SYMBOL_GPL(nft_fib_validate); + +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_fib *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS]) + return -EINVAL; + + priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS])); + + if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL)) + return 
-EINVAL; + + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == + (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) == + (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0) + return -EINVAL; + + priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = sizeof(int); + break; + case NFT_FIB_RESULT_OIFNAME: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = IFNAMSIZ; + break; + case NFT_FIB_RESULT_ADDRTYPE: + len = sizeof(u32); + break; + default: + return -EINVAL; + } + + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); + if (err < 0) + return err; + + return nft_fib_validate(ctx, expr, NULL); +} +EXPORT_SYMBOL_GPL(nft_fib_init); + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg)) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result))) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags))) + return -1; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_fib_dump); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index) +{ + struct net_device *dev; + u32 *dreg = reg; + + switch (r) { + case NFT_FIB_RESULT_OIF: + *dreg = index; + break; + case NFT_FIB_RESULT_OIFNAME: + dev = dev_get_by_index_rcu(pkt->net, index); + strncpy(reg, dev ? 
dev->name : "", IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + *dreg = 0; + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib_store_result); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c new file mode 100644 index 000000000000..fe8943b572b7 --- /dev/null +++ b/net/netfilter/nft_fib_inet.c @@ -0,0 +1,82 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +#include <net/netfilter/nft_fib.h> + +static void nft_fib_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + switch (pkt->pf) { + case NFPROTO_IPV4: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib4_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib4_eval_type(expr, regs, pkt); + } + break; + case NFPROTO_IPV6: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib6_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib6_eval_type(expr, regs, pkt); + } + break; + } + + regs->verdict.code = NF_DROP; +} + +static struct nft_expr_type nft_fib_inet_type; +static const struct nft_expr_ops nft_fib_inet_ops = { + .type = &nft_fib_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib_inet_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static struct nft_expr_type nft_fib_inet_type __read_mostly = { + .family = 
NFPROTO_INET, + .name = "fib", + .ops = &nft_fib_inet_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fib_inet_module_init(void) +{ + return nft_register_expr(&nft_fib_inet_type); +} + +static void __exit nft_fib_inet_module_exit(void) +{ + nft_unregister_expr(&nft_fib_inet_type); +} + +module_init(nft_fib_inet_module_init); +module_exit(nft_fib_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(1, "fib"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 09473b415b95..baf694de3935 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -44,6 +44,7 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_LEN] = { .type = NLA_U32 }, [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, + [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, }; static int nft_hash_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index d17018ff54e6..4528adea7ede 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6c1e0246706e..64994023bf81 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, case NFPROTO_NETDEV: hooks = 1 << NF_NETDEV_INGRESS; break; + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = 1 << NF_INET_PRE_ROUTING; + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 55bc5ab78d4a..a66b36097b8f 100644 --- 
a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, return -EOVERFLOW; priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - atomic_set(&priv->counter, 0); + atomic_set(&priv->counter, priv->modulus - 1); return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index c6d5358482d1..fbc88009ca2e 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -28,22 +28,20 @@ static void nft_range_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_range_expr *priv = nft_expr_priv(expr); - bool mismatch; int d1, d2; d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len); d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len); switch (priv->op) { case NFT_RANGE_EQ: - mismatch = (d1 < 0 || d2 > 0); + if (d1 < 0 || d2 > 0) + regs->verdict.code = NFT_BREAK; break; case NFT_RANGE_NEQ: - mismatch = (d1 >= 0 && d2 <= 0); + if (d1 >= 0 && d2 <= 0) + regs->verdict.code = NFT_BREAK; break; } - - if (mismatch) - regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { @@ -59,6 +57,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr struct nft_range_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc_from, desc_to; int err; + u32 op; err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), &desc_from, tb[NFTA_RANGE_FROM_DATA]); @@ -80,7 +79,20 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr if (err < 0) goto err2; - priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op); + if (err < 0) + goto err2; + + switch (op) { + case NFT_RANGE_EQ: + case NFT_RANGE_NEQ: + break; + default: + err = -EINVAL; + goto err2; + } + + priv->op = op; priv->len = desc_from.len; 
return 0; err2: diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c new file mode 100644 index 000000000000..9e5ec1f67020 --- /dev/null +++ b/net/netfilter/nft_rt.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/dst.h> +#include <net/ip6_route.h> +#include <net/route.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> + +struct nft_rt { + enum nft_rt_keys key:8; + enum nft_registers dreg:8; +}; + +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + u32 *dest = &regs->data[priv->dreg]; + const struct dst_entry *dst; + + dst = skb_dst(skb); + if (!dst) + goto err; + + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: + *dest = dst->tclassid; + break; +#endif + case NFT_RT_NEXTHOP4: + if (pkt->pf != NFPROTO_IPV4) + goto err; + + *dest = rt_nexthop((const struct rtable *)dst, + ip_hdr(skb)->daddr); + break; + case NFT_RT_NEXTHOP6: + if (pkt->pf != NFPROTO_IPV6) + goto err; + + memcpy(dest, rt6_nexthop((struct rt6_info *)dst, + &ipv6_hdr(skb)->daddr), + sizeof(struct in6_addr)); + break; + default: + WARN_ON(1); + goto err; + } + return; + +err: + regs->verdict.code = NFT_BREAK; +} + +const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { + [NFTA_RT_DREG] = { .type = NLA_U32 }, + [NFTA_RT_KEY] = { .type = NLA_U32 }, +}; + +int nft_rt_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const 
tb[]) +{ + struct nft_rt *priv = nft_expr_priv(expr); + unsigned int len; + + if (tb[NFTA_RT_KEY] == NULL || + tb[NFTA_RT_DREG] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY])); + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: +#endif + case NFT_RT_NEXTHOP4: + len = sizeof(u32); + break; + case NFT_RT_NEXTHOP6: + len = sizeof(struct in6_addr); + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); +} + +int nft_rt_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_rt_type; +static const struct nft_expr_ops nft_rt_get_ops = { + .type = &nft_rt_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)), + .eval = nft_rt_get_eval, + .init = nft_rt_get_init, + .dump = nft_rt_get_dump, +}; + +static struct nft_expr_type nft_rt_type __read_mostly = { + .name = "rt", + .ops = &nft_rt_get_ops, + .policy = nft_rt_policy, + .maxattr = NFTA_RT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_rt_module_init(void) +{ + return nft_register_expr(&nft_rt_type); +} + +static void __exit nft_rt_module_exit(void) +{ + nft_unregister_expr(&nft_rt_type); +} + +module_init(nft_rt_module_init); +module_exit(nft_rt_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anders K. 
Pedersen <akp@cohaesio.com>"); +MODULE_ALIAS_NFT_EXPR("rt"); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e0aa7c1d0224..fc4977456c30 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1513,7 +1513,7 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) if (!num_hooks) return ERR_PTR(-EINVAL); - ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); + ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 018eed7e1ff1..8668a5c18dc3 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -32,6 +32,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; + li.u.ulog.flags = 0; if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2fab0c65aa94..b89b688e9d01 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -431,7 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. 
*/ #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) -#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFFULL / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -473,7 +473,7 @@ static u64 user2credits(u64 user, int revision) return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1, XT_HASHLIMIT_SCALE); } else { - if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + if (user > 0xFFFFFFFFFFFFFFFFULL / (HZ*CREDITS_PER_JIFFY)) return div64_u64(user, XT_HASHLIMIT_SCALE_v2) * HZ * CREDITS_PER_JIFFY; diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 89d53104c6b3..000e70377f85 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -26,6 +26,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fan Du <fan.du@windriver.com>"); MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); +MODULE_ALIAS("ipt_ipcomp"); +MODULE_ALIAS("ip6t_ipcomp"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index ac1d3c3d09e7..ec06fb1cb16f 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -42,29 +42,31 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, e = minfo->ports[++i]; pr_debug("src or dst matches with %d-%d?\n", s, e); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src >= s && src <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst >= s && dst <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && ((dst >= s && dst <= e) - || (src >= s && src <= e))) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + return (src >= s && src <= e) ^ minfo->invert; + case XT_MULTIPORT_DESTINATION: + return (dst >= s && dst <= e) ^ minfo->invert; + case XT_MULTIPORT_EITHER: + return ((dst 
>= s && dst <= e) || + (src >= s && src <= e)) ^ minfo->invert; + default: + break; + } } else { /* exact port matching */ pr_debug("src or dst matches with %d?\n", s); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && (src == s || dst == s)) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + return (src == s) ^ minfo->invert; + case XT_MULTIPORT_DESTINATION: + return (dst == s) ^ minfo->invert; + case XT_MULTIPORT_EITHER: + return (src == s || dst == s) ^ minfo->invert; + default: + break; + } } } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index b10ade272b50..018c369c9f0d 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,76 +22,14 @@ #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -#define XT_SOCKET_HAVE_IPV6 1 #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/inet6_hashtables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif +#include <net/netfilter/nf_socket.h> #include <linux/netfilter/xt_socket.h> -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#define XT_SOCKET_HAVE_CONNTRACK 1 -#include <net/netfilter/nf_conntrack.h> -#endif - -static int -extract_icmp4_fields(const struct sk_buff *skb, - u8 *protocol, - __be32 *raddr, - __be32 *laddr, - __be16 *rport, - __be16 *lport) -{ - unsigned int outside_hdrlen = ip_hdrlen(skb); - struct iphdr *inside_iph, _inside_iph; - struct icmphdr *icmph, _icmph; - __be16 *ports, _ports[2]; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - switch (icmph->type) { - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_REDIRECT: - case ICMP_TIME_EXCEEDED: - case ICMP_PARAMETERPROB: - break; - default: - return 1; - } - - inside_iph = 
skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr), - sizeof(_inside_iph), &_inside_iph); - if (inside_iph == NULL) - return 1; - - if (inside_iph->protocol != IPPROTO_TCP && - inside_iph->protocol != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr) + - (inside_iph->ihl << 2), - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_iph->protocol; - *laddr = inside_iph->saddr; - *lport = ports[0]; - *raddr = inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - /* "socket" match based redirection (no specific rule) * =================================================== * @@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb, * then local services could intercept traffic going through the * box. */ -static struct sock * -xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, - const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - return NULL; -} - -static bool xt_socket_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - return inet_twsk(sk)->tw_transparent; - - case TCP_NEW_SYN_RECV: - return inet_rsk(inet_reqsk(sk))->no_srccheck; - - default: - return inet_sk(sk)->transparent; - } -} - -static struct sock *xt_socket_lookup_slow_v4(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - const struct iphdr *iph = ip_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - __be32 uninitialized_var(daddr), uninitialized_var(saddr); - __be16 
uninitialized_var(dport), uninitialized_var(sport); - u8 uninitialized_var(protocol); -#ifdef XT_SOCKET_HAVE_CONNTRACK - struct nf_conn const *ct; - enum ip_conntrack_info ctinfo; -#endif - - if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - protocol = iph->protocol; - saddr = iph->saddr; - sport = hp->source; - daddr = iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = iph->protocol == IPPROTO_TCP ? - ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : - ip_hdrlen(skb) + sizeof(*hp); - - } else if (iph->protocol == IPPROTO_ICMP) { - if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, - &sport, &dport)) - return NULL; - } else { - return NULL; - } - -#ifdef XT_SOCKET_HAVE_CONNTRACK - /* Do the lookup with the original socket address in - * case this is a reply packet of an established - * SNAT-ted connection. - */ - ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && - ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_ESTABLISHED_REPLY) || - (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_RELATED_REPLY)) && - (ct->status & IPS_SRC_NAT_DONE)) { - - daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; - dport = (iph->protocol == IPPROTO_TCP) ? 
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; - } -#endif - - return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, - daddr, sport, dport, indev); -} - static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) @@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v4(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) return socket_match(skb, par, par->matchinfo); } -#ifdef XT_SOCKET_HAVE_IPV6 - -static int -extract_icmp6_fields(const struct sk_buff *skb, - unsigned int outside_hdrlen, - int *protocol, - const struct in6_addr **raddr, - const struct in6_addr **laddr, - __be16 *rport, - __be16 *lport, - struct ipv6hdr *ipv6_var) -{ - const struct ipv6hdr *inside_iph; - struct icmp6hdr *icmph, _icmph; - __be16 *ports, _ports[2]; - u8 inside_nexthdr; - __be16 inside_fragoff; - int inside_hdrlen; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) - return 1; - - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), - sizeof(*ipv6_var), ipv6_var); - if (inside_iph == NULL) - return 1; - inside_nexthdr = inside_iph->nexthdr; - - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + - 
sizeof(*ipv6_var), - &inside_nexthdr, &inside_fragoff); - if (inside_hdrlen < 0) - return 1; /* hjm: Packet has no/incomplete transport layer headers. */ - - if (inside_nexthdr != IPPROTO_TCP && - inside_nexthdr != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, inside_hdrlen, - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_nexthdr; - *laddr = &inside_iph->saddr; - *lport = ports[0]; - *raddr = &inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - -static struct sock * -xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, - const u8 protocol, - const struct in6_addr *saddr, const struct in6_addr *daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp6_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - - return NULL; -} - -static struct sock *xt_socket_lookup_slow_v6(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - __be16 uninitialized_var(dport), uninitialized_var(sport); - const struct in6_addr *daddr = NULL, *saddr = NULL; - struct ipv6hdr *iph = ipv6_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - int thoff = 0, tproto; - - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { - pr_debug("unable to find transport header in IPv6 packet, dropping\n"); - return NULL; - } - - if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - saddr = &iph->saddr; - sport = hp->source; - daddr = &iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = tproto == IPPROTO_TCP ? 
- thoff + __tcp_hdrlen((struct tcphdr *)hp) : - thoff + sizeof(*hp); - - } else if (tproto == IPPROTO_ICMPV6) { - struct ipv6hdr ipv6_var; - - if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport, &ipv6_var)) - return NULL; - } else { - return NULL; - } - - return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, - sport, dport, indev); -} - +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static bool socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { @@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v6(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -488,7 +203,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 1, @@ -512,7 +227,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 2, @@ -536,7 +251,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 3, @@ -554,7 +269,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); -#ifdef XT_SOCKET_HAVE_IPV6 
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_defrag_ipv6_enable(); #endif diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index df0cbcddda2c..bbd3bff885a1 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -362,8 +362,10 @@ int genl_register_family(struct genl_family *family) family->id = idr_alloc(&genl_fam_idr, family, start, end + 1, GFP_KERNEL); - if (!family->id) + if (family->id < 0) { + err = family->id; goto errout_locked; + } err = genl_validate_assign_mc_groups(family); if (err) @@ -399,7 +401,7 @@ int genl_unregister_family(const struct genl_family *family) { genl_lock_all(); - if (genl_family_find_byid(family->id)) { + if (!genl_family_find_byid(family->id)) { genl_unlock_all(); return -ENOENT; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 11db0d619c00..d2238b204691 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -250,7 +250,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - netdev_features_t features; + struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -258,9 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) !netif_carrier_ok(dev))) goto drop; - features = netif_skb_features(skb); - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) + skb = validate_xmit_skb_list(skb, dev); + if (skb != orig_skb) goto drop; txq = skb_get_tx_queue(dev, skb); @@ -280,7 +279,7 @@ static int packet_direct_xmit(struct sk_buff *skb) return ret; drop: atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return NET_XMIT_DROP; } diff --git a/net/rds/Makefile b/net/rds/Makefile index 0e72bec1529f..56c7d27eefee 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -13,5 +13,5 @@ obj-$(CONFIG_RDS_TCP) += rds_tcp.o rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ tcp_send.o tcp_stats.o 
-ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG +ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG diff --git a/net/rds/rds.h b/net/rds/rds.h index 25532a46602f..4121e1862444 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -33,7 +33,7 @@ #define KERNEL_HAS_ATOMIC64 #endif -#ifdef DEBUG +#ifdef RDS_DEBUG #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else /* sigh, pr_debug() causes unused variable warnings */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fcddacc92e01..3296a6ac583a 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -220,7 +220,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void rds_tcp_tc_info(struct socket *sock, unsigned int len, +static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { @@ -229,6 +229,7 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, unsigned long flags; struct sockaddr_in sin; int sinlen; + struct socket *sock; spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); @@ -237,12 +238,17 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0); - tsinfo.local_addr = sin.sin_addr.s_addr; - tsinfo.local_port = sin.sin_port; - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1); - tsinfo.peer_addr = sin.sin_addr.s_addr; - tsinfo.peer_port = sin.sin_port; + sock = tc->t_sock; + if (sock) { + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 0); + tsinfo.local_addr = sin.sin_addr.s_addr; + tsinfo.local_port = sin.sin_port; + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 1); + tsinfo.peer_addr = sin.sin_addr.s_addr; + tsinfo.peer_port = sin.sin_port; + } tsinfo.hdr_rem = tc->t_tinc_hdr_rem; tsinfo.data_rem = tc->t_tinc_data_rem; diff --git a/net/rds/tcp_listen.c 
b/net/rds/tcp_listen.c index e0b23fb5b8d5..c9c496844cd7 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -103,7 +103,7 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) if (!peer_is_smaller) return NULL; - for (i = 1; i < npaths; i++) { + for (i = 0; i < npaths; i++) { struct rds_conn_path *cp = &conn->c_path[i]; if (rds_conn_path_transition(cp, RDS_CONN_DOWN, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4353a29f3b57..1ed18d8c9c9f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -276,7 +276,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto error; trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, ERR_PTR(ret)); + here, NULL); spin_lock_bh(&call->conn->params.peer->lock); hlist_add_head(&call->error_link, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 44fb8d893c7d..1d87b5453ef7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk) ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(udp_sk, 0, 1, &ret); + skb = skb_recv_udp(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk) __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); - /* The socket buffer we have is owned by UDP, with UDP's data all over - * it, but we really want our own data there. + /* The UDP protocol already released all skb resources; + * we are free to add our own data there. 
*/ - skb_orphan(skb); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 941b724d523b..862eea6b266c 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -193,8 +193,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) fl6->fl6_dport = htons(7001); fl6->fl6_sport = htons(7000); dst = ip6_route_output(&init_net, NULL, fl6); - if (IS_ERR(dst)) { - _leave(" [route err %ld]", PTR_ERR(dst)); + if (dst->error) { + _leave(" [route err %d]", dst->error); return; } break; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a512b18c0088..f893d180da1c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1028,8 +1028,7 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n) if (tb[1] == NULL) return NULL; - if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]), - nla_len(tb[1]), NULL) < 0) + if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2d93be6717e5..6073a1132725 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -249,8 +249,11 @@ out: static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, u64 lastuse) { - tcf_lastuse_update(&a->tcfa_tm); + struct tcf_mirred *m = to_mirred(a); + struct tcf_t *tm = &m->tcf_tm; + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + tm->lastuse = lastuse; } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index af47bdf2f483..edc720f11687 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -16,7 +16,6 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> -#include <net/dst_metadata.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> @@ -67,6 +66,7 @@ static const struct nla_policy 
tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16}, }; static int tunnel_key_init(struct net *net, struct nlattr *nla, @@ -81,6 +81,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; + __be16 dst_port = 0; __be64 key_id; int ret = 0; int err; @@ -111,6 +112,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) + dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { __be32 saddr; @@ -120,7 +124,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, - TUNNEL_KEY, key_id, 0); + dst_port, TUNNEL_KEY, + key_id, 0); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { struct in6_addr saddr; @@ -130,7 +135,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, - TUNNEL_KEY, key_id, 0); + dst_port, TUNNEL_KEY, + key_id, 0); } if (!metadata) { @@ -258,7 +264,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || tunnel_key_dump_addresses(skb, - ¶ms->tcft_enc_metadata->u.tun_info)) + ¶ms->tcft_enc_metadata->u.tun_info) || + nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) goto nla_put_failure; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ee29a3375f6..2b2a7974e4bb 100644 
--- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -345,7 +345,8 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, + tfilter_notify(net, skb, n, tp, + t->tcm_handle, RTM_DELTFILTER, false); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6f40fba599b..e8dd09af0d0c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -43,6 +43,7 @@ struct fl_flow_key { struct flow_dissector_key_ipv4_addrs enc_ipv4; struct flow_dissector_key_ipv6_addrs enc_ipv6; }; + struct flow_dissector_key_ports enc_tp; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -155,6 +156,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, } skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + skb_key.enc_tp.src = key->tp_src; + skb_key.enc_tp.dst = key->tp_dst; } skb_key.indev_ifindex = skb->skb_iif; @@ -269,6 +272,14 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); } +static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) +{ + list_del_rcu(&f->list); + fl_hw_destroy_filter(tp, (unsigned long)f); + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, fl_destroy_filter); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -277,11 +288,8 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) if (!force && !list_empty(&head->filters)) return false; - list_for_each_entry_safe(f, next, &head->filters, list) { - fl_hw_destroy_filter(tp, (unsigned long)f); - list_del_rcu(&f->list); - call_rcu(&f->rcu, fl_destroy_filter); - } + list_for_each_entry_safe(f, next, &head->filters, list) + __fl_delete(tp, f); RCU_INIT_POINTER(tp->root, NULL); if (head->mask_assigned) 
rhashtable_destroy(&head->ht); @@ -339,6 +347,14 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -448,6 +464,13 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); + } else if (key->basic.ip_proto == IPPROTO_SCTP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC, + &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST, + &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, + sizeof(key->tp.dst)); } if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || @@ -484,6 +507,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id.keyid)); + fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + sizeof(key->enc_tp.src)); + + fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst)); + return 0; } @@ -551,6 +582,18 @@ static void fl_init_dissector(struct cls_fl_head *head, FLOW_DISSECTOR_KEY_PORTS, tp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, 
FLOW_DISSECTOR_KEY_VLAN, vlan); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6); + if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) || + FL_KEY_IS_MASKED(&mask->key, enc_ipv6)) + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL, + enc_control); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); skb_flow_dissector_init(&head->dissector, keys, cnt); } @@ -741,10 +784,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); - list_del_rcu(&f->list); - fl_hw_destroy_filter(tp, (unsigned long)f); - tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + __fl_delete(tp, f); return 0; } @@ -895,6 +935,14 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_SCTP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC, + &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST, + &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, + sizeof(key->tp.dst)))) + goto nla_put_failure; if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->enc_ipv4.src, @@ -920,7 +968,17 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, &mask->enc_key_id, TCA_FLOWER_UNSPEC, - sizeof(key->enc_key_id))) + sizeof(key->enc_key_id)) || + fl_dump_key_val(skb, &key->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + 
sizeof(key->enc_tp.src)) || + fl_dump_key_val(skb, &key->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst))) goto nla_put_failure; nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 206dc24add3a..f337f1bdd1d4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -960,6 +960,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev, sch->handle = handle; + /* This exist to keep backward compatible with a userspace + * loophole, what allowed userspace to get IFF_NO_QUEUE + * facility on older kernels by setting tx_queue_len=0 (prior + * to qdisc init), and then forgot to reinit tx_queue_len + * before again attaching a qdisc. + */ + if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); + } + if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (qdisc_is_percpu_stats(sch)) { sch->cpu_bstats = diff --git a/net/sctp/output.c b/net/sctp/output.c index 4282b488985b..f5320a87341e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -399,181 +399,72 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) atomic_inc(&sk->sk_wmem_alloc); } -/* All packets are sent to the network through this function from - * sctp_outq_tail(). - * - * The return value is a normal kernel error return value. - */ -int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) +static int sctp_packet_pack(struct sctp_packet *packet, + struct sk_buff *head, int gso, gfp_t gfp) { struct sctp_transport *tp = packet->transport; - struct sctp_association *asoc = tp->asoc; - struct sctphdr *sh; - struct sk_buff *nskb = NULL, *head = NULL; + struct sctp_auth_chunk *auth = NULL; struct sctp_chunk *chunk, *tmp; - struct sock *sk; - int err = 0; - int padding; /* How much padding do we need? 
*/ - int pkt_size; - __u8 has_data = 0; - int gso = 0; - int pktcount = 0; - struct dst_entry *dst; - unsigned char *auth = NULL; /* pointer to auth in skb data */ - - pr_debug("%s: packet:%p\n", __func__, packet); + int pkt_count = 0, pkt_size; + struct sock *sk = head->sk; + struct sk_buff *nskb; + int auth_len = 0; - /* Do NOT generate a chunkless packet. */ - if (list_empty(&packet->chunk_list)) - return err; - - /* Set up convenience variables... */ - chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); - sk = chunk->skb->sk; - - /* Allocate the head skb, or main one if not in GSO */ - if (packet->size > tp->pathmtu && !packet->ipfragok) { - if (sk_can_gso(sk)) { - gso = 1; - pkt_size = packet->overhead; - } else { - /* If this happens, we trash this packet and try - * to build a new one, hopefully correct this - * time. Application may notice this error. - */ - pr_err_once("Trying to GSO but underlying device doesn't support it."); - goto err; - } - } else { - pkt_size = packet->size; - } - head = alloc_skb(pkt_size + MAX_HEADER, gfp); - if (!head) - goto err; if (gso) { - NAPI_GRO_CB(head)->last = head; skb_shinfo(head)->gso_type = sk->sk_gso_type; + NAPI_GRO_CB(head)->last = head; + } else { + nskb = head; + pkt_size = packet->size; + goto merge; } - /* Make sure the outbound skb has enough header room reserved. */ - skb_reserve(head, packet->overhead + MAX_HEADER); - - /* Set the owning socket so that we know where to get the - * destination IP address. - */ - sctp_packet_set_owner_w(head, sk); - - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { - sctp_assoc_sync_pmtu(sk, asoc); - } - } - dst = dst_clone(tp->dst); - if (!dst) { - if (asoc) - IP_INC_STATS(sock_net(asoc->base.sk), - IPSTATS_MIB_OUTNOROUTES); - goto nodst; - } - skb_dst_set(head, dst); - - /* Build the SCTP header. 
*/ - sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr)); - skb_reset_transport_header(head); - sh->source = htons(packet->source_port); - sh->dest = htons(packet->destination_port); - - /* From 6.8 Adler-32 Checksum Calculation: - * After the packet is constructed (containing the SCTP common - * header and one or more control or DATA chunks), the - * transmitter shall: - * - * 1) Fill in the proper Verification Tag in the SCTP common - * header and initialize the checksum field to 0's. - */ - sh->vtag = htonl(packet->vtag); - sh->checksum = 0; - - pr_debug("***sctp_transmit_packet***\n"); - do { - /* Set up convenience variables... */ - chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); - pktcount++; - - /* Calculate packet size, so it fits in PMTU. Leave - * other chunks for the next packets. - */ - if (gso) { - pkt_size = packet->overhead; - list_for_each_entry(chunk, &packet->chunk_list, list) { - int padded = SCTP_PAD4(chunk->skb->len); - - if (pkt_size + padded > tp->pathmtu) - break; - pkt_size += padded; - } + /* calculate the pkt_size and alloc nskb */ + pkt_size = packet->overhead; + list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, + list) { + int padded = SCTP_PAD4(chunk->skb->len); - /* Allocate a new skb. */ - nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); - if (!nskb) - goto nomem; - - /* Make sure the outbound skb has enough header - * room reserved. - */ - skb_reserve(nskb, packet->overhead + MAX_HEADER); - } else { - nskb = head; + if (chunk == packet->auth) + auth_len = padded; + else if (auth_len + padded + packet->overhead > + tp->pathmtu) + return 0; + else if (pkt_size + padded > tp->pathmtu) + break; + pkt_size += padded; } + nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); + if (!nskb) + return 0; + skb_reserve(nskb, packet->overhead + MAX_HEADER); - /** - * 3.2 Chunk Field Descriptions - * - * The total length of a chunk (including Type, Length and - * Value fields) MUST be a multiple of 4 bytes. 
If the length - * of the chunk is not a multiple of 4 bytes, the sender MUST - * pad the chunk with all zero bytes and this padding is not - * included in the chunk length field. The sender should - * never pad with more than 3 bytes. - * - * [This whole comment explains SCTP_PAD4() below.] - */ - +merge: + /* merge chunks into nskb and append nskb into head list */ pkt_size -= packet->overhead; list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { + int padding; + list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ - if (!sctp_chunk_retransmitted(chunk) && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } - - has_data = 1; } padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) memset(skb_put(chunk->skb, padding), 0, padding); - /* if this is the auth chunk that we are adding, - * store pointer where it will be added and put - * the auth into the packet. - */ if (chunk == packet->auth) - auth = skb_tail_pointer(nskb); + auth = (struct sctp_auth_chunk *) + skb_tail_pointer(nskb); - memcpy(skb_put(nskb, chunk->skb->len), - chunk->skb->data, chunk->skb->len); + memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, + chunk->skb->len); pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", chunk, @@ -583,11 +474,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ntohs(chunk->chunk_hdr->length), chunk->skb->len, chunk->rtt_in_progress); - /* If this is a control chunk, this is our last - * reference. Free data chunks after they've been - * acknowledged or have failed. - * Re-queue auth chunks if needed. 
- */ pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) @@ -597,160 +483,161 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) break; } - /* SCTP-AUTH, Section 6.2 - * The sender MUST calculate the MAC as described in RFC2104 [2] - * using the hash function H as described by the MAC Identifier and - * the shared association key K based on the endpoint pair shared key - * described by the shared key identifier. The 'data' used for the - * computation of the AUTH-chunk is given by the AUTH chunk with its - * HMAC field set to zero (as shown in Figure 6) followed by all - * chunks that are placed after the AUTH chunk in the SCTP packet. - */ - if (auth) - sctp_auth_calculate_hmac(asoc, nskb, - (struct sctp_auth_chunk *)auth, - gfp); - - if (packet->auth) { - if (!list_empty(&packet->chunk_list)) { - /* We will generate more packets, so re-queue - * auth chunk. - */ + if (auth) { + sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp); + /* free auth if no more chunks, or add it back */ + if (list_empty(&packet->chunk_list)) + sctp_chunk_free(packet->auth); + else list_add(&packet->auth->list, &packet->chunk_list); - } else { - sctp_chunk_free(packet->auth); - packet->auth = NULL; - } } - if (!gso) - break; - - if (skb_gro_receive(&head, nskb)) { - kfree_skb(nskb); - goto nomem; + if (gso) { + if (skb_gro_receive(&head, nskb)) { + kfree_skb(nskb); + return 0; + } + if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= + sk->sk_gso_max_segs)) + return 0; } - nskb = NULL; - if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= - sk->sk_gso_max_segs)) - goto nomem; + + pkt_count++; } while (!list_empty(&packet->chunk_list)); - /* 2) Calculate the Adler-32 checksum of the whole packet, - * including the SCTP common header and all the - * chunks. - * - * Note: Adler-32 is no longer applicable, as has been replaced - * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. 
- * - * If it's a GSO packet, it's postponed to sctp_skb_segment. - */ - if (!sctp_checksum_disable || gso) { - if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) || - dst_xfrm(dst) || packet->ipfragok)) { - sh->checksum = sctp_compute_cksum(head, 0); - } else { - /* no need to seed pseudo checksum for SCTP */ - head->ip_summed = CHECKSUM_PARTIAL; - head->csum_start = skb_transport_header(head) - head->head; - head->csum_offset = offsetof(struct sctphdr, checksum); + if (gso) { + memset(head->cb, 0, max(sizeof(struct inet_skb_parm), + sizeof(struct inet6_skb_parm))); + skb_shinfo(head)->gso_segs = pkt_count; + skb_shinfo(head)->gso_size = GSO_BY_FRAGS; + rcu_read_lock(); + if (skb_dst(head) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); } + rcu_read_unlock(); + goto chksum; } - /* IP layer ECN support - * From RFC 2481 - * "The ECN-Capable Transport (ECT) bit would be set by the - * data sender to indicate that the end-points of the - * transport protocol are ECN-capable." - * - * Now setting the ECT bit all the time, as it should not cause - * any problems protocol-wise even if our peer ignores it. - * - * Note: The works for IPv6 layer checks this bit too later - * in transmission. See IP6_ECN_flow_xmit(). - */ - tp->af_specific->ecn_capable(sk); + if (sctp_checksum_disable) + return 1; - /* Set up the IP options. */ - /* BUG: not implemented - * For v4 this all lives somewhere in sk->sk_opt... - */ + if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) || + dst_xfrm(skb_dst(head)) || packet->ipfragok) { + struct sctphdr *sh = + (struct sctphdr *)skb_transport_header(head); - /* Dump that on IP! */ - if (asoc) { - asoc->stats.opackets += pktcount; - if (asoc->peer.last_sent_to != tp) - /* Considering the multiple CPU scenario, this is a - * "correcter" place for last_sent_to. 
--xguo - */ - asoc->peer.last_sent_to = tp; + sh->checksum = sctp_compute_cksum(head, 0); + } else { +chksum: + head->ip_summed = CHECKSUM_PARTIAL; + head->csum_start = skb_transport_header(head) - head->head; + head->csum_offset = offsetof(struct sctphdr, checksum); } - if (has_data) { - struct timer_list *timer; - unsigned long timeout; + return pkt_count; +} + +/* All packets are sent to the network through this function from + * sctp_outq_tail(). + * + * The return value is always 0 for now. + */ +int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) +{ + struct sctp_transport *tp = packet->transport; + struct sctp_association *asoc = tp->asoc; + struct sctp_chunk *chunk, *tmp; + int pkt_count, gso = 0; + struct dst_entry *dst; + struct sk_buff *head; + struct sctphdr *sh; + struct sock *sk; - /* Restart the AUTOCLOSE timer when sending data. */ - if (sctp_state(asoc, ESTABLISHED) && - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { - timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; - timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; + pr_debug("%s: packet:%p\n", __func__, packet); + if (list_empty(&packet->chunk_list)) + return 0; + chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); + sk = chunk->skb->sk; - if (!mod_timer(timer, jiffies + timeout)) - sctp_association_hold(asoc); + /* check gso */ + if (packet->size > tp->pathmtu && !packet->ipfragok) { + if (!sk_can_gso(sk)) { + pr_err_once("Trying to GSO but underlying device doesn't support it."); + goto out; } + gso = 1; + } + + /* alloc head skb */ + head = alloc_skb((gso ? 
packet->overhead : packet->size) + + MAX_HEADER, gfp); + if (!head) + goto out; + skb_reserve(head, packet->overhead + MAX_HEADER); + sctp_packet_set_owner_w(head, sk); + + /* set sctp header */ + sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr)); + skb_reset_transport_header(head); + sh->source = htons(packet->source_port); + sh->dest = htons(packet->destination_port); + sh->vtag = htonl(packet->vtag); + sh->checksum = 0; + + /* update dst if in need */ + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); } + dst = dst_clone(tp->dst); + if (!dst) { + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(head); + goto out; + } + skb_dst_set(head, dst); + /* pack up chunks */ + pkt_count = sctp_packet_pack(packet, head, gso, gfp); + if (!pkt_count) { + kfree_skb(head); + goto out; + } pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len); - if (gso) { - /* Cleanup our debris for IP stacks */ - memset(head->cb, 0, max(sizeof(struct inet_skb_parm), - sizeof(struct inet6_skb_parm))); + /* start autoclose timer */ + if (packet->has_data && sctp_state(asoc, ESTABLISHED) && + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { + struct timer_list *timer = + &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; + unsigned long timeout = + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; - skb_shinfo(head)->gso_segs = pktcount; - skb_shinfo(head)->gso_size = GSO_BY_FRAGS; + if (!mod_timer(timer, jiffies + timeout)) + sctp_association_hold(asoc); + } - /* We have to refresh this in case we are xmiting to - * more than one transport at a time - */ - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - rcu_read_unlock(); + /* sctp xmit */ + tp->af_specific->ecn_capable(sk); + if (asoc) { + asoc->stats.opackets += pkt_count; + if (asoc->peer.last_sent_to != tp) + asoc->peer.last_sent_to 
= tp; } head->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(head, tp); - goto out; - -nomem: - if (packet->auth && list_empty(&packet->auth->list)) - sctp_chunk_free(packet->auth); - -nodst: - /* FIXME: Returning the 'err' will effect all the associations - * associated with a socket, although only one of the paths of the - * association is unreachable. - * The real failure of a transport or association can be passed on - * to the user via notifications. So setting this error may not be - * required. - */ - /* err = -EHOSTUNREACH; */ - kfree_skb(head); -err: +out: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (!sctp_chunk_is_data(chunk)) sctp_chunk_free(chunk); } - -out: sctp_packet_reset(packet); - return err; + return 0; } /******************************************************************** diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 026e3bca4a94..8ec20a64a3f8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3422,6 +3422,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + /* Report violation if chunk len overflows */ + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); + if (ch_end > skb_tail_pointer(skb)) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* Now that we know we at least have a chunk header, * do things that are type appropriate. 
*/ @@ -3453,12 +3459,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } } - /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); - if (ch_end > skb_tail_pointer(skb)) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); - ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index fb02c7033307..9fbb6feb8c27 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4687,7 +4687,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len <= 0) + if (len == 0) return -EINVAL; if (len > sizeof(struct sctp_event_subscribe)) len = sizeof(struct sctp_event_subscribe); @@ -6430,6 +6430,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; + if (len < 0) + return -EINVAL; + lock_sock(sk); switch (optname) { diff --git a/net/socket.c b/net/socket.c index 5a9bf5ee2464..4ce33c35e606 100644 --- a/net/socket.c +++ b/net/socket.c @@ -518,8 +518,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err) { + struct socket *sock = SOCKET_I(d_inode(dentry)); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** @@ -877,6 +891,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. 
*/ +static struct ns_common *get_net_ns(struct ns_common *ns) +{ + return &get_net(container_of(ns, struct net, ns))->ns; +} + static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; @@ -945,6 +964,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = dlci_ioctl_hook(cmd, argp); mutex_unlock(&dlci_ioctl_mutex); break; + case SIOCGSKNS: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + + err = open_related_ns(&net->ns, get_net_ns); + break; default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -3093,6 +3119,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFVLAN: case SIOCADDDLCI: case SIOCDELDLCI: + case SIOCGSKNS: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e2a55dc787e6..78da4aee3543 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -547,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err >= 0) - skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); if (skb == NULL) { if (err != -EAGAIN) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1758665d609c..7178d0aa7861 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_udp(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); consume_skb(skb); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6f145b592a53..017801f9dbaa 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -768,6 +768,9 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, u32 mask = 
BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; int err; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + err = switchdev_port_attr_get(dev, &attr); if (err && err != -EOPNOTSUPP) return err; @@ -923,6 +926,9 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlattr *afspec; int err = 0; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); if (protinfo) { @@ -956,6 +962,9 @@ int switchdev_port_bridge_dellink(struct net_device *dev, { struct nlattr *afspec; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (afspec) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 753f774cb46f..aa1babbea385 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -247,11 +247,17 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) * * RCU is locked, no other locks set */ -void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + u16 acked = msg_bcast_ack(hdr); struct sk_buff_head xmitq; + /* Ignore bc acks sent by peer before bcast synch point was received */ + if (msg_bc_ack_invalid(hdr)) + return; + __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); @@ -279,11 +285,11 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); - if (msg_type(hdr) == STATE_MSG) { + if (msg_type(hdr) != STATE_MSG) { + tipc_link_bc_init_rcv(l, hdr); + } else if (!msg_bc_ack_invalid(hdr)) { tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); - } else { - tipc_link_bc_init_rcv(l, hdr); } tipc_bcast_unlock(net); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 5ffe34472ccd..855d53c64ab3 100644 --- a/net/tipc/bcast.h +++ 
b/net/tipc/bcast.h @@ -55,7 +55,8 @@ void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); int tipc_bcast_get_mtu(struct net *net); int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); -void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); diff --git a/net/tipc/link.c b/net/tipc/link.c index b36e16cdc945..1055164c6232 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1312,6 +1312,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_ack_invalid(hdr, !node_up); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); @@ -1574,6 +1575,7 @@ static void tipc_link_build_bc_init_msg(struct tipc_link *l, __skb_queue_head_init(&list); if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) return; + msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true); tipc_link_xmit(l, &list, xmitq); } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c3832cdf2278..8d408612ffa4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -95,7 +95,7 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { - void *handle; + u32 bytes_read; struct sk_buff *tail; bool validated; bool wakeup_pending; @@ -714,6 +714,23 @@ static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) msg_set_bits(m, 5, 13, 0x1, s); } +static inline bool msg_bc_ack_invalid(struct tipc_msg *m) +{ + switch (msg_user(m)) { + case BCAST_PROTOCOL: + case NAME_DISTRIBUTOR: + case LINK_PROTOCOL: + return 
msg_bits(m, 5, 14, 0x1); + default: + return false; + } +} + +static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid) +{ + msg_set_bits(m, 5, 14, 0x1, invalid); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index a04fe9be1c60..c1cfd92de17a 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -156,6 +156,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, pr_warn("Bulk publication failure\n"); return; } + msg_set_bc_ack_invalid(buf_msg(skb), true); item = (struct distr_item *)msg_data(buf_msg(skb)); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 7ef14e2d2356..9d2f4c2b08ab 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1535,7 +1535,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(usr == LINK_PROTOCOL)) tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) - tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); + tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); /* Receive packet directly if conditions permit */ tipc_node_read_lock(n); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9f5f3c3dab5..149396366e80 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -44,44 +44,43 @@ #include "bcast.h" #include "netlink.h" -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ - #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 #define TIPC_MAX_PORT 0xffffffff #define TIPC_MIN_PORT 1 +enum { + TIPC_LISTEN = TCP_LISTEN, + TIPC_ESTABLISHED = TCP_ESTABLISHED, + TIPC_OPEN = TCP_CLOSE, + TIPC_DISCONNECTING = TCP_CLOSE_WAIT, + TIPC_CONNECTING = TCP_SYN_SENT, +}; 
+ /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API - * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages - * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_intv: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer - * @remote: 'connected' peer for dgram/rdm + * @peer: 'connected' peer for dgram/rdm * @node: hash table node * @rcu: rcu struct for tipc_sock */ struct tipc_sock { struct sock sk; - int connected; u32 conn_type; u32 conn_instance; int published; @@ -91,17 +90,16 @@ struct tipc_sock { struct list_head sock_list; struct list_head publications; u32 pub_count; - u32 probing_state; - unsigned long probing_intv; uint conn_timeout; atomic_t dupl_rcvcnt; + bool probe_unacked; bool link_cong; u16 snt_unacked; u16 snd_win; u16 peer_caps; u16 rcv_unacked; u16 rcv_win; - struct sockaddr_tipc remote; + struct sockaddr_tipc peer; struct rhash_head node; struct rcu_head rcu; }; @@ -294,6 +292,21 @@ static void tsk_rej_rx_queue(struct sock *sk) tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } +static bool tipc_sk_connected(struct sock *sk) 
+{ + return sk->sk_state == TIPC_ESTABLISHED; +} + +/* tipc_sk_type_connectionless - check if the socket is datagram socket + * @sk: socket + * + * Returns true if connection less, false otherwise + */ +static bool tipc_sk_type_connectionless(struct sock *sk) +{ + return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM; +} + /* tsk_peer_msg - verify if message was sent by connected port's peer * * Handles cases where the node's network address has changed from @@ -301,12 +314,13 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { - struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; - if (unlikely(!tsk->connected)) + if (unlikely(!tipc_sk_connected(sk))) return false; if (unlikely(msg_origport(msg) != peer_port)) @@ -327,6 +341,45 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) return false; } +/* tipc_set_sk_state - set the sk_state of the socket + * @sk: socket + * + * Caller must hold socket lock + * + * Returns 0 on success, errno otherwise + */ +static int tipc_set_sk_state(struct sock *sk, int state) +{ + int oldsk_state = sk->sk_state; + int res = -EINVAL; + + switch (state) { + case TIPC_OPEN: + res = 0; + break; + case TIPC_LISTEN: + case TIPC_CONNECTING: + if (oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_ESTABLISHED: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_DISCONNECTING: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == TIPC_ESTABLISHED) + res = 0; + break; + } + + if (!res) + sk->sk_state = state; + + return res; +} + /** * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) @@ -344,7 +397,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, { struct 
tipc_net *tn; const struct proto_ops *ops; - socket_state state; struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; @@ -356,16 +408,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, switch (sock->type) { case SOCK_STREAM: ops = &stream_ops; - state = SS_UNCONNECTED; break; case SOCK_SEQPACKET: ops = &packet_ops; - state = SS_UNCONNECTED; break; case SOCK_DGRAM: case SOCK_RDM: ops = &msg_ops; - state = SS_READY; break; default: return -EPROTOTYPE; @@ -386,14 +435,15 @@ static int tipc_sk_create(struct net *net, struct socket *sock, /* Finish initializing socket data structures */ sock->ops = ops; - sock->state = state; sock_init_data(sock, sk); + tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); + sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -406,11 +456,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); tsk->rcv_win = tsk->snd_win; - if (sock->state == SS_READY) { + if (tipc_sk_type_connectionless(sk)) { tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); } + return 0; } @@ -421,6 +472,44 @@ static void tipc_sk_callback(struct rcu_head *head) sock_put(&tsk->sk); } +/* Caller should hold socket lock for the socket. */ +static void __tipc_shutdown(struct socket *sock, int error) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + u32 dnode = tsk_peer_node(tsk); + struct sk_buff *skb; + + /* Reject all unreceived messages, except on an active connection + * (which disconnects locally & sends a 'FIN+' to peer). 
+ */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (TIPC_SKB_CB(skb)->bytes_read) { + kfree_skb(skb); + } else { + if (!tipc_sk_type_connectionless(sk) && + sk->sk_state != TIPC_DISCONNECTING) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + } + tipc_sk_respond(sk, skb, error); + } + } + if (sk->sk_state != TIPC_DISCONNECTING) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, error); + if (skb) + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + if (!tipc_sk_type_connectionless(sk)) { + tipc_node_remove_conn(net, dnode, tsk->portid); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + } + } +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -440,10 +529,7 @@ static void tipc_sk_callback(struct rcu_head *head) static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net; struct tipc_sock *tsk; - struct sk_buff *skb; - u32 dnode; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -452,47 +538,16 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; - net = sock_net(sk); tsk = tipc_sk(sk); lock_sock(sk); - /* - * Reject all unreceived messages, except on an active connection - * (which disconnects locally & sends a 'FIN+' to peer) - */ - dnode = tsk_peer_node(tsk); - while (sock->state != SS_DISCONNECTING) { - skb = __skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) - break; - if (TIPC_SKB_CB(skb)->handle != NULL) - kfree_skb(skb); - else { - if ((sock->state == SS_CONNECTING) || - (sock->state == SS_CONNECTED)) { - sock->state = SS_DISCONNECTING; - tsk->connected = 0; - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); - } - } - + __tipc_shutdown(sock, TIPC_ERR_NO_PORT); + sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_withdraw(tsk, 0, NULL); 
sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); - if (tsk->connected) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, - tsk_own_node(tsk), tsk_peer_port(tsk), - tsk->portid, TIPC_ERR_NO_PORT); - if (skb) - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - tipc_node_remove_conn(net, dnode, tsk->portid); - } /* Reject any messages that accumulated in backlog queue */ - sock->state = SS_DISCONNECTING; release_sock(sk); call_rcu(&tsk->rcu, tipc_sk_callback); @@ -578,13 +633,14 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsk = tipc_sk(sock->sk); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { - if ((sock->state != SS_CONNECTED) && - ((peer != 2) || (sock->state != SS_DISCONNECTING))) + if ((!tipc_sk_connected(sk)) && + ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) return -ENOTCONN; addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); @@ -616,28 +672,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * exits. TCP and other protocols seem to rely on higher level poll routines * to handle any preventable race conditions, so TIPC will do the same ... 
* - * TIPC sets the returned events as follows: - * - * socket state flags set - * ------------ --------- - * unconnected no read flags - * POLLOUT if port is not congested - * - * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue - * no write flags - * - * connected POLLIN/POLLRDNORM if data in rx queue - * POLLOUT if port is not congested - * - * disconnecting POLLIN/POLLRDNORM/POLLHUP - * no write flags - * - * listening POLLIN if SYN in rx queue - * no write flags - * - * ready POLLIN/POLLRDNORM if data in rx queue - * [connectionless] POLLOUT (since port cannot be congested) - * * IMPORTANT: The fact that a read or write operation is indicated does NOT * imply that the operation will succeed, merely that it should be performed * and will not block. @@ -651,22 +685,29 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); - switch ((int)sock->state) { - case SS_UNCONNECTED: - if (!tsk->link_cong) - mask |= POLLOUT; - break; - case SS_READY: - case SS_CONNECTED: + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP | POLLIN | POLLRDNORM; + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + + switch (sk->sk_state) { + case TIPC_ESTABLISHED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ - case SS_CONNECTING: - case SS_LISTENING: + case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; - case SS_DISCONNECTING: + case TIPC_OPEN: + if (!tsk->link_cong) + mask |= POLLOUT; + if (tipc_sk_type_connectionless(sk) && + (!skb_queue_empty(&sk->sk_receive_queue))) + mask |= (POLLIN | POLLRDNORM); + break; + case TIPC_DISCONNECTING: mask = (POLLIN | POLLRDNORM | POLLHUP); break; } @@ -697,6 +738,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, uint mtu; int rc; + if (!timeo && tsk->link_cong) + return -ELINKCONG; + msg_set_type(mhdr, TIPC_MCAST_MSG); 
msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); msg_set_destport(mhdr, 0); @@ -809,7 +853,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; - tsk->probing_state = TIPC_CONN_OK; + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); @@ -841,7 +885,7 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) int err = sock_error(sk); if (err) return err; - if (sock->state == SS_DISCONNECTING) + if (sk->sk_shutdown & SEND_SHUTDOWN) return -EPIPE; if (!*timeo_p) return -EAGAIN; @@ -890,6 +934,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; struct sk_buff_head pktchain; + bool is_connectionless = tipc_sk_type_connectionless(sk); struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -900,18 +945,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (unlikely(!dest)) { - if (tsk->connected && sock->state == SS_READY) - dest = &tsk->remote; + if (is_connectionless && tsk->peer.family == AF_TIPC) + dest = &tsk->peer; else return -EDESTADDRREQ; } else if (unlikely(m->msg_namelen < sizeof(*dest)) || dest->family != AF_TIPC) { return -EINVAL; } - if (unlikely(sock->state != SS_READY)) { - if (sock->state == SS_LISTENING) + if (!is_connectionless) { + if (sk->sk_state == TIPC_LISTEN) return -EPIPE; - if (sock->state != SS_UNCONNECTED) + if (sk->sk_state != TIPC_OPEN) return -EISCONN; if (tsk->published) return -EOPNOTSUPP; @@ -963,8 +1008,8 @@ new_mtu: TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); if (likely(!rc)) { - if (sock->state != SS_READY) - sock->state = SS_CONNECTING; + if (!is_connectionless) + tipc_set_sk_state(sk, TIPC_CONNECTING); return dsz; } if (rc == -ELINKCONG) { @@ -995,9 +1040,9 @@ static int 
tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) int err = sock_error(sk); if (err) return err; - if (sock->state == SS_DISCONNECTING) + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; - else if (sock->state != SS_CONNECTED) + else if (!tipc_sk_connected(sk)) return -ENOTCONN; if (!*timeo_p) return -EAGAIN; @@ -1008,7 +1053,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) done = sk_wait_event(sk, timeo_p, (!tsk->link_cong && !tsk_conn_cong(tsk)) || - !tsk->connected); + !tipc_sk_connected(sk)); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -1064,14 +1109,17 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > (uint)INT_MAX) return -EMSGSIZE; - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) + if (unlikely(!tipc_sk_connected(sk))) { + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; else return -ENOTCONN; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + if (!timeo && tsk->link_cong) + return -ELINKCONG; + dnode = tsk_peer_node(tsk); skb_queue_head_init(&pktchain); @@ -1145,10 +1193,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_intv = CONN_PROBING_INTERVAL; - tsk->probing_state = TIPC_CONN_OK; - tsk->connected = 1; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); @@ -1256,13 +1302,14 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = 
sock_net(sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); - if (!tsk->connected) + if (!tipc_sk_connected(sk)) return; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_port, @@ -1291,7 +1338,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { + if (sk->sk_shutdown & RCV_SHUTDOWN) { err = -ENOTCONN; break; } @@ -1332,6 +1379,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *buf; struct tipc_msg *msg; + bool is_connectionless = tipc_sk_type_connectionless(sk); long timeo; unsigned int sz; u32 err; @@ -1343,7 +1391,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, lock_sock(sk); - if (unlikely(sock->state == SS_UNCONNECTED)) { + if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) { res = -ENOTCONN; goto exit; } @@ -1388,8 +1436,8 @@ restart: goto exit; res = sz; } else { - if ((sock->state == SS_READY) || - ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)) + if (is_connectionless || err == TIPC_CONN_SHUTDOWN || + m->msg_control) res = 0; else res = -ECONNRESET; @@ -1398,7 +1446,7 @@ restart: if (unlikely(flags & MSG_PEEK)) goto exit; - if (likely(sock->state != SS_READY)) { + if (likely(!is_connectionless)) { tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) tipc_sk_send_ack(tsk); @@ -1429,7 +1477,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, struct tipc_msg *msg; long timeo; unsigned int sz; - int sz_to_copy, target, needed; + int target; int sz_copied = 0; u32 err; int res = 0, hlen; @@ -1440,7 +1488,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr 
*m, lock_sock(sk); - if (unlikely(sock->state == SS_UNCONNECTED)) { + if (unlikely(sk->sk_state == TIPC_OPEN)) { res = -ENOTCONN; goto exit; } @@ -1477,11 +1525,13 @@ restart: /* Capture message data (if valid) & compute return value (always) */ if (!err) { - u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + u32 offset = TIPC_SKB_CB(buf)->bytes_read; + u32 needed; + int sz_to_copy; sz -= offset; needed = (buf_len - sz_copied); - sz_to_copy = (sz <= needed) ? sz : needed; + sz_to_copy = min(sz, needed); res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); if (res) @@ -1491,8 +1541,8 @@ restart: if (sz_to_copy < sz) { if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = - (void *)(unsigned long)(offset + sz_to_copy); + TIPC_SKB_CB(buf)->bytes_read = + offset + sz_to_copy; goto exit; } } else { @@ -1574,49 +1624,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); - struct socket *sock = sk->sk_socket; struct tipc_msg *hdr = buf_msg(skb); if (unlikely(msg_mcast(hdr))) return false; - switch ((int)sock->state) { - case SS_CONNECTED: - - /* Accept only connection-based messages sent by peer */ - if (unlikely(!tsk_peer_msg(tsk, hdr))) - return false; - - if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; - tsk->connected = 0; - /* Let timer expire on it's own */ - tipc_node_remove_conn(net, tsk_peer_node(tsk), - tsk->portid); - } - return true; - - case SS_CONNECTING: - + switch (sk->sk_state) { + case TIPC_CONNECTING: /* Accept only ACK or NACK message */ if (unlikely(!msg_connected(hdr))) return false; if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; return true; } if (unlikely(!msg_isdata(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; return true; } tipc_sk_finish_conn(tsk, msg_origport(hdr), 
msg_orignode(hdr)); msg_set_importance(&tsk->phdr, msg_importance(hdr)); - sock->state = SS_CONNECTED; /* If 'ACK+' message, add to socket receive queue */ if (msg_data_sz(hdr)) @@ -1630,18 +1662,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) msg_set_dest_droppable(hdr, 1); return false; - case SS_LISTENING: - case SS_UNCONNECTED: - + case TIPC_OPEN: + case TIPC_DISCONNECTING: + break; + case TIPC_LISTEN: /* Accept only SYN message */ if (!msg_connected(hdr) && !(msg_errcode(hdr))) return true; break; - case SS_DISCONNECTING: - break; + case TIPC_ESTABLISHED: + /* Accept only connection-based messages sent by peer */ + if (unlikely(!tsk_peer_msg(tsk, hdr))) + return false; + + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + /* Let timer expire on it's own */ + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); + sk->sk_state_change(sk); + } + return true; default: - pr_err("Unknown socket state %u\n", sock->state); + pr_err("Unknown sk_state %u\n", sk->sk_state); } + return false; } @@ -1692,7 +1737,6 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) static bool filter_rcv(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *xmitq) { - struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); unsigned int limit = rcvbuf_limit(sk, skb); @@ -1718,7 +1762,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb, } /* Reject if wrong message type for current socket state */ - if (unlikely(sock->state == SS_READY)) { + if (tipc_sk_type_connectionless(sk)) { if (msg_connected(hdr)) { err = TIPC_ERR_NO_PORT; goto reject; @@ -1735,7 +1779,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb, } /* Enqueue message */ - TIPC_SKB_CB(skb)->handle = NULL; + TIPC_SKB_CB(skb)->bytes_read = 0; __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); @@ -1899,7 +1943,8 @@ static int 
tipc_wait_for_connect(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING); + done = sk_wait_event(sk, timeo_p, + sk->sk_state != TIPC_CONNECTING); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -1922,21 +1967,19 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout; - socket_state previous; + int previous; int res = 0; lock_sock(sk); /* DGRAM/RDM connect(), just save the destaddr */ - if (sock->state == SS_READY) { + if (tipc_sk_type_connectionless(sk)) { if (dst->family == AF_UNSPEC) { - memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); - tsk->connected = 0; + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); } else if (destlen != sizeof(struct sockaddr_tipc)) { res = -EINVAL; } else { - memcpy(&tsk->remote, dest, destlen); - tsk->connected = 1; + memcpy(&tsk->peer, dest, destlen); } goto exit; } @@ -1952,9 +1995,10 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, goto exit; } - previous = sock->state; - switch (sock->state) { - case SS_UNCONNECTED: + previous = sk->sk_state; + + switch (sk->sk_state) { + case TIPC_OPEN: /* Send a 'SYN-' to destination */ m.msg_name = dest; m.msg_namelen = destlen; @@ -1969,27 +2013,29 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; - /* Just entered SS_CONNECTING state; the only + /* Just entered TIPC_CONNECTING state; the only * difference is that return value in non-blocking * case is EINPROGRESS, rather than EALREADY. 
*/ res = -EINPROGRESS; - case SS_CONNECTING: - if (previous == SS_CONNECTING) - res = -EALREADY; - if (!timeout) + /* fall thru' */ + case TIPC_CONNECTING: + if (!timeout) { + if (previous == TIPC_CONNECTING) + res = -EALREADY; goto exit; + } timeout = msecs_to_jiffies(timeout); /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ res = tipc_wait_for_connect(sock, &timeout); break; - case SS_CONNECTED: + case TIPC_ESTABLISHED: res = -EISCONN; break; default: res = -EINVAL; - break; } + exit: release_sock(sk); return res; @@ -2008,15 +2054,9 @@ static int tipc_listen(struct socket *sock, int len) int res; lock_sock(sk); - - if (sock->state != SS_UNCONNECTED) - res = -EINVAL; - else { - sock->state = SS_LISTENING; - res = 0; - } - + res = tipc_set_sk_state(sk, TIPC_LISTEN); release_sock(sk); + return res; } @@ -2042,9 +2082,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; - err = -EINVAL; - if (sock->state != SS_LISTENING) - break; err = -EAGAIN; if (!timeo) break; @@ -2075,7 +2112,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) lock_sock(sk); - if (sock->state != SS_LISTENING) { + if (sk->sk_state != TIPC_LISTEN) { res = -EINVAL; goto exit; } @@ -2086,7 +2123,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); @@ -2106,7 +2143,6 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) /* Connect new socket to it's peer */ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); - new_sock->state = SS_CONNECTED; tsk_set_importance(new_tsock, msg_importance(msg)); if (msg_named(msg)) { @@ -2146,13 +2182,6 @@ exit: static int tipc_shutdown(struct socket 
*sock, int how) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct tipc_sock *tsk = tipc_sk(sk); - struct sk_buff *skb; - u32 dnode = tsk_peer_node(tsk); - u32 dport = tsk_peer_port(tsk); - u32 onode = tipc_own_addr(net); - u32 oport = tsk->portid; int res; if (how != SHUT_RDWR) @@ -2160,45 +2189,17 @@ static int tipc_shutdown(struct socket *sock, int how) lock_sock(sk); - switch (sock->state) { - case SS_CONNECTING: - case SS_CONNECTED: - -restart: - dnode = tsk_peer_node(tsk); - - /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ - skb = __skb_dequeue(&sk->sk_receive_queue); - if (skb) { - if (TIPC_SKB_CB(skb)->handle != NULL) { - kfree_skb(skb); - goto restart; - } - tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN); - } else { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, onode, dport, oport, - TIPC_CONN_SHUTDOWN); - if (skb) - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - } - tsk->connected = 0; - sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(net, dnode, tsk->portid); - /* fall through */ - - case SS_DISCONNECTING: + __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); + sk->sk_shutdown = SEND_SHUTDOWN; + if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ __skb_queue_purge(&sk->sk_receive_queue); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); res = 0; - break; - - default: + } else { res = -ENOTCONN; } @@ -2215,17 +2216,16 @@ static void tipc_sk_timeout(unsigned long data) u32 own_node = tsk_own_node(tsk); bh_lock_sock(sk); - if (!tsk->connected) { + if (!tipc_sk_connected(sk)) { bh_unlock_sock(sk); goto exit; } peer_port = tsk_peer_port(tsk); peer_node = tsk_peer_node(tsk); - if (tsk->probing_state == TIPC_CONN_PROBING) { + if (tsk->probe_unacked) { if (!sock_owned_by_user(sk)) { - sk->sk_socket->state = SS_DISCONNECTING; - tsk->connected = 0; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); tipc_node_remove_conn(sock_net(sk), 
tsk_peer_node(tsk), tsk_peer_port(tsk)); sk->sk_state_change(sk); @@ -2234,13 +2234,15 @@ static void tipc_sk_timeout(unsigned long data) sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); } - } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, - INT_H_SIZE, 0, peer_node, own_node, - peer_port, tsk->portid, TIPC_OK); - tsk->probing_state = TIPC_CONN_PROBING; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + bh_unlock_sock(sk); + goto exit; } + + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, + peer_port, tsk->portid, TIPC_OK); + tsk->probe_unacked = true; + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); bh_unlock_sock(sk); if (skb) tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); @@ -2251,11 +2253,12 @@ exit: static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct publication *publ; u32 key; - if (tsk->connected) + if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; if (key == tsk->portid) @@ -2713,6 +2716,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, struct nlattr *attrs; struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); + struct sock *sk = &tsk->sk; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); @@ -2727,7 +2731,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) goto attr_msg_cancel; - if (tsk->connected) { + if (tipc_sk_connected(sk)) { err = __tipc_nl_add_sk_con(skb, tsk); if (err) goto attr_msg_cancel; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..87620183910e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2113,8 
+2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); - skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, - &last); + skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip, + &err, &last); if (skb) break; diff --git a/net/wireless/core.c b/net/wireless/core.c index 8201e6d7449e..158c59ecf90a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -210,11 +210,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; - if (!wdev->p2p_started) + if (!wdev_running(wdev)) return; rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; + wdev->is_running = false; rdev->opencount--; @@ -233,11 +233,11 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) return; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return; rdev_stop_nan(rdev, wdev); - wdev->nan_started = false; + wdev->is_running = false; rdev->opencount--; } @@ -562,6 +562,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c->limits[j].max > 1)) return -EINVAL; + /* + * This isn't well-defined right now. If you have an + * IBSS interface, then its beacon interval may change + * by joining other networks, and nothing prevents it + * from doing that. + * So technically we probably shouldn't even allow AP + * and IBSS in the same interface, but it seems that + * some drivers support that, possibly only with fixed + * beacon intervals for IBSS. 
+ */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_ADHOC) && + c->beacon_int_min_gcd)) { + return -EINVAL; + } + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -571,6 +586,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) return -EINVAL; } +#ifndef CONFIG_WIRELESS_WDS + if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS))) + return -EINVAL; +#endif + /* You can't even choose that many! */ if (WARN_ON(cnt < c->max_interfaces)) return -EINVAL; @@ -609,6 +629,11 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) return -EINVAL; +#ifndef CONFIG_WIRELESS_WDS + if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS))) + return -EINVAL; +#endif + /* * if a wiphy has unsupported modes for regulatory channel enforcement, * opt-out of enforcement checking diff --git a/net/wireless/core.h b/net/wireless/core.h index 08d2e948c9ad..fb2fcd5581fe 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -345,7 +345,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len); + const u8 *auth_data, int auth_data_len); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, @@ -475,7 +475,7 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, u32 *mask); int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, - u32 beacon_int); + enum nl80211_iftype iftype, u32 beacon_int); void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index fa2066b56f36..2d8518a37eab 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -183,6 +183,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, memcpy(wdev->ssid, setup->mesh_id, 
setup->mesh_id_len); wdev->mesh_id_len = setup->mesh_id_len; wdev->chandef = setup->chandef; + wdev->beacon_interval = setup->beacon_interval; } return err; @@ -258,6 +259,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, err = rdev_leave_mesh(rdev, dev); if (!err) { wdev->mesh_id_len = 0; + wdev->beacon_interval = 0; memset(&wdev->chandef, 0, sizeof(wdev->chandef)); rdev_set_qos_map(rdev, dev, NULL); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index cbb48e26a871..bd1f7a159d6a 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -204,14 +204,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) + const u8 *auth_data, int auth_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req = { .ie = ie, .ie_len = ie_len, - .sae_data = sae_data, - .sae_data_len = sae_data_len, + .auth_data = auth_data, + .auth_data_len = auth_data_len, .auth_type = auth_type, .key = key, .key_len = key_len, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0d3ab4bfeacf..24ab199ef2fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -343,7 +343,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, - [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, + [NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, }, [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, @@ -400,6 +400,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, 
[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, + [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, + .len = FILS_MAX_KEK_LEN }, + [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN }, + [NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, }, }; /* policy for the key attributes */ @@ -421,6 +425,7 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = { [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, }; +#ifdef CONFIG_PM /* policy for WoWLAN attributes */ static const struct nla_policy nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { @@ -454,6 +459,7 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = { [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, }; +#endif /* CONFIG_PM */ /* policy for coalesce rule attributes */ static const struct nla_policy @@ -1062,6 +1068,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, c->radar_detect_regions))) goto nla_put_failure; + if (c->beacon_int_min_gcd && + nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD, + c->beacon_int_min_gcd)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } @@ -1309,6 +1319,95 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, return 0; } +#define CMD(op, n) \ + do { \ + if (rdev->ops->op) { \ + i++; \ + if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ + goto nla_put_failure; \ + } \ + } while (0) + +static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + int i = 0; + + /* + * do *NOT* add anything into this function, new things need to be + * advertised only to new versions of userspace that can deal with + * the split (and they can't possibly care about new features... 
+ */ + CMD(add_virtual_intf, NEW_INTERFACE); + CMD(change_virtual_intf, SET_INTERFACE); + CMD(add_key, NEW_KEY); + CMD(start_ap, START_AP); + CMD(add_station, NEW_STATION); + CMD(add_mpath, NEW_MPATH); + CMD(update_mesh_config, SET_MESH_CONFIG); + CMD(change_bss, SET_BSS); + CMD(auth, AUTHENTICATE); + CMD(assoc, ASSOCIATE); + CMD(deauth, DEAUTHENTICATE); + CMD(disassoc, DISASSOCIATE); + CMD(join_ibss, JOIN_IBSS); + CMD(join_mesh, JOIN_MESH); + CMD(set_pmksa, SET_PMKSA); + CMD(del_pmksa, DEL_PMKSA); + CMD(flush_pmksa, FLUSH_PMKSA); + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); + CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); + if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) + goto nla_put_failure; + } + if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || + rdev->ops->join_mesh) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + goto nla_put_failure; + } + CMD(set_wds_peer, SET_WDS_PEER); + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + CMD(tdls_mgmt, TDLS_MGMT); + CMD(tdls_oper, TDLS_OPER); + } + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + CMD(sched_scan_start, START_SCHED_SCAN); + CMD(probe_client, PROBE_CLIENT); + CMD(set_noack_map, SET_NOACK_MAP); + if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + goto nla_put_failure; + } + CMD(start_p2p_device, START_P2P_DEVICE); + CMD(set_mcast_rate, SET_MCAST_RATE); +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif + + if (rdev->ops->connect || rdev->ops->auth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) + goto nla_put_failure; + } + + if (rdev->ops->disconnect || rdev->ops->deauth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + goto nla_put_failure; + } + + return i; + 
nla_put_failure: + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -1536,68 +1635,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (!nl_cmds) goto nla_put_failure; - i = 0; -#define CMD(op, n) \ - do { \ - if (rdev->ops->op) { \ - i++; \ - if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ - goto nla_put_failure; \ - } \ - } while (0) - - CMD(add_virtual_intf, NEW_INTERFACE); - CMD(change_virtual_intf, SET_INTERFACE); - CMD(add_key, NEW_KEY); - CMD(start_ap, START_AP); - CMD(add_station, NEW_STATION); - CMD(add_mpath, NEW_MPATH); - CMD(update_mesh_config, SET_MESH_CONFIG); - CMD(change_bss, SET_BSS); - CMD(auth, AUTHENTICATE); - CMD(assoc, ASSOCIATE); - CMD(deauth, DEAUTHENTICATE); - CMD(disassoc, DISASSOCIATE); - CMD(join_ibss, JOIN_IBSS); - CMD(join_mesh, JOIN_MESH); - CMD(set_pmksa, SET_PMKSA); - CMD(del_pmksa, DEL_PMKSA); - CMD(flush_pmksa, FLUSH_PMKSA); - if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) - CMD(remain_on_channel, REMAIN_ON_CHANNEL); - CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(mgmt_tx, FRAME); - CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) - goto nla_put_failure; - } - if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || - rdev->ops->join_mesh) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) - goto nla_put_failure; - } - CMD(set_wds_peer, SET_WDS_PEER); - if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { - CMD(tdls_mgmt, TDLS_MGMT); - CMD(tdls_oper, TDLS_OPER); - } - if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) - CMD(sched_scan_start, START_SCHED_SCAN); - CMD(probe_client, PROBE_CLIENT); - CMD(set_noack_map, SET_NOACK_MAP); - if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) - goto nla_put_failure; - } - CMD(start_p2p_device, START_P2P_DEVICE); - 
CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_NL80211_TESTMODE - CMD(testmode_cmd, TESTMODE); -#endif + i = nl80211_add_commands_unsplit(rdev, msg); + if (i < 0) + goto nla_put_failure; if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); @@ -1607,22 +1647,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) CMD(add_tx_ts, ADD_TX_TS); + CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); + CMD(update_connect_params, UPDATE_CONNECT_PARAMS); } - /* add into the if now */ #undef CMD - if (rdev->ops->connect || rdev->ops->auth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) - goto nla_put_failure; - } - - if (rdev->ops->disconnect || rdev->ops->deauth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) - goto nla_put_failure; - } - nla_nest_end(msg, nl_cmds); state->split_start++; if (state->split) @@ -2283,10 +2312,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { - result = nla_parse(tb, NL80211_TXQ_ATTR_MAX, - nla_data(nl_txq_params), - nla_len(nl_txq_params), - txq_params_policy); + result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX, + nl_txq_params, + txq_params_policy); if (result) return result; result = parse_txq_params(tb, &txq_params); @@ -3536,8 +3564,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, sband = rdev->wiphy.bands[band]; if (sband == NULL) return -EINVAL; - err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), - nla_len(tx_rates), nl80211_txattr_policy); + err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates, + nl80211_txattr_policy); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { @@ -3743,12 +3771,23 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, if (!(rdev->wiphy.features & 
NL80211_FEATURE_SAE) && auth_type == NL80211_AUTHTYPE_SAE) return false; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_FILS_STA) && + (auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK)) + return false; return true; case NL80211_CMD_CONNECT: case NL80211_CMD_START_AP: /* SAE not supported yet */ if (auth_type == NL80211_AUTHTYPE_SAE) return false; + /* FILS not supported yet */ + if (auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK) + return false; return true; default: return false; @@ -3790,7 +3829,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.dtim_period = nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); - err = cfg80211_validate_beacon_int(rdev, params.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype, + params.beacon_interval); if (err) return err; @@ -6292,9 +6332,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { - r = nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, - nla_data(nl_reg_rule), nla_len(nl_reg_rule), - reg_rule_policy); + r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX, + nl_reg_rule, reg_rule_policy); if (r) goto bad_reg; r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); @@ -6361,8 +6400,8 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, if (!nla_ok(nest, nla_len(nest))) return -EINVAL; - err = nla_parse(attr, NL80211_BSS_SELECT_ATTR_MAX, nla_data(nest), - nla_len(nest), nl80211_bss_select_policy); + err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest, + nl80211_bss_select_policy); if (err) return err; @@ -6752,9 +6791,8 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, if (WARN_ON(i >= n_plans)) return -EINVAL; - err = 
nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX, - nla_data(attr), nla_len(attr), - nl80211_plan_policy); + err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX, + attr, nl80211_plan_policy); if (err) return err; @@ -6843,9 +6881,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *rssi; - err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - nla_data(attr), nla_len(attr), - nl80211_match_policy); + err = nla_parse_nested(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, nl80211_match_policy); if (err) return ERR_PTR(err); /* add other standalone attributes here */ @@ -7016,9 +7054,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *ssid, *rssi; - err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - nla_data(attr), nla_len(attr), - nl80211_match_policy); + err = nla_parse_nested(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, nl80211_match_policy); if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; @@ -7696,8 +7734,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct ieee80211_channel *chan; - const u8 *bssid, *ssid, *ie = NULL, *sae_data = NULL; - int err, ssid_len, ie_len = 0, sae_data_len = 0; + const u8 *bssid, *ssid, *ie = NULL, *auth_data = NULL; + int err, ssid_len, ie_len = 0, auth_data_len = 0; enum nl80211_auth_type auth_type; struct key_parse key; bool local_state_change; @@ -7777,17 +7815,23 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE)) return -EINVAL; - if (auth_type == NL80211_AUTHTYPE_SAE && - !info->attrs[NL80211_ATTR_SAE_DATA]) + if ((auth_type == NL80211_AUTHTYPE_SAE || + auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == 
NL80211_AUTHTYPE_FILS_PK) && + !info->attrs[NL80211_ATTR_AUTH_DATA]) return -EINVAL; - if (info->attrs[NL80211_ATTR_SAE_DATA]) { - if (auth_type != NL80211_AUTHTYPE_SAE) + if (info->attrs[NL80211_ATTR_AUTH_DATA]) { + if (auth_type != NL80211_AUTHTYPE_SAE && + auth_type != NL80211_AUTHTYPE_FILS_SK && + auth_type != NL80211_AUTHTYPE_FILS_SK_PFS && + auth_type != NL80211_AUTHTYPE_FILS_PK) return -EINVAL; - sae_data = nla_data(info->attrs[NL80211_ATTR_SAE_DATA]); - sae_data_len = nla_len(info->attrs[NL80211_ATTR_SAE_DATA]); + auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]); + auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]); /* need to include at least Auth Transaction and Status Code */ - if (sae_data_len < 4) + if (auth_data_len < 4) return -EINVAL; } @@ -7804,7 +7848,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, ssid, ssid_len, ie, ie_len, key.p.key, key.p.key_len, key.idx, - sae_data, sae_data_len); + auth_data, auth_data_len); wdev_unlock(dev->ieee80211_ptr); return err; } @@ -7983,6 +8027,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.flags |= ASSOC_REQ_USE_RRM; } + if (info->attrs[NL80211_ATTR_FILS_KEK]) { + req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); + req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); + if (!info->attrs[NL80211_ATTR_FILS_NONCES]) + return -EINVAL; + req.fils_nonces = + nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); + } + err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { wdev_lock(dev->ieee80211_ptr); @@ -8140,7 +8193,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC, + ibss.beacon_interval); if (err) 
return err; @@ -8713,6 +8767,37 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_update_connect_params(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_connect_params connect = {}; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + u32 changed = 0; + int ret; + + if (!rdev->ops->update_connect_params) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_IE]) { + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + changed |= UPDATE_ASSOC_IES; + } + + wdev_lock(dev->ieee80211_ptr); + if (!wdev->current_bss) + ret = -ENOLINK; + else + ret = rdev_update_connect_params(rdev, dev, &connect, changed); + wdev_unlock(dev->ieee80211_ptr); + + return ret; +} + static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -9404,7 +9489,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, + NL80211_IFTYPE_MESH_POINT, + setup.beacon_interval); if (err) return err; } @@ -9715,9 +9802,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (!rdev->wiphy.wowlan->tcp) return -EINVAL; - err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, - nla_data(attr), nla_len(attr), - nl80211_wowlan_tcp_policy); + err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr, + nl80211_wowlan_tcp_policy); if (err) return err; @@ -9862,9 +9948,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, goto out; } - err = nla_parse(tb, NL80211_ATTR_MAX, 
- nla_data(attr), nla_len(attr), - nl80211_policy); + err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy); if (err) goto out; @@ -9898,10 +9982,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto set_wakeup; } - err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG, - nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), - nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), - nl80211_wowlan_policy); + err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG, + info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], + nl80211_wowlan_policy); if (err) return err; @@ -9983,8 +10066,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) rem) { u8 *mask_pat; - nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), - nla_len(pat), NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + NULL); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10194,8 +10277,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, int rem, pat_len, mask_len, pkt_offset, n_patterns = 0; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; - err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule), - nla_len(rule), nl80211_coalesce_policy); + err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule, + nl80211_coalesce_policy); if (err) return err; @@ -10233,8 +10316,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), - nla_len(pat), NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; @@ -10353,10 +10435,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_REKEY_DATA]) return -EINVAL; - err = nla_parse(tb, MAX_NL80211_REKEY_DATA, - nla_data(info->attrs[NL80211_ATTR_REKEY_DATA]), - 
nla_len(info->attrs[NL80211_ATTR_REKEY_DATA]), - nl80211_rekey_policy); + err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA, + info->attrs[NL80211_ATTR_REKEY_DATA], + nl80211_rekey_policy); if (err) return err; @@ -10505,7 +10586,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) return -EOPNOTSUPP; - if (wdev->p2p_started) + if (wdev_running(wdev)) return 0; if (rfkill_blocked(rdev->rfkill)) @@ -10515,7 +10596,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (err) return err; - wdev->p2p_started = true; + wdev->is_running = true; rdev->opencount++; return 0; @@ -10547,7 +10628,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (wdev->nan_started) + if (wdev_running(wdev)) return -EEXIST; if (rfkill_blocked(rdev->rfkill)) @@ -10570,7 +10651,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (err) return err; - wdev->nan_started = true; + wdev->is_running = true; rdev->opencount++; return 0; @@ -10654,7 +10735,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_NAN_FUNC]) @@ -10664,10 +10745,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb, wdev->owner_nlportid != info->snd_portid) return -ENOTCONN; - err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX, - nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]), - nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]), - nl80211_nan_func_policy); + err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX, + info->attrs[NL80211_ATTR_NAN_FUNC], + nl80211_nan_func_policy); if (err) return err; @@ -10762,9 +10842,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (tb[NL80211_NAN_FUNC_SRF]) { struct nlattr 
*srf_tb[NUM_NL80211_NAN_SRF_ATTR]; - err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX, - nla_data(tb[NL80211_NAN_FUNC_SRF]), - nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL); + err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX, + tb[NL80211_NAN_FUNC_SRF], + nl80211_nan_srf_policy); if (err) goto out; @@ -10890,7 +10970,7 @@ static int nl80211_nan_del_func(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_COOKIE]) @@ -10918,7 +10998,7 @@ static int nl80211_nan_change_config(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { @@ -11230,10 +11310,7 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started) + if (!wdev_running(wdev)) return -ENETDOWN; } @@ -11394,10 +11471,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started) + if (!wdev_running(wdev)) return -ENETDOWN; } } @@ -11710,6 +11784,31 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, return 0; } +static int nl80211_set_multicast_to_unicast(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const struct nlattr *nla; + bool enabled; + + if (netif_running(dev)) + return -EBUSY; + + if (!rdev->ops->set_multicast_to_unicast) + return -EOPNOTSUPP; + + if (wdev->iftype != 
NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED]; + enabled = nla_get_flag(nla); + + return rdev_set_multicast_to_unicast(rdev, dev, enabled); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -11768,29 +11867,15 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[1] = wdev; } - if (dev) { - if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && - !netif_running(dev)) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } + if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && + !wdev_running(wdev)) { + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } + if (dev) dev_hold(dev); - } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { - if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE && - !wdev->p2p_started) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } - if (wdev->iftype == NL80211_IFTYPE_NAN && - !wdev->nan_started) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } - } info->user_ptr[0] = rdev; } @@ -12163,6 +12248,14 @@ static const struct genl_ops nl80211_ops[] = { NL80211_FLAG_NEED_RTNL, }, { + .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, + .doit = nl80211_update_connect_params, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { .cmd = NL80211_CMD_DISCONNECT, .doit = nl80211_disconnect, .policy = nl80211_policy, @@ -12583,6 +12676,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, + .doit = nl80211_set_multicast_to_unicast, + .policy = nl80211_policy, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_family nl80211_fam 
__ro_after_init = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 11cf83c8ad4f..2f425075ada8 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -490,6 +490,18 @@ static inline int rdev_connect(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_update_connect_params(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *sme, u32 changed) +{ + int ret; + trace_rdev_update_connect_params(&rdev->wiphy, dev, sme, changed); + ret = rdev->ops->update_connect_params(&rdev->wiphy, dev, sme, changed); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason_code) { @@ -562,6 +574,18 @@ static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const bool enabled) +{ + int ret; + trace_rdev_set_multicast_to_unicast(&rdev->wiphy, dev, enabled); + ret = rdev->ops->set_multicast_to_unicast(&rdev->wiphy, dev, enabled); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev) { trace_rdev_rfkill_poll(&rdev->wiphy); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index a77db333927e..2b5bb380414b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1088,7 +1088,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, err = cfg80211_sme_disconnect(wdev, reason); else if (!rdev->ops->disconnect) cfg80211_mlme_down(rdev, dev); - else if (wdev->current_bss) + else if (wdev->ssid_len) err = rdev_disconnect(rdev, dev, reason); return err; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 0082f4b01795..14b3f007826d 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -104,13 +104,16 
@@ static int wiphy_suspend(struct device *dev) rtnl_lock(); if (rdev->wiphy.registered) { - if (!rdev->wiphy.wowlan_config) + if (!rdev->wiphy.wowlan_config) { cfg80211_leave_all(rdev); + cfg80211_process_rdev_events(rdev); + } if (rdev->ops->suspend) ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); + cfg80211_process_rdev_events(rdev); ret = rdev_suspend(rdev, NULL); } } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index a3d0a91b1e09..ea1b47e04fa4 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1281,6 +1281,24 @@ TRACE_EVENT(rdev_connect, __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid)) ); +TRACE_EVENT(rdev_update_connect_params, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_connect_params *sme, u32 changed), + TP_ARGS(wiphy, netdev, sme, changed), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, changed) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->changed = changed; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->changed) +); + TRACE_EVENT(rdev_set_cqm_rssi_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, s32 rssi_thold, @@ -3030,6 +3048,25 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); + +TRACE_EVENT(rdev_set_multicast_to_unicast, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const bool enabled), + TP_ARGS(wiphy, netdev, enabled), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(bool, enabled) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->enabled = enabled; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, + BOOL_TO_STR(__entry->enabled)) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef 
TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 8edce22d1b93..88725f8eefad 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -13,6 +13,7 @@ #include <net/dsfield.h> #include <linux/if_vlan.h> #include <linux/mpls.h> +#include <linux/gcd.h> #include "core.h" #include "rdev-ops.h" @@ -420,8 +421,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) } EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); -static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, - const u8 *addr, enum nl80211_iftype iftype) +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct { @@ -525,13 +526,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, return 0; } - -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype) -{ - return __ieee80211_data_to_8023(skb, NULL, addr, iftype); -} -EXPORT_SYMBOL(ieee80211_data_to_8023); +EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype, @@ -746,24 +741,18 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, - bool has_80211_header) + const u8 *check_da, const u8 *check_sa) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; u16 ethertype; u8 *payload; - int offset = 0, remaining, err; + int offset = 0, remaining; struct ethhdr eth; bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb); bool reuse_skb = false; bool last = false; - if (has_80211_header) { - err = __ieee80211_data_to_8023(skb, ð, addr, iftype); - if (err) - goto out; - } - while (!last) { unsigned int subframe_len; int 
len; @@ -780,8 +769,17 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, goto purge; offset += sizeof(struct ethhdr); - /* reuse skb for the last subframe */ last = remaining <= subframe_len + padding; + + /* FIXME: should we really accept multicast DA? */ + if ((check_da && !is_multicast_ether_addr(eth.h_dest) && + !ether_addr_equal(check_da, eth.h_dest)) || + (check_sa && !ether_addr_equal(check_sa, eth.h_source))) { + offset += len + padding; + continue; + } + + /* reuse skb for the last subframe */ if (!skb_is_nonlinear(skb) && !reuse_frag && last) { skb_pull(skb, offset); frame = skb; @@ -819,7 +817,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, purge: __skb_queue_purge(list); - out: dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); @@ -1381,6 +1378,25 @@ static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) return false; } +static size_t skip_ie(const u8 *ies, size_t ielen, size_t pos) +{ + /* we assume a validly formed IEs buffer */ + u8 len = ies[pos + 1]; + + pos += 2 + len; + + /* the IE itself must have 255 bytes for fragments to follow */ + if (len < 255) + return pos; + + while (pos < ielen && ies[pos] == WLAN_EID_FRAGMENT) { + len = ies[pos + 1]; + pos += 2 + len; + } + + return pos; +} + size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, const u8 *after_ric, int n_after_ric, @@ -1390,14 +1406,14 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { - pos += 2 + ies[pos + 1]; + pos = skip_ie(ies, ielen, pos); while (pos < ielen && !ieee80211_id_in_list(after_ric, n_after_ric, ies[pos])) - pos += 2 + ies[pos + 1]; + pos = skip_ie(ies, ielen, pos); } else { - pos += 2 + ies[pos + 1]; + pos = skip_ie(ies, ielen, pos); } } @@ -1558,31 +1574,57 @@ bool ieee80211_chandef_to_operating_class(struct 
cfg80211_chan_def *chandef, } EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); -int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, - u32 beacon_int) +static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, + u32 *beacon_int_gcd, + bool *beacon_int_different) { struct wireless_dev *wdev; - int res = 0; - if (beacon_int < 10 || beacon_int > 10000) - return -EINVAL; + *beacon_int_gcd = 0; + *beacon_int_different = false; - list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + list_for_each_entry(wdev, &wiphy->wdev_list, list) { if (!wdev->beacon_interval) continue; - if (wdev->beacon_interval != beacon_int) { - res = -EINVAL; - break; + + if (!*beacon_int_gcd) { + *beacon_int_gcd = wdev->beacon_interval; + continue; } + + if (wdev->beacon_interval == *beacon_int_gcd) + continue; + + *beacon_int_different = true; + *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval); } - return res; + if (new_beacon_int && *beacon_int_gcd != new_beacon_int) { + if (*beacon_int_gcd) + *beacon_int_different = true; + *beacon_int_gcd = gcd(*beacon_int_gcd, new_beacon_int); + } +} + +int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype, u32 beacon_int) +{ + /* + * This is just a basic pre-condition check; if interface combinations + * are possible the driver must already be checking those with a call + * to cfg80211_check_combinations(), in which case we'll validate more + * through the cfg80211_calculate_bi_data() call and code in + * cfg80211_iter_combinations(). 
+ */ + + if (beacon_int < 10 || beacon_int > 10000) + return -EINVAL; + + return 0; } int cfg80211_iter_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES], + struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data) @@ -1592,8 +1634,23 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, int i, j, iftype; int num_interfaces = 0; u32 used_iftypes = 0; + u32 beacon_int_gcd; + bool beacon_int_different; - if (radar_detect) { + /* + * This is a bit strange, since the iteration used to rely only on + * the data given by the driver, but here it now relies on context, + * in form of the currently operating interfaces. + * This is OK for all current users, and saves us from having to + * push the GCD calculations into all the drivers. + * In the future, this should probably rely more on data that's in + * cfg80211 already - the only thing not would appear to be any new + * interfaces (while being brought up) and channel/radar data. 
+ */ + cfg80211_calculate_bi_data(wiphy, params->new_beacon_int, + &beacon_int_gcd, &beacon_int_different); + + if (params->radar_detect) { rcu_read_lock(); regdom = rcu_dereference(cfg80211_regdomain); if (regdom) @@ -1602,8 +1659,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, } for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - num_interfaces += iftype_num[iftype]; - if (iftype_num[iftype] > 0 && + num_interfaces += params->iftype_num[iftype]; + if (params->iftype_num[iftype] > 0 && !(wiphy->software_iftypes & BIT(iftype))) used_iftypes |= BIT(iftype); } @@ -1617,7 +1674,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if (num_interfaces > c->max_interfaces) continue; - if (num_different_channels > c->num_different_channels) + if (params->num_different_channels > c->num_different_channels) continue; limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, @@ -1632,16 +1689,17 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, all_iftypes |= limits[j].types; if (!(limits[j].types & BIT(iftype))) continue; - if (limits[j].max < iftype_num[iftype]) + if (limits[j].max < params->iftype_num[iftype]) goto cont; - limits[j].max -= iftype_num[iftype]; + limits[j].max -= params->iftype_num[iftype]; } } - if (radar_detect != (c->radar_detect_widths & radar_detect)) + if (params->radar_detect != + (c->radar_detect_widths & params->radar_detect)) goto cont; - if (radar_detect && c->radar_detect_regions && + if (params->radar_detect && c->radar_detect_regions && !(c->radar_detect_regions & BIT(region))) goto cont; @@ -1653,6 +1711,14 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if ((all_iftypes & used_iftypes) != used_iftypes) goto cont; + if (beacon_int_gcd) { + if (c->beacon_int_min_gcd && + beacon_int_gcd < c->beacon_int_min_gcd) + goto cont; + if (!c->beacon_int_min_gcd && beacon_int_different) + goto cont; + } + /* This combination covered all interface types and * supported the requested numbers, so we're good. 
*/ @@ -1675,14 +1741,11 @@ cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c, } int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]) + struct iface_combination_params *params) { int err, num = 0; - err = cfg80211_iter_combinations(wiphy, num_different_channels, - radar_detect, iftype_num, + err = cfg80211_iter_combinations(wiphy, params, cfg80211_iter_sum_ifcombs, &num); if (err) return err; |