diff options
Diffstat (limited to 'net')
30 files changed, 434 insertions, 247 deletions
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 65728e0dc4ff..0ee453fad3de 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -987,15 +987,12 @@ static int __init br_netfilter_init(void) if (brnf_sysctl_header == NULL) { printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); - ret = -ENOMEM; - goto err1; + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + return -ENOMEM; } #endif printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; -err1: - nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); - return ret; } static void __exit br_netfilter_fini(void) diff --git a/net/core/dev.c b/net/core/dev.c index d030575532a2..8f9710c62e20 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4024,6 +4024,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ switch (skb->ip_summed) { @@ -5335,7 +5336,7 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink); /** * netdev_bonding_info_change - Dispatch event about slave change * @dev: device - * @netdev_bonding_info: info to dispatch + * @bonding_info: info to dispatch * * Send NETDEV_BONDING_INFO to netdev notifiers with info. * The caller must hold the RTNL lock. diff --git a/net/core/filter.c b/net/core/filter.c index ec9baea10c16..f6bdc2b1ba01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -531,7 +531,7 @@ do_pass: *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; - /* Unkown instruction. */ + /* Unknown instruction. */ default: goto err; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 999341244434..f2aa73bfb0e4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -614,8 +614,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, { struct rps_map *map; cpumask_var_t mask; - size_t len = 0; - int i; + int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; @@ -626,17 +625,11 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, for (i = 0; i < map->len; i++) cpumask_set_cpu(map->cpus[i], mask); - len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); - if (PAGE_SIZE - len < 3) { - rcu_read_unlock(); - free_cpumask_var(mask); - return -EINVAL; - } + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); rcu_read_unlock(); - free_cpumask_var(mask); - len += sprintf(buf + len, "\n"); - return len; + + return len < PAGE_SIZE ? len : -EINVAL; } static ssize_t store_rps_map(struct netdev_rx_queue *queue, @@ -1090,8 +1083,7 @@ static ssize_t show_xps_map(struct netdev_queue *queue, struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; - size_t len = 0; - int i; + int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; @@ -1117,15 +1109,9 @@ static ssize_t show_xps_map(struct netdev_queue *queue, } rcu_read_unlock(); - len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); - if (PAGE_SIZE - len < 3) { - free_cpumask_var(mask); - return -EINVAL; - } - + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); - len += sprintf(buf + len, "\n"); - return len; + return len < PAGE_SIZE ? len : -EINVAL; } static ssize_t store_xps_map(struct netdev_queue *queue, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9fa25b0ea145..b4899f5b7388 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -97,7 +97,7 @@ * New xmit() return, do_div and misc clean up by Stephen Hemminger * <shemminger@osdl.org> 040923 * - * Randy Dunlap fixed u64 printk compiler waring + * Randy Dunlap fixed u64 printk compiler warning * * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org> * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213 diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5be499b6a2d2..ab293a3066b3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2162,7 +2162,14 @@ replay: } err = rtnl_configure_link(dev, ifm); if (err < 0) { - unregister_netdevice(dev); + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } goto out; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eaa51ddf2368..433424804284 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -155,7 +155,7 @@ write_unlock: rcu_read_unlock(); len = min(sizeof(kbuf) - 1, *lenp); - len = cpumask_scnprintf(kbuf, len, mask); + len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); if (!len) { *lenp = 0; goto done; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d104ae15836f..f23deadf42a0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -521,10 +521,13 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, struct device_node *phy_dn, *port_dn; bool phy_is_fixed = false; u32 phy_flags = 0; - int ret; + int mode, ret; port_dn = cd->port_dn[p->port]; - p->phy_interface = of_get_phy_mode(port_dn); + mode = of_get_phy_mode(port_dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + p->phy_interface = mode; phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); if (of_phy_is_fixed_link(port_dn)) { @@ -559,6 +562,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, if (!p->phy) return -ENODEV; + /* Use already configured phy mode */ + p->phy_interface = p->phy->interface; phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, p->phy_interface); } else { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5160c710f2eb..e361ea6f3fc8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -378,20 +378,18 @@ static int cipso_v4_cache_check(const unsigned char *key, * negative values on failure. * */ -int cipso_v4_cache_add(const struct sk_buff *skb, +int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; - unsigned char *cipso_ptr; u32 cipso_ptr_len; if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) return 0; - cipso_ptr = CIPSO_V4_OPTPTR(skb); cipso_ptr_len = cipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); @@ -1579,6 +1577,33 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, } /** + * cipso_v4_optptr - Find the CIPSO option in the packet + * @skb: the packet + * + * Description: + * Parse the packet's IP header looking for a CIPSO option. Returns a pointer + * to the start of the CIPSO option on success, NULL if one if not found. + * + */ +unsigned char *cipso_v4_optptr(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + unsigned char *optptr = (unsigned char *)&(ip_hdr(skb)[1]); + int optlen; + int taglen; + + for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { + if (optptr[0] == IPOPT_CIPSO) + return optptr; + taglen = optptr[1]; + optlen -= taglen; + optptr += taglen; + } + + return NULL; +} + +/** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error * @@ -2119,8 +2144,8 @@ void cipso_v4_req_delattr(struct request_sock *req) * on success and negative values on failure. * */ -static int cipso_v4_getattr(const unsigned char *cipso, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; u32 doi; @@ -2305,22 +2330,6 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb) return 0; } -/** - * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option - * @skb: the packet - * @secattr: the security attributes - * - * Description: - * Parse the given packet's CIPSO option and return the security attributes. - * Returns zero on success and negative values on failure. - * - */ -int cipso_v4_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr); -} - /* * Setup Functions */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f0b4a31d7bd6..3a8985c94581 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1186,7 +1186,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) no_in_dev: /* Not loopback addresses on loopback should be preferred - in this case. It is importnat that lo is the first interface + in this case. It is important that lo is the first interface in dev_base list. */ for_each_netdev_rcu(net, dev) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 92ddea1e6457..ff069f6597ac 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -22,14 +22,18 @@ static LIST_HEAD(fou_list); struct fou { struct socket *sock; u8 protocol; + u8 flags; u16 port; struct udp_offload udp_offloads; struct list_head list; }; +#define FOU_F_REMCSUM_NOPARTIAL BIT(0) + struct fou_cfg { u16 type; u8 protocol; + u8 flags; struct udp_port_cfg udp_config; }; @@ -64,24 +68,20 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, - void *data, size_t hdrlen, u8 ipproto) + void *data, size_t hdrlen, u8 ipproto, + bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - if (skb->remcsum_offload) { - /* Already processed in GRO path */ - skb->remcsum_offload = 0; - return guehdr; - } - if (!pskb_may_pull(skb, plen)) return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, + start, offset, nopartial); return guehdr; } @@ -142,7 +142,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_remcsum(skb, guehdr, data + doffset, - hdrlen, guehdr->proto_ctype); + hdrlen, guehdr->proto_ctype, + !!(fou->flags & + FOU_F_REMCSUM_NOPARTIAL)); if (!guehdr) goto drop; @@ -214,7 +216,8 @@ out_unlock: static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, struct guehdr *guehdr, void *data, - size_t hdrlen, u8 ipproto) + size_t hdrlen, u8 ipproto, + struct gro_remcsum *grc, bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); @@ -222,7 +225,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); if (skb->remcsum_offload) - return guehdr; + return NULL; if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; @@ -234,7 +237,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, + start, offset, grc, nopartial); skb->remcsum_offload = 1; @@ -254,6 +258,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, void *data; u16 doffset = 0; int flush = 1; + struct fou *fou = container_of(uoff, struct fou, udp_offloads); + struct gro_remcsum grc; + + skb_gro_remcsum_init(&grc); off = skb_gro_offset(skb); len = off + sizeof(*guehdr); @@ -295,7 +303,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_gro_remcsum(skb, off, guehdr, data + doffset, hdrlen, - guehdr->proto_ctype); + guehdr->proto_ctype, &grc, + !!(fou->flags & + FOU_F_REMCSUM_NOPARTIAL)); if (!guehdr) goto out; @@ -345,6 +355,7 @@ out_unlock: rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, &grc); return pp; } @@ -455,6 +466,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; + fou->flags = cfg->flags; fou->port = cfg->udp_config.local_udp_port; /* Initial for fou type */ @@ -541,6 +553,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, [FOU_ATTR_TYPE] = { .type = NLA_U8, }, + [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, }; static int parse_nl_config(struct genl_info *info, @@ -571,6 +584,9 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_TYPE]) cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); + if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) + cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; + return 0; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 53db2c309572..ea82fd492c1b 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -134,6 +134,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; + u32 end_seq; req->num_retrans = 0; req->num_timeout = 0; @@ -185,20 +186,35 @@ static bool tcp_fastopen_create_child(struct sock *sk, /* Queue the data carried in the SYN packet. We need to first * bump skb's refcnt because the caller will attempt to free it. + * Note that IPv6 might also have used skb_get() trick + * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts) + * So we need to eventually get a clone of the packet, + * before inserting it in sk_receive_queue. * * XXX (TFO) - we honor a zero-payload TFO request for now, * (any reason not to?) but no need to queue the skb since * there is no data. How about SYN+FIN? */ - if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) { - skb = skb_get(skb); - skb_dst_drop(skb); - __skb_pull(skb, tcp_hdr(skb)->doff * 4); - skb_set_owner_r(skb, child); - __skb_queue_tail(&child->sk_receive_queue, skb); - tp->syn_data_acked = 1; + end_seq = TCP_SKB_CB(skb)->end_seq; + if (end_seq != TCP_SKB_CB(skb)->seq + 1) { + struct sk_buff *skb2; + + if (unlikely(skb_shared(skb))) + skb2 = skb_clone(skb, GFP_ATOMIC); + else + skb2 = skb_get(skb); + + if (likely(skb2)) { + skb_dst_drop(skb2); + __skb_pull(skb2, tcp_hdrlen(skb)); + skb_set_owner_r(skb2, child); + __skb_queue_tail(&child->sk_receive_queue, skb2); + tp->syn_data_acked = 1; + } else { + end_seq = TCP_SKB_CB(skb)->seq + 1; + } } - tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq; sk->sk_data_ready(sk); bh_unlock_sock(child); sock_put(child); diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index c2a75c6957a1..2379c1b4efb2 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -47,6 +47,10 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) return; percpu_counter_destroy(&cg_proto->sockets_allocated); + + if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) + static_key_slow_dec(&memcg_socket_limit_enabled); + } EXPORT_SYMBOL(tcp_destroy_cgroup); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d10f6f4ead27..4915d8284a86 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -402,6 +402,13 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) } rcu_read_unlock(); + + if (skb->remcsum_offload) + skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; + + skb->encapsulation = 1; + skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); + return err; } @@ -410,9 +417,13 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) + if (uh->check) { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, iph->daddr, 0); + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } return udp_gro_complete(skb, nhoff); } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 2f780cba6e12..f45d6db50a45 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -172,7 +172,7 @@ static void __net_exit ip6_fl_purge(struct net *net) { int i; - spin_lock(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl; struct ip6_flowlabel __rcu **flp; @@ -190,7 +190,7 @@ static void __net_exit ip6_fl_purge(struct net *net) flp = &fl->next; } } - spin_unlock(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d33df4cbd872..7deebf102cba 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1273,7 +1273,7 @@ emsgsize: /* If this is the first and only packet and device * supports checksum offloading, let's use it. */ - if (!skb && + if (!skb && sk->sk_protocol == IPPROTO_UDP && length + fragheaderlen < mtu && rt->dst.dev->features & NETIF_F_V6_CSUM && !exthdrlen) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 98565ce0ebcd..4688bd4d7f59 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -141,7 +141,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) u32 *p = NULL; if (!(rt->dst.flags & DST_HOST)) - return NULL; + return dst_cow_metrics_generic(dst, old); peer = rt6_get_peer_create(rt); if (peer) { diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a56276996b72..ab889bb16b3c 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -161,9 +161,13 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) + if (uh->check) { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, &ipv6h->daddr, 0); + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } return udp_gro_complete(skb, nhoff); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 265e190f2218..c598f74063a1 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -19,6 +19,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_bridge/ebtables.h> #include <net/netfilter/nf_tables.h> static int nft_compat_chain_validate_dependency(const char *tablename, @@ -40,6 +41,7 @@ static int nft_compat_chain_validate_dependency(const char *tablename, union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; + struct ebt_entry ebt; }; static inline void @@ -50,9 +52,9 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) par->hotdrop = false; } -static void nft_target_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_target_eval_xt(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); struct xt_target *target = expr->ops->data; @@ -66,7 +68,7 @@ static void nft_target_eval(const struct nft_expr *expr, if (pkt->xt.hotdrop) ret = NF_DROP; - switch(ret) { + switch (ret) { case XT_CONTINUE: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; break; @@ -74,7 +76,41 @@ static void nft_target_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = ret; break; } - return; +} + +static void nft_target_eval_bridge(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct sk_buff *skb = pkt->skb; + int ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info); + + ret = target->target(skb, &pkt->xt); + + if (pkt->xt.hotdrop) + ret = NF_DROP; + + switch (ret) { + case EBT_ACCEPT: + data[NFT_REG_VERDICT].verdict = NF_ACCEPT; + break; + case EBT_DROP: + data[NFT_REG_VERDICT].verdict = NF_DROP; + break; + case EBT_CONTINUE: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + case EBT_RETURN: + data[NFT_REG_VERDICT].verdict = NFT_RETURN; + break; + default: + data[NFT_REG_VERDICT].verdict = ret; + break; + } } static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { @@ -100,6 +136,10 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; + case NFPROTO_BRIDGE: + entry->ebt.ethproto = proto; + entry->ebt.invflags = inv ? EBT_IPROTO : 0; + break; } par->entryinfo = entry; par->target = target; @@ -307,6 +347,10 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; + case NFPROTO_BRIDGE: + entry->ebt.ethproto = proto; + entry->ebt.invflags = inv ? EBT_IPROTO : 0; + break; } par->entryinfo = entry; par->match = match; @@ -490,6 +534,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, case AF_INET6: fmt = "ip6t_%s"; break; + case NFPROTO_BRIDGE: + fmt = "ebt_%s"; + break; default: pr_err("nft_compat: unsupported protocol %d\n", nfmsg->nfgen_family); @@ -663,13 +710,17 @@ nft_target_select_ops(const struct nft_ctx *ctx, nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); - nft_target->ops.eval = nft_target_eval; nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; nft_target->ops.data = target; + if (family == NFPROTO_BRIDGE) + nft_target->ops.eval = nft_target_eval_bridge; + else + nft_target->ops.eval = nft_target_eval_xt; + list_add(&nft_target->head, &nft_target_list); return &nft_target->ops; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 6404a726d17b..9615b8b9fb37 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -39,6 +39,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET] = { .type = NLA_STRING }, + [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, }; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index a845cd4cf21e..28cddc85b700 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -1065,10 +1065,12 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + switch (family) { case AF_INET: - if (CIPSO_V4_OPTEXIST(skb) && - cipso_v4_skbuff_getattr(skb, secattr) == 0) + ptr = cipso_v4_optptr(skb); + if (ptr && cipso_v4_getattr(ptr, secattr) == 0) return 0; break; #if IS_ENABLED(CONFIG_IPV6) @@ -1094,7 +1096,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, */ void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { - if (CIPSO_V4_OPTEXIST(skb)) + if (cipso_v4_optptr(skb)) cipso_v4_error(skb, error, gateway); } @@ -1126,11 +1128,14 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + if ((secattr->flags & NETLBL_SECATTR_CACHE) == 0) return -ENOMSG; - if (CIPSO_V4_OPTEXIST(skb)) - return cipso_v4_cache_add(skb, secattr); + ptr = cipso_v4_optptr(skb); + if (ptr) + return cipso_v4_cache_add(ptr, secattr); return -ENOMSG; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index e2c348b8baca..50ec42f170a0 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -717,6 +717,8 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr, { int err; + memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); + /* Extract metadata from netlink attributes. */ err = ovs_nla_get_flow_metadata(attr, key, log); if (err) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 993281e6278d..216f20b90aa5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1516,7 +1516,7 @@ int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) /* Called with ovs_mutex or RCU read lock. */ int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) { - return ovs_nla_put_key(&flow->mask->key, &flow->key, + return ovs_nla_put_key(&flow->key, &flow->key, OVS_FLOW_ATTR_KEY, false, skb); } @@ -1746,7 +1746,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start, opts_type; + int err = 0, start, opts_type; ovs_match_init(&match, &key, NULL); opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); diff --git a/net/rds/cong.c b/net/rds/cong.c index e5b65acd650b..e6144b8246fd 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -221,7 +221,21 @@ void rds_cong_queue_updates(struct rds_cong_map *map) list_for_each_entry(conn, &map->m_conn_list, c_map_item) { if (!test_and_set_bit(0, &conn->c_map_queued)) { rds_stats_inc(s_cong_update_queued); - rds_send_xmit(conn); + /* We cannot inline the call to rds_send_xmit() here + * for two reasons (both pertaining to a TCP transport): + * 1. When we get here from the receive path, we + * are already holding the sock_lock (held by + * tcp_v4_rcv()). So inlining calls to + * tcp_setsockopt and/or tcp_sendmsg will deadlock + * when it tries to get the sock_lock()) + * 2. Interrupts are masked so that we can mark the + * the port congested from both send and recv paths. + * (See comment around declaration of rdc_cong_lock). + * An attempt to get the sock_lock() here will + * therefore trigger warnings. + * Defer the xmit to rds_send_worker() instead. + */ + queue_delayed_work(rds_wq, &conn->c_send_w, 0); } } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 91eaef1844c8..78974e4d9ad2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL_GPL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold the BKL or - * the "service mutex", whichever is appropriate for the service. + * Called from a server thread as it's exiting. Caller must hold the "service + * mutex" for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c69358b3cf7f..163ac45c3639 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list); * svc_pool->sp_lock protects most of the fields of that pool. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. - * BKL protects svc_serv->sv_nrthread. + * The "service mutex" protects svc_serv->sv_nrthread. * svc_sock->sk_lock protects the svc_sock->sk_deferred list * and the ->sk_info_authunix cache. * @@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list); * that no other thread will be using the transport or will * try to set XPT_DEAD. */ - int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 65b146297f5a..b681855cf970 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -71,22 +71,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend) } /* - * Determine number of chunks and total bytes in chunk list. The chunk - * list has already been verified to fit within the RPCRDMA header. - */ -void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, - int *ch_count, int *byte_count) -{ - /* compute the number of bytes represented by read chunks */ - *byte_count = 0; - *ch_count = 0; - for (; ch->rc_discrim != 0; ch++) { - *byte_count = *byte_count + ntohl(ch->rc_target.rs_length); - *ch_count = *ch_count + 1; - } -} - -/* * Decodes a write chunk list. The expected format is as follows: * descrim : xdr_one * nchunks : <count> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e0110270d650..f9f13a32ddb8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -43,7 +43,6 @@ #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> #include <linux/spinlock.h> -#include <linux/highmem.h> #include <asm/unaligned.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -60,6 +59,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *ctxt, u32 byte_count) { + struct rpcrdma_msg *rmsgp; struct page *page; u32 bc; int sge_no; @@ -82,7 +82,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* If data remains, store it in the pagelist */ rqstp->rq_arg.page_len = bc; rqstp->rq_arg.page_base = 0; - rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) + rqstp->rq_arg.pages = &rqstp->rq_pages[0]; + else + rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + sge_no = 1; while (bc && sge_no < ctxt->count) { page = ctxt->pages[sge_no]; @@ -95,14 +102,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_respages = &rqstp->rq_pages[sge_no]; rqstp->rq_next_page = rqstp->rq_respages + 1; - /* We should never run out of SGE because the limit is defined to - * support the max allowed RPC data length - */ - BUG_ON(bc && (sge_no == ctxt->count)); - BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) - != byte_count); - BUG_ON(rqstp->rq_arg.len != byte_count); - /* If not all pages were used from the SGL, free the remaining ones */ bc = sge_no; while (sge_no < ctxt->count) { @@ -125,26 +124,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) return min_t(int, sge_count, xprt->sc_max_sge); } -typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last); - /* Issue an RDMA_READ using the local lkey to map the data sink */ -static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; @@ -229,15 +218,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, } /* Issue an RDMA_READ using an FRMR to map the data sink */ -static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; struct ib_send_wr inv_wr; @@ -365,24 +354,84 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, return ret; } +static unsigned int +rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) +{ + unsigned int count; + + for (count = 0; ch->rc_discrim != xdr_zero; ch++) + count++; + return count; +} + +/* If there was additional inline content, append it to the end of arg.pages. + * Tail copy has to be done after the reader function has determined how many + * pages are needed for RDMA READ. + */ +static int +rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, + u32 position, u32 byte_count, u32 page_offset, int page_no) +{ + char *srcp, *destp; + int ret; + + ret = 0; + srcp = head->arg.head[0].iov_base + position; + byte_count = head->arg.head[0].iov_len - position; + if (byte_count > PAGE_SIZE) { + dprintk("svcrdma: large tail unsupported\n"); + return 0; + } + + /* Fit as much of the tail on the current page as possible */ + if (page_offset != PAGE_SIZE) { + destp = page_address(rqstp->rq_arg.pages[page_no]); + destp += page_offset; + while (byte_count--) { + *destp++ = *srcp++; + page_offset++; + if (page_offset == PAGE_SIZE && byte_count) + goto more; + } + goto done; + } + +more: + /* Fit the rest on the next page */ + page_no++; + destp = page_address(rqstp->rq_arg.pages[page_no]); + while (byte_count--) + *destp++ = *srcp++; + + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + +done: + byte_count = head->arg.head[0].iov_len - position; + head->arg.page_len += byte_count; + head->arg.len += byte_count; + head->arg.buflen += byte_count; + return 1; +} + static int rdma_read_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { - int page_no, ch_count, ret; + int page_no, ret; struct rpcrdma_read_chunk *ch; - u32 page_offset, byte_count; + u32 handle, page_offset, byte_count; + u32 position; u64 rs_offset; - rdma_reader_fn reader; + bool last; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); if (!ch) return 0; - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); - if (ch_count > RPCSVC_MAXPAGES) + if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) return -EINVAL; /* The request is completed when the RDMA_READs complete. The @@ -391,34 +440,41 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, */ head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; head->hdr_count = head->count; head->arg.page_base = 0; head->arg.page_len = 0; head->arg.len = rqstp->rq_arg.len; head->arg.buflen = rqstp->rq_arg.buflen; - /* Use FRMR if supported */ - if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) - reader = rdma_read_chunk_frmr; - else - reader = rdma_read_chunk_lcl; + ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + position = be32_to_cpu(ch->rc_position); + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + if (position == 0) { + head->arg.pages = &head->pages[0]; + page_offset = head->byte_len; + } else { + head->arg.pages = &head->pages[head->count]; + page_offset = 0; + } - page_no = 0; page_offset = 0; - for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch->rc_discrim != 0; ch++) { + ret = 0; + page_no = 0; + for (; ch->rc_discrim != xdr_zero; ch++) { + if (be32_to_cpu(ch->rc_position) != position) + goto err; + handle = be32_to_cpu(ch->rc_target.rs_handle), + byte_count = be32_to_cpu(ch->rc_target.rs_length); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - byte_count = ntohl(ch->rc_target.rs_length); while (byte_count > 0) { - ret = reader(xprt, rqstp, head, - &page_no, &page_offset, - ntohl(ch->rc_target.rs_handle), - byte_count, rs_offset, - ((ch+1)->rc_discrim == 0) /* last */ - ); + last = (ch + 1)->rc_discrim == xdr_zero; + ret = xprt->sc_reader(xprt, rqstp, head, + &page_no, &page_offset, + handle, byte_count, + rs_offset, last); if (ret < 0) goto err; byte_count -= ret; @@ -426,7 +482,24 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.buflen += ret; } } + + /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ + if (page_offset & 3) { + u32 pad = 4 - (page_offset & 3); + + head->arg.page_len += pad; + head->arg.len += pad; + head->arg.buflen += pad; + page_offset += pad; + } + ret = 1; + if (position && position < head->arg.head[0].iov_len) + ret = rdma_copy_tail(rqstp, head, position, + byte_count, page_offset, page_no); + head->arg.head[0].iov_len = position; + head->position = position; + err: /* Detach arg pages. svc_recv will replenish them */ for (page_no = 0; @@ -436,47 +509,33 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, return ret; } -/* - * To avoid a separate RDMA READ just for a handful of zero bytes, - * RFC 5666 section 3.7 allows the client to omit the XDR zero pad - * in chunk lists. - */ -static void -rdma_fix_xdr_pad(struct xdr_buf *buf) -{ - unsigned int page_len = buf->page_len; - unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len; - unsigned int offset, pg_no; - char *p; - - if (size == 0) - return; - - pg_no = page_len >> PAGE_SHIFT; - offset = page_len & ~PAGE_MASK; - p = page_address(buf->pages[pg_no]); - memset(p + offset, 0, size); - - buf->page_len += size; - buf->buflen += size; - buf->len += size; -} - static int rdma_read_complete(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { int page_no; int ret; - BUG_ON(!head); - /* Copy RPC pages */ for (page_no = 0; page_no < head->count; page_no++) { put_page(rqstp->rq_pages[page_no]); rqstp->rq_pages[page_no] = head->pages[page_no]; } + + /* Adjustments made for RDMA_NOMSG type requests */ + if (head->position == 0) { + if (head->arg.len <= head->sge[0].length) { + head->arg.head[0].iov_len = head->arg.len - + head->byte_len; + head->arg.page_len = 0; + } else { + head->arg.head[0].iov_len = head->sge[0].length - + head->byte_len; + head->arg.page_len = head->arg.len - + head->sge[0].length; + } + } + /* Point rq_arg.pages past header */ - rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; @@ -501,8 +560,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + rqstp->rq_arg.tail[0].iov_len; - dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); @@ -558,7 +617,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", ctxt, rdma_xprt, rqstp, ctxt->wc_status); - BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); atomic_inc(&rdma_stat_recv); /* Build up the XDR from the receive buffers. */ @@ -591,8 +649,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) + rqstp->rq_arg.tail[0].iov_len; svc_rdma_put_context(ctxt, 0); out: - dprintk("svcrdma: ret = %d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9f1b50689c0f..7de33d1af9b6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, u32 page_off; int page_no; - BUG_ON(xdr->len != - (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + if (xdr->len != + (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { + pr_err("svcrdma: map_xdr: XDR buffer length error\n"); + return -EIO; + } /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, int bc; struct svc_rdma_op_ctxt *ctxt; - BUG_ON(vec->count > RPCSVC_MAXPAGES); + if (vec->count > RPCSVC_MAXPAGES) { + pr_err("svcrdma: Too many pages (%lu)\n", vec->count); + return -EIO; + } + dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, vec->sge=%p, vec->count=%lu\n", rmr, (unsigned long long)to, xdr_off, @@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_off = 0; sge_no++; xdr_sge_no++; - BUG_ON(xdr_sge_no > vec->count); + if (xdr_sge_no > vec->count) { + pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no); + goto err; + } bc -= sge_bytes; if (sge_no == xprt->sc_max_sge) break; @@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } - BUG_ON(byte_count != 0); + if (byte_count != 0) { + pr_err("svcrdma: Could not map %d bytes\n", byte_count); + goto err; + } /* Save all respages in the ctxt and remove them from the * respages array. They are our pages until the I/O @@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; - BUG_ON(sge_no > rdma->sc_max_sge); + if (sge_no > rdma->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err; + } memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; @@ -467,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) { } -/* - * Return the start of an xdr buffer. - */ -static void *xdr_start(struct xdr_buf *xdr) -{ - return xdr->head[0].iov_base - - (xdr->len - - xdr->page_len - - xdr->tail[0].iov_len - - xdr->head[0].iov_len); -} - int svc_rdma_sendto(struct svc_rqst *rqstp) { struct svc_xprt *xprt = rqstp->rq_xprt; @@ -496,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); - /* Get the RDMA request header. */ - rdma_argp = xdr_start(&rqstp->rq_arg); + /* Get the RDMA request header. The receive logic always + * places this at the start of page 0. + */ + rdma_argp = page_address(rqstp->rq_pages[0]); /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4e618808bc98..f609c1c2d38d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) struct svcxprt_rdma *xprt; int i; - BUG_ON(!ctxt); xprt = ctxt->xprt; if (free_pages) for (i = 0; i < ctxt->count; i++) @@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt, switch (ctxt->wr_op) { case IB_WR_SEND: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: SEND: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 0); break; @@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt, svc_rdma_put_frmr(xprt, ctxt->frmr); if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); + if (read_hdr) { + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + } else { + pr_err("svcrdma: ctxt->read_hdr == NULL\n"); + } svc_xprt_enqueue(&xprt->sc_xprt); } svc_rdma_put_context(ctxt, 0); break; default: - BUG_ON(1); printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d\n", ctxt->wr_op); @@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) buflen = 0; ctxt->direction = DMA_FROM_DEVICE; for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { - BUG_ON(sge_no >= xprt->sc_max_sge); + if (sge_no >= xprt->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err_put_ctxt; + } page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, @@ -687,7 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, { struct rdma_cm_id *listen_id; struct svcxprt_rdma *cma_xprt; - struct svc_xprt *xprt; int ret; dprintk("svcrdma: Creating RDMA socket\n"); @@ -698,7 +703,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) return ERR_PTR(-ENOMEM); - xprt = &cma_xprt->sc_xprt; listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, IB_QPT_RC); @@ -822,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { frmr_unmap_dma(rdma, frmr); spin_lock_bh(&rdma->sc_frmr_q_lock); - BUG_ON(!list_empty(&frmr->frmr_list)); + WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); spin_unlock_bh(&rdma->sc_frmr_q_lock); } @@ -970,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * NB: iWARP requires remote write access for the data sink * of an RDMA_READ. IB does not. */ + newxprt->sc_reader = rdma_read_chunk_lcl; if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { newxprt->sc_frmr_pg_list_len = devattr.max_fast_reg_page_list_len; newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + newxprt->sc_reader = rdma_read_chunk_frmr; } /* @@ -1125,7 +1131,9 @@ static void __svc_rdma_free(struct work_struct *work) dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); /* We should only be called from kref_put */ - BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0); + if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) + pr_err("svcrdma: sc_xprt still in use? (%d)\n", + atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); /* * Destroy queued, but not processed read completions. Note @@ -1153,8 +1161,12 @@ static void __svc_rdma_free(struct work_struct *work) } /* Warn if we leaked a resource or under-referenced */ - WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); - WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + if (atomic_read(&rdma->sc_ctxt_used) != 0) + pr_err("svcrdma: ctxt still in use? (%d)\n", + atomic_read(&rdma->sc_ctxt_used)); + if (atomic_read(&rdma->sc_dma_used) != 0) + pr_err("svcrdma: dma still in use? (%d)\n", + atomic_read(&rdma->sc_dma_used)); /* De-allocate fastreg mr */ rdma_dealloc_frmr_q(rdma); @@ -1254,7 +1266,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; - BUG_ON(wr->send_flags != IB_SEND_SIGNALED); wr_count = 1; for (n_wr = wr->next; n_wr; n_wr = n_wr->next) wr_count++; |