diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 19:01:50 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 19:01:50 +0200 |
commit | 3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch) | |
tree | 3df72faaacd494d5ac8c9668df4f529b1b5e4457 /net/bridge | |
parent | Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s39... (diff) | |
parent | genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP (diff) | |
download | linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.tar.xz linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller:
1) Remove the ipv4 routing cache. Now lookups go directly into the FIB
trie and use prebuilt routes cached there.
No more garbage collection, no more rDOS attacks on the routing
cache. Instead we now get predictable and consistent performance,
no matter what the pattern of traffic we service.
This has been almost 2 years in the making. Special thanks to
Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who
have helped along the way.
I'm sure that with a change of this magnitude there will be some
kind of fallout, but such things ought the be simple to fix at this
point. Luckily I'm not European so I'll be around all of August to
fix things :-)
The major stages of this work here are each fronted by a forced
merge commit whose commit message contains a top-level description
of the motivations and implementation issues.
2) Pre-demux of established ipv4 TCP sockets, saves a route demux on
input.
3) TCP SYN/ACK performance tweaks from Eric Dumazet.
4) Add namespace support for netfilter L4 conntrack helpers, from Gao
Feng.
5) Add config mechanism for Energy Efficient Ethernet to ethtool, from
Yuval Mintz.
6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet.
7) Support for connection tracker helpers in userspace, from Pablo
Neira Ayuso.
8) Allow userspace driven TX load balancing functions in TEAM driver,
from Jiri Pirko.
9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with
embedded gotos.
10) TCP Small Queues, essentially minimize the amount of TCP data queued
up in the packet scheduler layer. Whereas the existing BQL (Byte
Queue Limits) limits the pkt_sched --> netdevice queuing levels,
this controls the TCP --> pkt_sched queueing levels.
From Eric Dumazet.
11) Reduce the number of get_page/put_page ops done on SKB fragments,
from Alexander Duyck.
12) Implement protection against blind resets in TCP (RFC 5961), from
Eric Dumazet.
13) Support the client side of TCP Fast Open, basically the ability to
send data in the SYN exchange, from Yuchung Cheng.
Basically, the sender queues up data with a sendmsg() call using
MSG_FASTOPEN, then they do the connect() which emits the queued up
fastopen data.
14) Avoid all the problems we get into in TCP when timers or PMTU events
hit a locked socket. The TCP Small Queues changes added a
tcp_release_cb() that allows us to queue work up to the
release_sock() caller, and that's what we use here too. From Eric
Dumazet.
15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits)
genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP
r8169: revert "add byte queue limit support".
ipv4: Change rt->rt_iif encoding.
net: Make skb->skb_iif always track skb->dev
ipv4: Prepare for change of rt->rt_iif encoding.
ipv4: Remove all RTCF_DIRECTSRC handliing.
ipv4: Really ignore ICMP address requests/replies.
decnet: Don't set RTCF_DIRECTSRC.
net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse.
ipv4: Remove redundant assignment
rds: set correct msg_namelen
openvswitch: potential NULL deref in sample()
tcp: dont drop MTU reduction indications
bnx2x: Add new 57840 device IDs
tcp: avoid oops in tcp_metrics and reset tcpm_stamp
niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value
niu: Fix to check for dma mapping errors.
net: Fix references to out-of-scope variables in put_cmsg_compat()
net: ethernet: davinci_emac: add pm_runtime support
net: ethernet: davinci_emac: Remove unnecessary #include
...
Diffstat (limited to 'net/bridge')
-rw-r--r-- | net/bridge/br_device.c | 9 | ||||
-rw-r--r-- | net/bridge/br_multicast.c | 11 | ||||
-rw-r--r-- | net/bridge/br_netfilter.c | 77 | ||||
-rw-r--r-- | net/bridge/netfilter/ebt_ulog.c | 29 |
4 files changed, 71 insertions, 55 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 929e48aed444..333484537600 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -127,9 +127,9 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, const struct br_cpu_netstats *bstats = per_cpu_ptr(br->stats, cpu); do { - start = u64_stats_fetch_begin(&bstats->syncp); + start = u64_stats_fetch_begin_bh(&bstats->syncp); memcpy(&tmp, bstats, sizeof(tmp)); - } while (u64_stats_fetch_retry(&bstats->syncp, start)); + } while (u64_stats_fetch_retry_bh(&bstats->syncp, start)); sum.tx_bytes += tmp.tx_bytes; sum.tx_packets += tmp.tx_packets; sum.rx_bytes += tmp.rx_bytes; @@ -246,10 +246,7 @@ int br_netpoll_enable(struct net_bridge_port *p) if (!np) goto out; - np->dev = p->dev; - strlcpy(np->dev_name, p->dev->name, IFNAMSIZ); - - err = __netpoll_setup(np); + err = __netpoll_setup(np, p->dev); if (err) { kfree(np); goto out; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b66581208cb2..241743417f49 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -540,10 +540,11 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( if (mdb->size >= max) { max *= 2; - if (unlikely(max >= br->hash_max)) { - br_warn(br, "Multicast hash table maximum " - "reached, disabling snooping: %s, %d\n", - port ? port->dev->name : br->dev->name, max); + if (unlikely(max > br->hash_max)) { + br_warn(br, "Multicast hash table maximum of %d " + "reached, disabling snooping: %s\n", + br->hash_max, + port ? port->dev->name : br->dev->name); err = -E2BIG; disable: br->multicast_disabled = 1; @@ -1160,7 +1161,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; } mld = (struct mld_msg *) icmp6_hdr(skb); - max_delay = msecs_to_jiffies(htons(mld->mld_maxdelay)); + max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; } else if (skb->len >= sizeof(*mld2q)) { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index e41456bd3cc6..68e8f364bbf8 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,7 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ +} + +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } @@ -120,7 +126,9 @@ static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) return NULL; } -static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { return NULL; } @@ -134,6 +142,7 @@ static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, + .redirect = fake_redirect, .cow_metrics = fake_cow_metrics, .neigh_lookup = fake_neigh_lookup, .mtu = fake_mtu, @@ -373,19 +382,29 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) if (!skb->dev) goto free_skb; dst = skb_dst(skb); - neigh = dst_get_neighbour_noref(dst); - if (neigh->hh.hh_len) { - neigh_hh_bridge(&neigh->hh, skb); - skb->dev = nf_bridge->physindev; - return br_handle_frame_finish(skb); - } else { - /* the neighbour function below overwrites the complete - * MAC header, so we save the Ethernet source address and - * protocol number. */ - skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); - /* tell br_dev_xmit to continue with forwarding */ - nf_bridge->mask |= BRNF_BRIDGED_DNAT; - return neigh->output(neigh, skb); + neigh = dst_neigh_lookup_skb(dst, skb); + if (neigh) { + int ret; + + if (neigh->hh.hh_len) { + neigh_hh_bridge(&neigh->hh, skb); + skb->dev = nf_bridge->physindev; + ret = br_handle_frame_finish(skb); + } else { + /* the neighbour function below overwrites the complete + * MAC header, so we save the Ethernet source address and + * protocol number. + */ + skb_copy_from_linear_data_offset(skb, + -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, + ETH_HLEN-ETH_ALEN); + /* tell br_dev_xmit to continue with forwarding */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + ret = neigh->output(neigh, skb); + } + neigh_release(neigh); + return ret; } free_skb: kfree_skb(skb); @@ -764,9 +783,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, return NF_DROP; if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) - pf = PF_INET; + pf = NFPROTO_IPV4; else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) - pf = PF_INET6; + pf = NFPROTO_IPV6; else return NF_ACCEPT; @@ -778,13 +797,13 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (pf == PF_INET && br_parse_ip_options(skb)) + if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) return NF_DROP; /* The physdev module checks on this */ nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; - if (pf == PF_INET) + if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); @@ -871,9 +890,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, return NF_DROP; if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) - pf = PF_INET; + pf = NFPROTO_IPV4; else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) - pf = PF_INET6; + pf = NFPROTO_IPV6; else return NF_ACCEPT; @@ -886,7 +905,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, nf_bridge_pull_encap_header(skb); nf_bridge_save_header(skb); - if (pf == PF_INET) + if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); @@ -919,49 +938,49 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { { .hook = br_nf_pre_routing, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_local_in, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_forward_ip, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF - 1, }, { .hook = br_nf_forward_arp, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_post_routing, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_LAST, }, { .hook = ip_sabotage_in, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_FIRST, }, { .hook = ip_sabotage_in, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 5449294bdd5e..19063473c71f 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -145,19 +145,24 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (!ub->skb) { if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; + goto unlock; } else if (size > skb_tailroom(ub->skb)) { ulog_send(group); if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; + goto unlock; } - nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, 0, - size - NLMSG_ALIGN(sizeof(*nlh))); + nlh = nlmsg_put(ub->skb, 0, ub->qlen, 0, + size - NLMSG_ALIGN(sizeof(*nlh)), 0); + if (!nlh) { + kfree_skb(ub->skb); + ub->skb = NULL; + goto unlock; + } ub->qlen++; - pm = NLMSG_DATA(nlh); + pm = nlmsg_data(nlh); /* Fill in the ulog data */ pm->version = EBT_ULOG_VERSION; @@ -209,14 +214,6 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, unlock: spin_unlock_bh(lock); - - return; - -nlmsg_failure: - pr_debug("error during NLMSG_PUT. This should " - "not happen, please report to author.\n"); -alloc_failure: - goto unlock; } /* this function is registered with the netfilter core */ @@ -285,6 +282,9 @@ static int __init ebt_ulog_init(void) { int ret; int i; + struct netlink_kernel_cfg cfg = { + .groups = EBT_ULOG_MAXNLGROUPS, + }; if (nlbufsiz >= 128*1024) { pr_warning("Netlink buffer has to be <= 128kB," @@ -299,8 +299,7 @@ static int __init ebt_ulog_init(void) } ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, - EBT_ULOG_MAXNLGROUPS, NULL, NULL, - THIS_MODULE); + THIS_MODULE, &cfg); if (!ebtulognl) ret = -ENOMEM; else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) |