summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c29
-rw-r--r--net/ipv4/ah4.c7
-rw-r--r--net/ipv4/cipso_ipv4.c113
-rw-r--r--net/ipv4/datagram.c9
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/fib_frontend.c16
-rw-r--r--net/ipv4/fib_trie.c110
-rw-r--r--net/ipv4/icmp.c35
-rw-r--r--net/ipv4/igmp.c22
-rw-r--r--net/ipv4/inet_connection_sock.c22
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_lro.c4
-rw-r--r--net/ipv4/ip_gre.c70
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_options.c38
-rw-r--r--net/ipv4/ip_output.c69
-rw-r--r--net/ipv4/ip_sockglue.c37
-rw-r--r--net/ipv4/ipcomp.c4
-rw-r--r--net/ipv4/ipip.c36
-rw-r--r--net/ipv4/ipmr.c39
-rw-r--r--net/ipv4/netfilter/arp_tables.c18
-rw-r--r--net/ipv4/netfilter/ip_tables.c28
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c2
-rw-r--r--net/ipv4/raw.c47
-rw-r--r--net/ipv4/route.c323
-rw-r--r--net/ipv4/syncookies.c22
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_ipv4.c58
-rw-r--r--net/ipv4/udp.c41
-rw-r--r--net/ipv4/xfrm4_policy.c4
-rw-r--r--net/ipv4/xfrm4_state.c2
31 files changed, 646 insertions, 579 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 807d83c02ef6..7b91fa8bf83c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);
- kfree(inet->opt);
+ kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
sk_refcnt_debug_dec(sk);
}
@@ -1103,14 +1103,18 @@ static int inet_sk_reselect_saddr(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
__be32 old_saddr = inet->inet_saddr;
__be32 daddr = inet->inet_daddr;
+ struct flowi4 fl4;
struct rtable *rt;
__be32 new_saddr;
+ struct ip_options_rcu *inet_opt;
- if (inet->opt && inet->opt->srr)
- daddr = inet->opt->faddr;
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
/* Query new route. */
- rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk),
+ rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk),
sk->sk_bound_dev_if, sk->sk_protocol,
inet->inet_sport, inet->inet_dport, sk, false);
if (IS_ERR(rt))
@@ -1118,7 +1122,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
sk_setup_caps(sk, &rt->dst);
- new_saddr = rt->rt_src;
+ new_saddr = fl4.saddr;
if (new_saddr == old_saddr)
return 0;
@@ -1147,6 +1151,8 @@ int inet_sk_rebuild_header(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
__be32 daddr;
+ struct ip_options_rcu *inet_opt;
+ struct flowi4 fl4;
int err;
/* Route is OK, nothing to do. */
@@ -1154,10 +1160,13 @@ int inet_sk_rebuild_header(struct sock *sk)
return 0;
/* Reroute. */
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
daddr = inet->inet_daddr;
- if (inet->opt && inet->opt->srr)
- daddr = inet->opt->faddr;
- rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr,
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
+ rcu_read_unlock();
+ rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
sk->sk_protocol, RT_CONN_FLAGS(sk),
sk->sk_bound_dev_if);
@@ -1186,7 +1195,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
static int inet_gso_send_check(struct sk_buff *skb)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
const struct net_protocol *ops;
int proto;
int ihl;
@@ -1293,7 +1302,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
const struct net_protocol *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
- struct iphdr *iph;
+ const struct iphdr *iph;
unsigned int hlen;
unsigned int off;
unsigned int id;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4286fd3cc0e2..c1f4154552fc 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -73,7 +73,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
* into IP header for icv calculation. Options are already checked
* for validity, so paranoia is not required. */
-static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr)
+static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
{
unsigned char * optptr = (unsigned char*)(iph+1);
int l = iph->ihl*4 - sizeof(struct iphdr);
@@ -396,7 +396,7 @@ out:
static void ah4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
@@ -404,7 +404,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
- x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ ah->spi, IPPROTO_AH, AF_INET);
if (!x)
return;
printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a0af7ea87870..2b3c23c287cd 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
return CIPSO_V4_HDR_LEN + ret_val;
}
+static void opt_kfree_rcu(struct rcu_head *head)
+{
+ kfree(container_of(head, struct ip_options_rcu, rcu));
+}
+
/**
* cipso_v4_sock_setattr - Add a CIPSO option to a socket
* @sk: the socket
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
unsigned char *buf = NULL;
u32 buf_len;
u32 opt_len;
- struct ip_options *opt = NULL;
+ struct ip_options_rcu *old, *opt = NULL;
struct inet_sock *sk_inet;
struct inet_connection_sock *sk_conn;
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk,
ret_val = -ENOMEM;
goto socket_setattr_failure;
}
- memcpy(opt->__data, buf, buf_len);
- opt->optlen = opt_len;
- opt->cipso = sizeof(struct iphdr);
+ memcpy(opt->opt.__data, buf, buf_len);
+ opt->opt.optlen = opt_len;
+ opt->opt.cipso = sizeof(struct iphdr);
kfree(buf);
buf = NULL;
sk_inet = inet_sk(sk);
+
+ old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk));
if (sk_inet->is_icsk) {
sk_conn = inet_csk(sk);
- if (sk_inet->opt)
- sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
- sk_conn->icsk_ext_hdr_len += opt->optlen;
+ if (old)
+ sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
+ sk_conn->icsk_ext_hdr_len += opt->opt.optlen;
sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
}
- opt = xchg(&sk_inet->opt, opt);
- kfree(opt);
+ rcu_assign_pointer(sk_inet->inet_opt, opt);
+ if (old)
+ call_rcu(&old->rcu, opt_kfree_rcu);
return 0;
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
unsigned char *buf = NULL;
u32 buf_len;
u32 opt_len;
- struct ip_options *opt = NULL;
+ struct ip_options_rcu *opt = NULL;
struct inet_request_sock *req_inet;
/* We allocate the maximum CIPSO option size here so we are probably
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req,
ret_val = -ENOMEM;
goto req_setattr_failure;
}
- memcpy(opt->__data, buf, buf_len);
- opt->optlen = opt_len;
- opt->cipso = sizeof(struct iphdr);
+ memcpy(opt->opt.__data, buf, buf_len);
+ opt->opt.optlen = opt_len;
+ opt->opt.cipso = sizeof(struct iphdr);
kfree(buf);
buf = NULL;
req_inet = inet_rsk(req);
opt = xchg(&req_inet->opt, opt);
- kfree(opt);
+ if (opt)
+ call_rcu(&opt->rcu, opt_kfree_rcu);
return 0;
@@ -2016,34 +2025,34 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
{
int hdr_delta = 0;
- struct ip_options *opt = *opt_ptr;
+ struct ip_options_rcu *opt = *opt_ptr;
- if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
+ if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
unsigned char *cipso_ptr;
int iter;
int optlen_new;
- cipso_off = opt->cipso - sizeof(struct iphdr);
- cipso_ptr = &opt->__data[cipso_off];
+ cipso_off = opt->opt.cipso - sizeof(struct iphdr);
+ cipso_ptr = &opt->opt.__data[cipso_off];
cipso_len = cipso_ptr[1];
- if (opt->srr > opt->cipso)
- opt->srr -= cipso_len;
- if (opt->rr > opt->cipso)
- opt->rr -= cipso_len;
- if (opt->ts > opt->cipso)
- opt->ts -= cipso_len;
- if (opt->router_alert > opt->cipso)
- opt->router_alert -= cipso_len;
- opt->cipso = 0;
+ if (opt->opt.srr > opt->opt.cipso)
+ opt->opt.srr -= cipso_len;
+ if (opt->opt.rr > opt->opt.cipso)
+ opt->opt.rr -= cipso_len;
+ if (opt->opt.ts > opt->opt.cipso)
+ opt->opt.ts -= cipso_len;
+ if (opt->opt.router_alert > opt->opt.cipso)
+ opt->opt.router_alert -= cipso_len;
+ opt->opt.cipso = 0;
memmove(cipso_ptr, cipso_ptr + cipso_len,
- opt->optlen - cipso_off - cipso_len);
+ opt->opt.optlen - cipso_off - cipso_len);
/* determining the new total option length is tricky because of
* the padding necessary, the only thing i can think to do at
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
* from there we can determine the new total option length */
iter = 0;
optlen_new = 0;
- while (iter < opt->optlen)
- if (opt->__data[iter] != IPOPT_NOP) {
- iter += opt->__data[iter + 1];
+ while (iter < opt->opt.optlen)
+ if (opt->opt.__data[iter] != IPOPT_NOP) {
+ iter += opt->opt.__data[iter + 1];
optlen_new = iter;
} else
iter++;
- hdr_delta = opt->optlen;
- opt->optlen = (optlen_new + 3) & ~3;
- hdr_delta -= opt->optlen;
+ hdr_delta = opt->opt.optlen;
+ opt->opt.optlen = (optlen_new + 3) & ~3;
+ hdr_delta -= opt->opt.optlen;
} else {
/* only the cipso option was present on the socket so we can
* remove the entire option struct */
*opt_ptr = NULL;
- hdr_delta = opt->optlen;
- kfree(opt);
+ hdr_delta = opt->opt.optlen;
+ call_rcu(&opt->rcu, opt_kfree_rcu);
}
return hdr_delta;
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
void cipso_v4_sock_delattr(struct sock *sk)
{
int hdr_delta;
- struct ip_options *opt;
+ struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
sk_inet = inet_sk(sk);
- opt = sk_inet->opt;
- if (opt == NULL || opt->cipso == 0)
+ opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
+ if (opt == NULL || opt->opt.cipso == 0)
return;
- hdr_delta = cipso_v4_delopt(&sk_inet->opt);
+ hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
struct inet_connection_sock *sk_conn = inet_csk(sk);
sk_conn->icsk_ext_hdr_len -= hdr_delta;
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options *opt;
+ struct ip_options_rcu *opt;
struct inet_request_sock *req_inet;
req_inet = inet_rsk(req);
opt = req_inet->opt;
- if (opt == NULL || opt->cipso == 0)
+ if (opt == NULL || opt->opt.cipso == 0)
return;
cipso_v4_delopt(&req_inet->opt);
@@ -2184,14 +2193,18 @@ getattr_return:
*/
int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
{
- struct ip_options *opt;
+ struct ip_options_rcu *opt;
+ int res = -ENOMSG;
- opt = inet_sk(sk)->opt;
- if (opt == NULL || opt->cipso == 0)
- return -ENOMSG;
-
- return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr),
- secattr);
+ rcu_read_lock();
+ opt = rcu_dereference(inet_sk(sk)->inet_opt);
+ if (opt && opt->opt.cipso)
+ res = cipso_v4_getattr(opt->opt.__data +
+ opt->opt.cipso -
+ sizeof(struct iphdr),
+ secattr);
+ rcu_read_unlock();
+ return res;
}
/**
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 85bd24ca4f6d..d5a2e6995bae 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
+ struct flowi4 fl4;
struct rtable *rt;
__be32 saddr;
int oif;
@@ -46,7 +47,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!saddr)
saddr = inet->mc_addr;
}
- rt = ip_route_connect(usin->sin_addr.s_addr, saddr,
+ rt = ip_route_connect(&fl4, usin->sin_addr.s_addr, saddr,
RT_CONN_FLAGS(sk), oif,
sk->sk_protocol,
inet->inet_sport, usin->sin_port, sk, true);
@@ -62,13 +63,13 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -EACCES;
}
if (!inet->inet_saddr)
- inet->inet_saddr = rt->rt_src; /* Update source address */
+ inet->inet_saddr = fl4.saddr; /* Update source address */
if (!inet->inet_rcv_saddr) {
- inet->inet_rcv_saddr = rt->rt_src;
+ inet->inet_rcv_saddr = fl4.saddr;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
- inet->inet_daddr = rt->rt_dst;
+ inet->inet_daddr = fl4.daddr;
inet->inet_dport = usin->sin_port;
sk->sk_state = TCP_ESTABLISHED;
inet->inet_id = jiffies;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 03f994bcf7de..a5b413416da3 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -276,7 +276,7 @@ error:
static int esp_input_done2(struct sk_buff *skb, int err)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
struct esp_data *esp = x->data;
struct crypto_aead *aead = esp->aead;
@@ -484,7 +484,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
static void esp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
@@ -492,7 +492,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
- x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ esph->spi, IPPROTO_ESP, AF_INET);
if (!x)
return;
NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 451088330bbb..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
+#include <net/xfrm.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type);
* - check, that packet arrived from expected physical interface.
* called with rcu_read_lock()
*/
-int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
- struct net_device *dev, __be32 *spec_dst,
- u32 *itag, u32 mark)
+int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
+ int oif, struct net_device *dev, __be32 *spec_dst,
+ u32 *itag)
{
struct in_device *in_dev;
struct flowi4 fl4;
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = oif;
- fl4.flowi4_mark = mark;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
no_addr = in_dev->ifa_list == NULL;
- rpf = IN_DEV_RPFILTER(in_dev);
+
+ /* Ignore rp_filter for packets protected by IPsec. */
+ rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
+
accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
- if (mark && !IN_DEV_SRC_VMARK(in_dev))
- fl4.flowi4_mark = 0;
+ fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
}
if (in_dev == NULL)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5fe9b8b41df3..6375c1c5f642 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -126,7 +126,7 @@ struct tnode {
struct work_struct work;
struct tnode *tnode_free;
};
- struct rt_trie_node *child[0];
+ struct rt_trie_node __rcu *child[0];
};
#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,7 +151,7 @@ struct trie_stat {
};
struct trie {
- struct rt_trie_node *trie;
+ struct rt_trie_node __rcu *trie;
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats stats;
#endif
@@ -177,16 +177,29 @@ static const int sync_pages = 128;
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;
-static inline struct tnode *node_parent(struct rt_trie_node *node)
+/*
+ * caller must hold RTNL
+ */
+static inline struct tnode *node_parent(const struct rt_trie_node *node)
{
- return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
+ unsigned long parent;
+
+ parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
+
+ return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
-static inline struct tnode *node_parent_rcu(struct rt_trie_node *node)
+/*
+ * caller must hold RCU read lock or RTNL
+ */
+static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
{
- struct tnode *ret = node_parent(node);
+ unsigned long parent;
+
+ parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
+ lockdep_rtnl_is_held());
- return rcu_dereference_rtnl(ret);
+ return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
/* Same as rcu_assign_pointer
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
node->parent = (unsigned long)ptr | NODE_TYPE(node);
}
-static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i)
+/*
+ * caller must hold RTNL
+ */
+static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
{
BUG_ON(i >= 1U << tn->bits);
- return tn->child[i];
+ return rtnl_dereference(tn->child[i]);
}
-static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
+/*
+ * caller must hold RCU read lock or RTNL
+ */
+static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
{
- struct rt_trie_node *ret = tnode_get_child(tn, i);
+ BUG_ON(i >= 1U << tn->bits);
- return rcu_dereference_rtnl(ret);
+ return rcu_dereference_rtnl(tn->child[i]);
}
static inline int tnode_child_length(const struct tnode *tn)
@@ -487,7 +506,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
int wasfull)
{
- struct rt_trie_node *chi = tn->child[i];
+ struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
int isfull;
BUG_ON(i >= 1<<tn->bits);
@@ -665,7 +684,7 @@ one_child:
for (i = 0; i < tnode_child_length(tn); i++) {
struct rt_trie_node *n;
- n = tn->child[i];
+ n = rtnl_dereference(tn->child[i]);
if (!n)
continue;
@@ -679,6 +698,20 @@ one_child:
return (struct rt_trie_node *) tn;
}
+
+static void tnode_clean_free(struct tnode *tn)
+{
+ int i;
+ struct tnode *tofree;
+
+ for (i = 0; i < tnode_child_length(tn); i++) {
+ tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
+ if (tofree)
+ tnode_free(tofree);
+ }
+ tnode_free(tn);
+}
+
static struct tnode *inflate(struct trie *t, struct tnode *tn)
{
struct tnode *oldtnode = tn;
@@ -755,8 +788,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
inode = (struct tnode *) node;
if (inode->bits == 1) {
- put_child(t, tn, 2*i, inode->child[0]);
- put_child(t, tn, 2*i+1, inode->child[1]);
+ put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
+ put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
tnode_free_safe(inode);
continue;
@@ -797,8 +830,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
size = tnode_child_length(left);
for (j = 0; j < size; j++) {
- put_child(t, left, j, inode->child[j]);
- put_child(t, right, j, inode->child[j + size]);
+ put_child(t, left, j, rtnl_dereference(inode->child[j]));
+ put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
}
put_child(t, tn, 2*i, resize(t, left));
put_child(t, tn, 2*i+1, resize(t, right));
@@ -808,18 +841,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
tnode_free_safe(oldtnode);
return tn;
nomem:
- {
- int size = tnode_child_length(tn);
- int j;
-
- for (j = 0; j < size; j++)
- if (tn->child[j])
- tnode_free((struct tnode *)tn->child[j]);
-
- tnode_free(tn);
-
- return ERR_PTR(-ENOMEM);
- }
+ tnode_clean_free(tn);
+ return ERR_PTR(-ENOMEM);
}
static struct tnode *halve(struct trie *t, struct tnode *tn)
@@ -890,18 +913,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
tnode_free_safe(oldtnode);
return tn;
nomem:
- {
- int size = tnode_child_length(tn);
- int j;
-
- for (j = 0; j < size; j++)
- if (tn->child[j])
- tnode_free((struct tnode *)tn->child[j]);
-
- tnode_free(tn);
-
- return ERR_PTR(-ENOMEM);
- }
+ tnode_clean_free(tn);
+ return ERR_PTR(-ENOMEM);
}
/* readside must use rcu_read_lock currently dump routines
@@ -1033,7 +1046,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
t_key cindex;
pos = 0;
- n = t->trie;
+ n = rtnl_dereference(t->trie);
/* If we point to NULL, stop. Either the tree is empty and we should
* just put a new leaf in if, or we have reached an empty child slot,
@@ -1319,6 +1332,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
}
}
+ if (!plen)
+ tb->tb_num_default++;
+
list_add_tail_rcu(&new_fa->fa_list,
(fa ? &fa->fa_list : fa_head));
@@ -1684,6 +1700,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
list_del_rcu(&fa->fa_list);
+ if (!plen)
+ tb->tb_num_default--;
+
if (list_empty(fa_head)) {
hlist_del_rcu(&li->hlist);
free_leaf_info(li);
@@ -1756,7 +1775,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
continue;
if (IS_LEAF(c)) {
- prefetch(p->child[idx]);
+ prefetch(rcu_dereference_rtnl(p->child[idx]));
return (struct leaf *) c;
}
@@ -1974,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id)
tb->tb_id = id;
tb->tb_default = -1;
+ tb->tb_num_default = 0;
t = (struct trie *) tb->tb_data;
memset(t, 0, sizeof(*t));
@@ -2269,7 +2289,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
/* walk rest of this hash chain */
h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
- while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
+ while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
if (n)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5f8a71d3a2a..cfeca3c2152d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -108,8 +108,7 @@ struct icmp_bxm {
__be32 times[3];
} data;
int head_len;
- struct ip_options replyopts;
- unsigned char optbuf[40];
+ struct ip_options_data replyopts;
};
/* An array of errno for error messages from dest unreach. */
@@ -333,7 +332,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct inet_sock *inet;
__be32 daddr;
- if (ip_options_echo(&icmp_param->replyopts, skb))
+ if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
return;
sk = icmp_xmit_lock(net);
@@ -347,10 +346,10 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
ipc.tx_flags = 0;
- if (icmp_param->replyopts.optlen) {
- ipc.opt = &icmp_param->replyopts;
- if (ipc.opt->srr)
- daddr = icmp_param->replyopts.faddr;
+ if (icmp_param->replyopts.opt.opt.optlen) {
+ ipc.opt = &icmp_param->replyopts.opt;
+ if (ipc.opt->opt.srr)
+ daddr = icmp_param->replyopts.opt.opt.faddr;
}
{
struct flowi4 fl4 = {
@@ -373,14 +372,14 @@ out_unlock:
}
static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
- struct iphdr *iph,
+ const struct iphdr *iph,
__be32 saddr, u8 tos,
int type, int code,
struct icmp_bxm *param)
{
struct flowi4 fl4 = {
- .daddr = (param->replyopts.srr ?
- param->replyopts.faddr : iph->saddr),
+ .daddr = (param->replyopts.opt.opt.srr ?
+ param->replyopts.opt.opt.faddr : iph->saddr),
.saddr = saddr,
.flowi4_tos = RT_TOS(tos),
.flowi4_proto = IPPROTO_ICMP,
@@ -581,7 +580,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
IPTOS_PREC_INTERNETCONTROL) :
iph->tos;
- if (ip_options_echo(&icmp_param.replyopts, skb_in))
+ if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
goto out_unlock;
@@ -597,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param.offset = skb_network_offset(skb_in);
inet_sk(sk)->tos = tos;
ipc.addr = iph->saddr;
- ipc.opt = &icmp_param.replyopts;
+ ipc.opt = &icmp_param.replyopts.opt;
ipc.tx_flags = 0;
rt = icmp_route_lookup(net, skb_in, iph, saddr, tos,
@@ -613,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
room = dst_mtu(&rt->dst);
if (room > 576)
room = 576;
- room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
+ room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
room -= sizeof(struct icmphdr);
icmp_param.data_len = skb_in->len - icmp_param.offset;
@@ -637,7 +636,7 @@ EXPORT_SYMBOL(icmp_send);
static void icmp_unreach(struct sk_buff *skb)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
struct icmphdr *icmph;
int hash, protocol;
const struct net_protocol *ipprot;
@@ -656,7 +655,7 @@ static void icmp_unreach(struct sk_buff *skb)
goto out_err;
icmph = icmp_hdr(skb);
- iph = (struct iphdr *)skb->data;
+ iph = (const struct iphdr *)skb->data;
if (iph->ihl < 5) /* Mangled header, drop. */
goto out_err;
@@ -729,7 +728,7 @@ static void icmp_unreach(struct sk_buff *skb)
if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
goto out;
- iph = (struct iphdr *)skb->data;
+ iph = (const struct iphdr *)skb->data;
protocol = iph->protocol;
/*
@@ -758,7 +757,7 @@ out_err:
static void icmp_redirect(struct sk_buff *skb)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
if (skb->len < sizeof(struct iphdr))
goto out_err;
@@ -769,7 +768,7 @@ static void icmp_redirect(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
- iph = (struct iphdr *)skb->data;
+ iph = (const struct iphdr *)skb->data;
switch (icmp_hdr(skb)->code & 7) {
case ICMP_REDIR_NET:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1fd3d9ce8398..ec03c2fda6ce 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -309,6 +309,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct iphdr *pip;
struct igmpv3_report *pig;
struct net *net = dev_net(dev);
+ struct flowi4 fl4;
while (1) {
skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
@@ -321,18 +322,13 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
}
igmp_skb_size(skb) = size;
- rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0,
+ rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
0, 0,
IPPROTO_IGMP, 0, dev->ifindex);
if (IS_ERR(rt)) {
kfree_skb(skb);
return NULL;
}
- if (rt->rt_src == 0) {
- kfree_skb(skb);
- ip_rt_put(rt);
- return NULL;
- }
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
@@ -348,8 +344,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->tos = 0xc0;
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
- pip->daddr = rt->rt_dst;
- pip->saddr = rt->rt_src;
+ pip->daddr = fl4.daddr;
+ pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(pip, &rt->dst, NULL);
@@ -655,6 +651,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
struct net_device *dev = in_dev->dev;
struct net *net = dev_net(dev);
__be32 group = pmc ? pmc->multiaddr : 0;
+ struct flowi4 fl4;
__be32 dst;
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
@@ -664,17 +661,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
else
dst = group;
- rt = ip_route_output_ports(net, NULL, dst, 0,
+ rt = ip_route_output_ports(net, &fl4, NULL, dst, 0,
0, 0,
IPPROTO_IGMP, 0, dev->ifindex);
if (IS_ERR(rt))
return -1;
- if (rt->rt_src == 0) {
- ip_rt_put(rt);
- return -1;
- }
-
skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
if (skb == NULL) {
ip_rt_put(rt);
@@ -695,7 +687,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->daddr = dst;
- iph->saddr = rt->rt_src;
+ iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
ip_select_ident(iph, &rt->dst, NULL);
((u8*)&iph[1])[0] = IPOPT_RA;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 38f23e721b80..54944da2f794 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -354,26 +354,20 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
{
struct rtable *rt;
const struct inet_request_sock *ireq = inet_rsk(req);
- struct ip_options *opt = inet_rsk(req)->opt;
- struct flowi4 fl4 = {
- .flowi4_oif = sk->sk_bound_dev_if,
- .flowi4_mark = sk->sk_mark,
- .daddr = ((opt && opt->srr) ?
- opt->faddr : ireq->rmt_addr),
- .saddr = ireq->loc_addr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
- .flowi4_proto = sk->sk_protocol,
- .flowi4_flags = inet_sk_flowi_flags(sk),
- .fl4_sport = inet_sk(sk)->inet_sport,
- .fl4_dport = ireq->rmt_port,
- };
+ struct ip_options_rcu *opt = inet_rsk(req)->opt;
struct net *net = sock_net(sk);
+ struct flowi4 fl4;
+ flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+ (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
+ ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+ if (opt && opt->opt.is_strictroute && fl4.daddr != rt->rt_gateway)
goto route_err;
return &rt->dst;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2ada17129fce..6ffe94ca5bc9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -124,7 +124,7 @@ static int inet_csk_diag_fill(struct sock *sk,
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
if (r->idiag_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
&np->rcv_saddr);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 47038cb6c138..85a0f75dae64 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -51,8 +51,8 @@ MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
* Basic tcp checks whether packet is suitable for LRO
*/
-static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph,
- int len, struct net_lro_desc *lro_desc)
+static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
+ int len, const struct net_lro_desc *lro_desc)
{
/* check ip header: don't aggregate padded frames */
if (ntohs(iph->tot_len) != len)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index da5941f18c3c..8871067560db 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -413,11 +413,6 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
dev_net_set(dev, net);
- if (strchr(name, '%')) {
- if (dev_alloc_name(dev, name) < 0)
- goto failed_free;
- }
-
nt = netdev_priv(dev);
nt->parms = *parms;
dev->rtnl_link_ops = &ipgre_link_ops;
@@ -462,7 +457,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
by themself???
*/
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
__be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
int grehlen = (iph->ihl<<2) + 4;
const int type = icmp_hdr(skb)->type;
@@ -534,7 +529,7 @@ out:
rcu_read_unlock();
}
-static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
+static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
{
if (INET_ECN_is_ce(iph->tos)) {
if (skb->protocol == htons(ETH_P_IP)) {
@@ -546,19 +541,19 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
}
static inline u8
-ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
+ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
{
u8 inner = 0;
if (skb->protocol == htons(ETH_P_IP))
inner = old_iph->tos;
else if (skb->protocol == htons(ETH_P_IPV6))
- inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
+ inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
return INET_ECN_encapsulate(tos, inner);
}
static int ipgre_rcv(struct sk_buff *skb)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
u8 *h;
__be16 flags;
__sum16 csum = 0;
@@ -697,8 +692,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct pcpu_tstats *tstats;
- struct iphdr *old_iph = ip_hdr(skb);
- struct iphdr *tiph;
+ const struct iphdr *old_iph = ip_hdr(skb);
+ const struct iphdr *tiph;
+ struct flowi4 fl4;
u8 tos;
__be16 df;
struct rtable *rt; /* Route to the other host */
@@ -714,7 +710,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
gre_hlen = 0;
- tiph = (struct iphdr *)skb->data;
+ tiph = (const struct iphdr *)skb->data;
} else {
gre_hlen = tunnel->hlen;
tiph = &tunnel->parms.iph;
@@ -735,14 +731,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct in6_addr *addr6;
+ const struct in6_addr *addr6;
int addr_type;
struct neighbour *neigh = skb_dst(skb)->neighbour;
if (neigh == NULL)
goto tx_error;
- addr6 = (struct in6_addr *)&neigh->primary_key;
+ addr6 = (const struct in6_addr *)&neigh->primary_key;
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
@@ -766,10 +762,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (skb->protocol == htons(ETH_P_IP))
tos = old_iph->tos;
else if (skb->protocol == htons(ETH_P_IPV6))
- tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
+ tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
}
- rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr,
+ rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
tunnel->parms.o_key, RT_TOS(tos),
tunnel->parms.link);
if (IS_ERR(rt)) {
@@ -873,15 +869,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
iph->frag_off = df;
iph->protocol = IPPROTO_GRE;
iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
+ iph->daddr = fl4.daddr;
+ iph->saddr = fl4.saddr;
if ((iph->ttl = tiph->ttl) == 0) {
if (skb->protocol == htons(ETH_P_IP))
iph->ttl = old_iph->ttl;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6))
- iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
+ iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
#endif
else
iph->ttl = ip4_dst_hoplimit(&rt->dst);
@@ -927,7 +923,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
struct ip_tunnel *tunnel;
- struct iphdr *iph;
+ const struct iphdr *iph;
int hlen = LL_MAX_HEADER;
int mtu = ETH_DATA_LEN;
int addend = sizeof(struct iphdr) + 4;
@@ -938,12 +934,14 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
/* Guess output device to choose reasonable mtu and needed_headroom */
if (iph->daddr) {
- struct rtable *rt = ip_route_output_gre(dev_net(dev),
- iph->daddr, iph->saddr,
- tunnel->parms.o_key,
- RT_TOS(iph->tos),
- tunnel->parms.link);
-
+ struct flowi4 fl4;
+ struct rtable *rt;
+
+ rt = ip_route_output_gre(dev_net(dev), &fl4,
+ iph->daddr, iph->saddr,
+ tunnel->parms.o_key,
+ RT_TOS(iph->tos),
+ tunnel->parms.link);
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
ip_rt_put(rt);
@@ -1180,7 +1178,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
- struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
+ const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
memcpy(haddr, &iph->saddr, 4);
return 4;
}
@@ -1196,13 +1194,15 @@ static int ipgre_open(struct net_device *dev)
struct ip_tunnel *t = netdev_priv(dev);
if (ipv4_is_multicast(t->parms.iph.daddr)) {
- struct rtable *rt = ip_route_output_gre(dev_net(dev),
- t->parms.iph.daddr,
- t->parms.iph.saddr,
- t->parms.o_key,
- RT_TOS(t->parms.iph.tos),
- t->parms.link);
-
+ struct flowi4 fl4;
+ struct rtable *rt;
+
+ rt = ip_route_output_gre(dev_net(dev), &fl4,
+ t->parms.iph.daddr,
+ t->parms.iph.saddr,
+ t->parms.o_key,
+ RT_TOS(t->parms.iph.tos),
+ t->parms.link);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
dev = rt->dst.dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d7b2b0987a3b..c8f48efc5fd3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb)
static inline int ip_rcv_options(struct sk_buff *skb)
{
struct ip_options *opt;
- struct iphdr *iph;
+ const struct iphdr *iph;
struct net_device *dev = skb->dev;
/* It looks as overkill, because not all
@@ -374,7 +374,7 @@ drop:
*/
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2391b24e8251..01fc40965848 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -36,7 +36,7 @@
* saddr is address of outgoing interface.
*/
-void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
+void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag)
{
unsigned char *iph = skb_network_header(skb);
@@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
* NOTE: dopt cannot point to skb.
*/
-int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
+int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
{
- struct ip_options *sopt;
+ const struct ip_options *sopt;
unsigned char *sptr, *dptr;
int soffset, doffset;
int optlen;
@@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
sopt = &(IPCB(skb)->opt);
- if (sopt->optlen == 0) {
- dopt->optlen = 0;
+ if (sopt->optlen == 0)
return 0;
- }
sptr = skb_network_header(skb);
dptr = dopt->__data;
@@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
dopt->optlen += optlen;
}
if (sopt->srr) {
- unsigned char * start = sptr+sopt->srr;
+ unsigned char *start = sptr+sopt->srr;
__be32 faddr;
optlen = start[1];
@@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt)
}
}
-static struct ip_options *ip_options_get_alloc(const int optlen)
+static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
{
- return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3),
+ return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
GFP_KERNEL);
}
-static int ip_options_get_finish(struct net *net, struct ip_options **optp,
- struct ip_options *opt, int optlen)
+static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
+ struct ip_options_rcu *opt, int optlen)
{
while (optlen & 3)
- opt->__data[optlen++] = IPOPT_END;
- opt->optlen = optlen;
- if (optlen && ip_options_compile(net, opt, NULL)) {
+ opt->opt.__data[optlen++] = IPOPT_END;
+ opt->opt.optlen = optlen;
+ if (optlen && ip_options_compile(net, &opt->opt, NULL)) {
kfree(opt);
return -EINVAL;
}
@@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp,
return 0;
}
-int ip_options_get_from_user(struct net *net, struct ip_options **optp,
+int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
unsigned char __user *data, int optlen)
{
- struct ip_options *opt = ip_options_get_alloc(optlen);
+ struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
if (!opt)
return -ENOMEM;
- if (optlen && copy_from_user(opt->__data, data, optlen)) {
+ if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
kfree(opt);
return -EFAULT;
}
return ip_options_get_finish(net, optp, opt, optlen);
}
-int ip_options_get(struct net *net, struct ip_options **optp,
+int ip_options_get(struct net *net, struct ip_options_rcu **optp,
unsigned char *data, int optlen)
{
- struct ip_options *opt = ip_options_get_alloc(optlen);
+ struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
if (!opt)
return -ENOMEM;
if (optlen)
- memcpy(opt->__data, data, optlen);
+ memcpy(opt->opt.__data, data, optlen);
return ip_options_get_finish(net, optp, opt, optlen);
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 459c011b1d4a..db38c1822de8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
*
*/
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
- __be32 saddr, __be32 daddr, struct ip_options *opt)
+ __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
{
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = skb_rtable(skb);
struct iphdr *iph;
/* Build the IP header. */
- skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
iph->version = 4;
@@ -158,14 +158,14 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
else
iph->frag_off = 0;
iph->ttl = ip_select_ttl(inet, &rt->dst);
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
+ iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
+ iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
ip_select_ident(iph, &rt->dst, sk);
- if (opt && opt->optlen) {
- iph->ihl += opt->optlen>>2;
- ip_options_build(skb, opt, daddr, rt, 0);
+ if (opt && opt->opt.optlen) {
+ iph->ihl += opt->opt.optlen>>2;
+ ip_options_build(skb, &opt->opt, daddr, rt, 0);
}
skb->priority = sk->sk_priority;
@@ -316,7 +316,7 @@ int ip_queue_xmit(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
- struct ip_options *opt = inet->opt;
+ struct ip_options_rcu *inet_opt;
struct rtable *rt;
struct iphdr *iph;
int res;
@@ -325,6 +325,7 @@ int ip_queue_xmit(struct sk_buff *skb)
* f.e. by something like SCTP.
*/
rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
rt = skb_rtable(skb);
if (rt != NULL)
goto packet_routed;
@@ -332,18 +333,19 @@ int ip_queue_xmit(struct sk_buff *skb)
/* Make sure we can route this packet. */
rt = (struct rtable *)__sk_dst_check(sk, 0);
if (rt == NULL) {
+ struct flowi4 fl4;
__be32 daddr;
/* Use correct destination address if we have options. */
daddr = inet->inet_daddr;
- if(opt && opt->srr)
- daddr = opt->faddr;
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
* itself out.
*/
- rt = ip_route_output_ports(sock_net(sk), sk,
+ rt = ip_route_output_ports(sock_net(sk), &fl4, sk,
daddr, inet->inet_saddr,
inet->inet_dport,
inet->inet_sport,
@@ -357,11 +359,11 @@ int ip_queue_xmit(struct sk_buff *skb)
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
- if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+ if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
- skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
@@ -375,9 +377,9 @@ packet_routed:
iph->daddr = rt->rt_dst;
/* Transport layer set skb->h.foo itself. */
- if (opt && opt->optlen) {
- iph->ihl += opt->optlen >> 2;
- ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
+ if (inet_opt && inet_opt->opt.optlen) {
+ iph->ihl += inet_opt->opt.optlen >> 2;
+ ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
ip_select_ident_more(iph, &rt->dst, sk,
@@ -1033,7 +1035,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
struct ipcm_cookie *ipc, struct rtable **rtp)
{
struct inet_sock *inet = inet_sk(sk);
- struct ip_options *opt;
+ struct ip_options_rcu *opt;
struct rtable *rt;
/*
@@ -1047,7 +1049,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
if (unlikely(cork->opt == NULL))
return -ENOBUFS;
}
- memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen);
+ memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
cork->flags |= IPCORK_OPT;
cork->addr = ipc->addr;
}
@@ -1451,39 +1453,34 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
unsigned int len)
{
struct inet_sock *inet = inet_sk(sk);
- struct {
- struct ip_options opt;
- char data[40];
- } replyopts;
+ struct ip_options_data replyopts;
struct ipcm_cookie ipc;
__be32 daddr;
struct rtable *rt = skb_rtable(skb);
- if (ip_options_echo(&replyopts.opt, skb))
+ if (ip_options_echo(&replyopts.opt.opt, skb))
return;
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
ipc.tx_flags = 0;
- if (replyopts.opt.optlen) {
+ if (replyopts.opt.opt.optlen) {
ipc.opt = &replyopts.opt;
- if (ipc.opt->srr)
- daddr = replyopts.opt.faddr;
+ if (replyopts.opt.opt.srr)
+ daddr = replyopts.opt.opt.faddr;
}
{
- struct flowi4 fl4 = {
- .flowi4_oif = arg->bound_dev_if,
- .daddr = daddr,
- .saddr = rt->rt_spec_dst,
- .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
- .fl4_sport = tcp_hdr(skb)->dest,
- .fl4_dport = tcp_hdr(skb)->source,
- .flowi4_proto = sk->sk_protocol,
- .flowi4_flags = ip_reply_arg_flowi_flags(arg),
- };
+ struct flowi4 fl4;
+
+ flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+ RT_TOS(ip_hdr(skb)->tos),
+ RT_SCOPE_UNIVERSE, sk->sk_protocol,
+ ip_reply_arg_flowi_flags(arg),
+ daddr, rt->rt_spec_dst,
+ tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt))
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3948c86e59ca..ab0c9efd1efa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -131,7 +131,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
struct sockaddr_in sin;
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
__be16 *ports = (__be16 *)skb_transport_header(skb);
if (skb_transport_offset(skb) + 4 > skb->len)
@@ -451,6 +451,11 @@ out:
}
+static void opt_kfree_rcu(struct rcu_head *head)
+{
+ kfree(container_of(head, struct ip_options_rcu, rcu));
+}
+
/*
* Socket option code for IP. This is the end of the line after any
* TCP,UDP etc options on an IP socket.
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
switch (optname) {
case IP_OPTIONS:
{
- struct ip_options *opt = NULL;
+ struct ip_options_rcu *old, *opt = NULL;
+
if (optlen > 40)
goto e_inval;
err = ip_options_get_from_user(sock_net(sk), &opt,
optval, optlen);
if (err)
break;
+ old = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
if (inet->is_icsk) {
struct inet_connection_sock *icsk = inet_csk(sk);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
(TCPF_LISTEN | TCPF_CLOSE)) &&
inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
- if (inet->opt)
- icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+ if (old)
+ icsk->icsk_ext_hdr_len -= old->opt.optlen;
if (opt)
- icsk->icsk_ext_hdr_len += opt->optlen;
+ icsk->icsk_ext_hdr_len += opt->opt.optlen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
}
#endif
}
- opt = xchg(&inet->opt, opt);
- kfree(opt);
+ rcu_assign_pointer(inet->inet_opt, opt);
+ if (old)
+ call_rcu(&old->rcu, opt_kfree_rcu);
break;
}
case IP_PKTINFO:
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_OPTIONS:
{
unsigned char optbuf[sizeof(struct ip_options)+40];
- struct ip_options * opt = (struct ip_options *)optbuf;
+ struct ip_options *opt = (struct ip_options *)optbuf;
+ struct ip_options_rcu *inet_opt;
+
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
opt->optlen = 0;
- if (inet->opt)
- memcpy(optbuf, inet->opt,
- sizeof(struct ip_options)+
- inet->opt->optlen);
+ if (inet_opt)
+ memcpy(optbuf, &inet_opt->opt,
+ sizeof(struct ip_options) +
+ inet_opt->opt.optlen);
release_sock(sk);
if (opt->optlen == 0)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 629067571f02..c857f6f49b03 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -27,7 +27,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
__be32 spi;
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
@@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
return;
spi = htonl(ntohs(ipch->cpi));
- x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr,
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET);
if (!x)
return;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfc17c5914e7..378b20b7ca6e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -276,11 +276,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
dev_net_set(dev, net);
- if (strchr(name, '%')) {
- if (dev_alloc_name(dev, name) < 0)
- goto failed_free;
- }
-
nt = netdev_priv(dev);
nt->parms = *parms;
@@ -319,7 +314,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
8 bytes of packet payload. It means, that precise relaying of
ICMP in the real Internet is absolutely infeasible.
*/
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
@@ -433,15 +428,16 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct pcpu_tstats *tstats;
- struct iphdr *tiph = &tunnel->parms.iph;
+ const struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
+ const struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr;
+ struct flowi4 fl4;
int mtu;
if (skb->protocol != htons(ETH_P_IP))
@@ -460,7 +456,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_error_icmp;
}
- rt = ip_route_output_ports(dev_net(dev), NULL,
+ rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
dst, tiph->saddr,
0, 0,
IPPROTO_IPIP, RT_TOS(tos),
@@ -549,8 +545,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
iph->frag_off = df;
iph->protocol = IPPROTO_IPIP;
iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
+ iph->daddr = fl4.daddr;
+ iph->saddr = fl4.saddr;
if ((iph->ttl = tiph->ttl) == 0)
iph->ttl = old_iph->ttl;
@@ -572,19 +568,21 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
struct ip_tunnel *tunnel;
- struct iphdr *iph;
+ const struct iphdr *iph;
tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
if (iph->daddr) {
- struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL,
- iph->daddr, iph->saddr,
- 0, 0,
- IPPROTO_IPIP,
- RT_TOS(iph->tos),
- tunnel->parms.link);
-
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+ iph->daddr, iph->saddr,
+ 0, 0,
+ IPPROTO_IPIP,
+ RT_TOS(iph->tos),
+ tunnel->parms.link);
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
ip_rt_put(rt);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1f62eaeb6de4..30a7763c400e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1549,7 +1549,7 @@ static struct notifier_block ip_mr_notifier = {
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct iphdr *iph;
- struct iphdr *old_iph = ip_hdr(skb);
+ const struct iphdr *old_iph = ip_hdr(skb);
skb_push(skb, sizeof(struct iphdr));
skb->transport_header = skb->network_header;
@@ -1595,6 +1595,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct rtable *rt;
+ struct flowi4 fl4;
int encap = 0;
if (vif->dev == NULL)
@@ -1612,7 +1613,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
#endif
if (vif->flags & VIFF_TUNNEL) {
- rt = ip_route_output_ports(net, NULL,
+ rt = ip_route_output_ports(net, &fl4, NULL,
vif->remote, vif->local,
0, 0,
IPPROTO_IPIP,
@@ -1621,7 +1622,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
goto out_free;
encap = sizeof(struct iphdr);
} else {
- rt = ip_route_output_ports(net, NULL, iph->daddr, 0,
+ rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
0, 0,
IPPROTO_IPIP,
RT_TOS(iph->tos), vif->link);
@@ -1788,12 +1789,14 @@ dont_forward:
return 0;
}
-static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt)
+static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
{
+ struct rtable *rt = skb_rtable(skb);
+ struct iphdr *iph = ip_hdr(skb);
struct flowi4 fl4 = {
- .daddr = rt->rt_key_dst,
- .saddr = rt->rt_key_src,
- .flowi4_tos = rt->rt_tos,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowi4_tos = iph->tos,
.flowi4_oif = rt->rt_oif,
.flowi4_iif = rt->rt_iif,
.flowi4_mark = rt->rt_mark,
@@ -1825,7 +1828,7 @@ int ip_mr_input(struct sk_buff *skb)
if (IPCB(skb)->flags & IPSKB_FORWARDED)
goto dont_forward;
- mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
+ mrt = ipmr_rt_fib_lookup(net, skb);
if (IS_ERR(mrt)) {
kfree_skb(skb);
return PTR_ERR(mrt);
@@ -1957,7 +1960,7 @@ int pim_rcv_v1(struct sk_buff *skb)
pim = igmp_hdr(skb);
- mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
+ mrt = ipmr_rt_fib_lookup(net, skb);
if (IS_ERR(mrt))
goto drop;
if (!mrt->mroute_do_pim ||
@@ -1989,7 +1992,7 @@ static int pim_rcv(struct sk_buff *skb)
csum_fold(skb_checksum(skb, 0, skb->len, 0))))
goto drop;
- mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
+ mrt = ipmr_rt_fib_lookup(net, skb);
if (IS_ERR(mrt))
goto drop;
if (__pim_rcv(mrt, skb, sizeof(*pim))) {
@@ -2038,20 +2041,20 @@ rtattr_failure:
return -EMSGSIZE;
}
-int ipmr_get_route(struct net *net,
- struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ipmr_get_route(struct net *net, struct sk_buff *skb,
+ __be32 saddr, __be32 daddr,
+ struct rtmsg *rtm, int nowait)
{
- int err;
- struct mr_table *mrt;
struct mfc_cache *cache;
- struct rtable *rt = skb_rtable(skb);
+ struct mr_table *mrt;
+ int err;
mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
if (mrt == NULL)
return -ENOENT;
rcu_read_lock();
- cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
+ cache = ipmr_cache_find(mrt, saddr, daddr);
if (cache == NULL) {
struct sk_buff *skb2;
@@ -2084,8 +2087,8 @@ int ipmr_get_route(struct net *net,
skb_reset_network_header(skb2);
iph = ip_hdr(skb2);
iph->ihl = sizeof(struct iphdr) >> 2;
- iph->saddr = rt->rt_src;
- iph->daddr = rt->rt_dst;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
iph->version = 0;
err = ipmr_cache_unresolved(mrt, vif, skb2);
read_unlock(&mrt_lock);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 89bc7e66d598..fd7a3f68917f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
void *table_base;
const struct xt_table_info *private;
struct xt_action_param acpar;
+ unsigned int addend;
if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
return NF_DROP;
@@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- xt_info_rdlock_bh();
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
private = table->private;
table_base = private->entries[smp_processor_id()];
@@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
/* Verdict */
break;
} while (!acpar.hotdrop);
- xt_info_rdunlock_bh();
+ xt_write_recseq_end(addend);
+ local_bh_enable();
if (acpar.hotdrop)
return NF_DROP;
@@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t,
unsigned int i;
for_each_possible_cpu(cpu) {
- seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t,
unsigned int start;
do {
- start = read_seqbegin(lock);
+ start = read_seqcount_begin(s);
bcnt = iter->counters.bcnt;
pcnt = iter->counters.pcnt;
- } while (read_seqretry(lock, start));
+ } while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
@@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user,
int ret = 0;
void *loc_cpu_entry;
struct arpt_entry *iter;
+ unsigned int addend;
#ifdef CONFIG_COMPAT
struct compat_xt_counters_info compat_tmp;
@@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user,
/* Choose the copy that is on our node */
curcpu = smp_processor_id();
loc_cpu_entry = private->entries[curcpu];
- xt_info_wrlock(curcpu);
+ addend = xt_write_recseq_begin();
xt_entry_foreach(iter, loc_cpu_entry, private->size) {
ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
- xt_info_wrunlock(curcpu);
+ xt_write_recseq_end(addend);
unlock_up_free:
local_bh_enable();
xt_table_unlock(t);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 704915028009..764743843503 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info)
}
EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
-/*
- We keep a set of rules for each CPU, so we can avoid write-locking
- them in the softirq when updating the counters and therefore
- only need to read-lock in the softirq; doing a write_lock_bh() in user
- context stops packets coming through and allows user context to read
- the counters or update the rules.
-
- Hence the start of any table is given by get_table() below. */
-
/* Returns whether matches rule or not. */
/* Performance critical - called for every packet */
static inline bool
@@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb,
unsigned int *stackptr, origptr, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
+ unsigned int addend;
/* Initialization */
ip = ip_hdr(skb);
@@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb,
acpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- xt_info_rdlock_bh();
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
private = table->private;
cpu = smp_processor_id();
table_base = private->entries[cpu];
@@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb,
pr_debug("Exiting %s; resetting sp from %u to %u\n",
__func__, *stackptr, origptr);
*stackptr = origptr;
- xt_info_rdunlock_bh();
+ xt_write_recseq_end(addend);
+ local_bh_enable();
+
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
#else
@@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t,
unsigned int i;
for_each_possible_cpu(cpu) {
- seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t,
unsigned int start;
do {
- start = read_seqbegin(lock);
+ start = read_seqcount_begin(s);
bcnt = iter->counters.bcnt;
pcnt = iter->counters.pcnt;
- } while (read_seqretry(lock, start));
+ } while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
++i; /* macro does multi eval of i */
@@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user,
int ret = 0;
void *loc_cpu_entry;
struct ipt_entry *iter;
+ unsigned int addend;
#ifdef CONFIG_COMPAT
struct compat_xt_counters_info compat_tmp;
@@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user,
/* Choose the copy that is on our node */
curcpu = smp_processor_id();
loc_cpu_entry = private->entries[curcpu];
- xt_info_wrlock(curcpu);
+ addend = xt_write_recseq_begin();
xt_entry_foreach(iter, loc_cpu_entry, private->size) {
ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
- xt_info_wrunlock(curcpu);
+ xt_write_recseq_end(addend);
unlock_up_free:
local_bh_enable();
xt_table_unlock(t);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 31427fb57aa8..99cfa28b6d38 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,7 +153,7 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
}
EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
-static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data,
+static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
int datalen, __sum16 *check, int oldlen)
{
struct rtable *rt = skb_rtable(skb);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bceaec42c37d..a8659e0c4a6e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -154,7 +154,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
* RFC 1122: SHOULD pass TOS value up to the transport layer.
* -> It does. And not only TOS, but all IP header.
*/
-static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
struct sock *sk;
struct hlist_head *head;
@@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
}
if (inet->recverr) {
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
u8 *payload = skb->data + (iph->ihl << 2);
if (inet->hdrincl)
@@ -265,7 +265,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
{
int hash;
struct sock *raw_sk;
- struct iphdr *iph;
+ const struct iphdr *iph;
struct net *net;
hash = protocol & (RAW_HTABLE_SIZE - 1);
@@ -273,7 +273,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
read_lock(&raw_v4_hashinfo.lock);
raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
if (raw_sk != NULL) {
- iph = (struct iphdr *)skb->data;
+ iph = (const struct iphdr *)skb->data;
net = dev_net(skb->dev);
while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
@@ -281,7 +281,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
skb->dev->ifindex)) != NULL) {
raw_err(raw_sk, skb, info);
raw_sk = sk_next(raw_sk);
- iph = (struct iphdr *)skb->data;
+ iph = (const struct iphdr *)skb->data;
}
}
read_unlock(&raw_v4_hashinfo.lock);
@@ -460,6 +460,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
__be32 saddr;
u8 tos;
int err;
+ struct ip_options_data opt_copy;
err = -EMSGSIZE;
if (len > 0xFFFF)
@@ -520,8 +521,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
saddr = ipc.addr;
ipc.addr = daddr;
- if (!ipc.opt)
- ipc.opt = inet->opt;
+ if (!ipc.opt) {
+ struct ip_options_rcu *inet_opt;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ memcpy(&opt_copy, inet_opt,
+ sizeof(*inet_opt) + inet_opt->opt.optlen);
+ ipc.opt = &opt_copy.opt;
+ }
+ rcu_read_unlock();
+ }
if (ipc.opt) {
err = -EINVAL;
@@ -530,10 +541,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
*/
if (inet->hdrincl)
goto done;
- if (ipc.opt->srr) {
+ if (ipc.opt->opt.srr) {
if (!daddr)
goto done;
- daddr = ipc.opt->faddr;
+ daddr = ipc.opt->opt.faddr;
}
}
tos = RT_CONN_FLAGS(sk);
@@ -548,17 +559,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
{
- struct flowi4 fl4 = {
- .flowi4_oif = ipc.oif,
- .flowi4_mark = sk->sk_mark,
- .daddr = daddr,
- .saddr = saddr,
- .flowi4_tos = tos,
- .flowi4_proto = (inet->hdrincl ?
- IPPROTO_RAW :
- sk->sk_protocol),
- .flowi4_flags = FLOWI_FLAG_CAN_SLEEP,
- };
+ struct flowi4 fl4;
+
+ flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+ RT_SCOPE_UNIVERSE,
+ inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
+
if (!inet->hdrincl) {
err = raw_probe_proto_opt(&fl4, msg);
if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 99e6e4bb1c72..6a83840b16af 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -424,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
dst_metric(&r->dst, RTAX_WINDOW),
(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
dst_metric(&r->dst, RTAX_RTTVAR)),
- r->rt_tos,
+ r->rt_key_tos,
r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
r->dst.hh ? (r->dst.hh->hh_output ==
dev_queue_xmit) : 0,
@@ -724,7 +724,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
(rt1->rt_mark ^ rt2->rt_mark) |
- (rt1->rt_tos ^ rt2->rt_tos) |
+ (rt1->rt_key_tos ^ rt2->rt_key_tos) |
(rt1->rt_oif ^ rt2->rt_oif) |
(rt1->rt_iif ^ rt2->rt_iif)) == 0;
}
@@ -1349,7 +1349,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
rt_genid(dev_net(dst->dev)));
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
- &rt->rt_dst, rt->rt_tos);
+ &rt->rt_dst, rt->rt_key_tos);
#endif
rt_del(hash, rt);
ret = NULL;
@@ -1507,7 +1507,7 @@ static inline unsigned short guess_mtu(unsigned short old_mtu)
return 68;
}
-unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
+unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
unsigned short new_mtu,
struct net_device *dev)
{
@@ -1710,7 +1710,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = rt->rt_key_dst,
.saddr = rt->rt_key_src,
- .flowi4_tos = rt->rt_tos,
+ .flowi4_tos = rt->rt_key_tos,
.flowi4_oif = rt->rt_oif,
.flowi4_iif = rt->rt_iif,
.flowi4_mark = rt->rt_mark,
@@ -1767,7 +1767,7 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
return mtu;
}
-static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
+static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
struct fib_info *fi)
{
struct inet_peer *peer;
@@ -1776,7 +1776,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
/* If a peer entry exists for this destination, we must hook
* it up in order to get at cached metrics.
*/
- if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
+ if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
create = 1;
rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
@@ -1803,7 +1803,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
}
}
-static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
+static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
const struct fib_result *res,
struct fib_info *fi, u16 type, u32 itag)
{
@@ -1813,7 +1813,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
if (FIB_RES_GW(*res) &&
FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = FIB_RES_GW(*res);
- rt_init_metrics(rt, oldflp4, fi);
+ rt_init_metrics(rt, fl4, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
@@ -1830,20 +1830,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
#endif
set_class_tag(rt, itag);
#endif
- rt->rt_type = type;
}
-static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm)
+static struct rtable *rt_dst_alloc(struct net_device *dev,
+ bool nopolicy, bool noxfrm)
{
- struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1);
- if (rt) {
- rt->dst.obsolete = -1;
-
- rt->dst.flags = DST_HOST |
- (nopolicy ? DST_NOPOLICY : 0) |
- (noxfrm ? DST_NOXFRM : 0);
- }
- return rt;
+ return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
+ DST_HOST |
+ (nopolicy ? DST_NOPOLICY : 0) |
+ (noxfrm ? DST_NOXFRM : 0));
}
/* called in rcu_read_lock() section */
@@ -1871,36 +1866,38 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto e_inval;
spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
} else {
- err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
- &itag, 0);
+ err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
+ &itag);
if (err < 0)
goto e_err;
}
- rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ rth = rt_dst_alloc(init_net.loopback_dev,
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
if (!rth)
goto e_nobufs;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ rth->dst.tclassid = itag;
+#endif
rth->dst.output = ip_rt_bug;
rth->rt_key_dst = daddr;
- rth->rt_dst = daddr;
- rth->rt_tos = tos;
- rth->rt_mark = skb->mark;
rth->rt_key_src = saddr;
+ rth->rt_genid = rt_genid(dev_net(dev));
+ rth->rt_flags = RTCF_MULTICAST;
+ rth->rt_type = RTN_MULTICAST;
+ rth->rt_key_tos = tos;
+ rth->rt_dst = daddr;
rth->rt_src = saddr;
-#ifdef CONFIG_IP_ROUTE_CLASSID
- rth->dst.tclassid = itag;
-#endif
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
- rth->dst.dev = init_net.loopback_dev;
- dev_hold(rth->dst.dev);
rth->rt_oif = 0;
+ rth->rt_mark = skb->mark;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
- rth->rt_genid = rt_genid(dev_net(dev));
- rth->rt_flags = RTCF_MULTICAST;
- rth->rt_type = RTN_MULTICAST;
+ rth->rt_peer_genid = 0;
+ rth->peer = NULL;
+ rth->fi = NULL;
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1981,8 +1978,8 @@ static int __mkroute_input(struct sk_buff *skb,
}
- err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
- in_dev->dev, &spec_dst, &itag, skb->mark);
+ err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
+ in_dev->dev, &spec_dst, &itag);
if (err < 0) {
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -2013,7 +2010,8 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
+ rth = rt_dst_alloc(out_dev->dev,
+ IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(out_dev, NOXFRM));
if (!rth) {
err = -ENOBUFS;
@@ -2021,27 +2019,28 @@ static int __mkroute_input(struct sk_buff *skb,
}
rth->rt_key_dst = daddr;
- rth->rt_dst = daddr;
- rth->rt_tos = tos;
- rth->rt_mark = skb->mark;
rth->rt_key_src = saddr;
+ rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
+ rth->rt_flags = flags;
+ rth->rt_type = res->type;
+ rth->rt_key_tos = tos;
+ rth->rt_dst = daddr;
rth->rt_src = saddr;
- rth->rt_gateway = daddr;
rth->rt_route_iif = in_dev->dev->ifindex;
rth->rt_iif = in_dev->dev->ifindex;
- rth->dst.dev = (out_dev)->dev;
- dev_hold(rth->dst.dev);
rth->rt_oif = 0;
+ rth->rt_mark = skb->mark;
+ rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
+ rth->rt_peer_genid = 0;
+ rth->peer = NULL;
+ rth->fi = NULL;
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
- rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
- rth->rt_flags = flags;
-
*result = rth;
err = 0;
cleanup:
@@ -2150,9 +2149,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
if (res.type == RTN_LOCAL) {
- err = fib_validate_source(saddr, daddr, tos,
+ err = fib_validate_source(skb, saddr, daddr, tos,
net->loopback_dev->ifindex,
- dev, &spec_dst, &itag, skb->mark);
+ dev, &spec_dst, &itag);
if (err < 0)
goto martian_source_keep_err;
if (err)
@@ -2176,8 +2175,8 @@ brd_input:
if (ipv4_is_zeronet(saddr))
spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
else {
- err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
- &itag, skb->mark);
+ err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
+ &itag);
if (err < 0)
goto martian_source_keep_err;
if (err)
@@ -2188,36 +2187,42 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
- rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ rth = rt_dst_alloc(net->loopback_dev,
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
if (!rth)
goto e_nobufs;
+ rth->dst.input= ip_local_deliver;
rth->dst.output= ip_rt_bug;
- rth->rt_genid = rt_genid(net);
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ rth->dst.tclassid = itag;
+#endif
rth->rt_key_dst = daddr;
- rth->rt_dst = daddr;
- rth->rt_tos = tos;
- rth->rt_mark = skb->mark;
rth->rt_key_src = saddr;
+ rth->rt_genid = rt_genid(net);
+ rth->rt_flags = flags|RTCF_LOCAL;
+ rth->rt_type = res.type;
+ rth->rt_key_tos = tos;
+ rth->rt_dst = daddr;
rth->rt_src = saddr;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
- rth->dst.dev = net->loopback_dev;
- dev_hold(rth->dst.dev);
+ rth->rt_oif = 0;
+ rth->rt_mark = skb->mark;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
- rth->dst.input= ip_local_deliver;
- rth->rt_flags = flags|RTCF_LOCAL;
+ rth->rt_peer_genid = 0;
+ rth->peer = NULL;
+ rth->fi = NULL;
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
- rth->rt_type = res.type;
hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
err = 0;
@@ -2288,7 +2293,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
(rth->rt_iif ^ iif) |
rth->rt_oif |
- (rth->rt_tos ^ tos)) == 0 &&
+ (rth->rt_key_tos ^ tos)) == 0 &&
rth->rt_mark == skb->mark &&
net_eq(dev_net(rth->dst.dev), net) &&
!rt_is_expired(rth)) {
@@ -2349,12 +2354,12 @@ EXPORT_SYMBOL(ip_route_input_common);
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
const struct flowi4 *fl4,
- const struct flowi4 *oldflp4,
- struct net_device *dev_out,
+ __be32 orig_daddr, __be32 orig_saddr,
+ int orig_oif, struct net_device *dev_out,
unsigned int flags)
{
struct fib_info *fi = res->fi;
- u32 tos = RT_FL_TOS(oldflp4);
+ u32 tos = RT_FL_TOS(fl4);
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
@@ -2381,8 +2386,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fi = NULL;
} else if (type == RTN_MULTICAST) {
flags |= RTCF_MULTICAST | RTCF_LOCAL;
- if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr,
- oldflp4->flowi4_proto))
+ if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
+ fl4->flowi4_proto))
flags &= ~RTCF_LOCAL;
/* If multicast route do not exist use
* default one, but do not gateway in this case.
@@ -2392,29 +2397,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fi = NULL;
}
- rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
+ rth = rt_dst_alloc(dev_out,
+ IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM));
if (!rth)
return ERR_PTR(-ENOBUFS);
- rth->rt_key_dst = oldflp4->daddr;
- rth->rt_tos = tos;
- rth->rt_key_src = oldflp4->saddr;
- rth->rt_oif = oldflp4->flowi4_oif;
- rth->rt_mark = oldflp4->flowi4_mark;
+ rth->dst.output = ip_output;
+
+ rth->rt_key_dst = orig_daddr;
+ rth->rt_key_src = orig_saddr;
+ rth->rt_genid = rt_genid(dev_net(dev_out));
+ rth->rt_flags = flags;
+ rth->rt_type = type;
+ rth->rt_key_tos = tos;
rth->rt_dst = fl4->daddr;
rth->rt_src = fl4->saddr;
rth->rt_route_iif = 0;
- rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex;
- /* get references to the devices that are to be hold by the routing
- cache entry */
- rth->dst.dev = dev_out;
- dev_hold(dev_out);
+ rth->rt_iif = orig_oif ? : dev_out->ifindex;
+ rth->rt_oif = orig_oif;
+ rth->rt_mark = fl4->flowi4_mark;
rth->rt_gateway = fl4->daddr;
rth->rt_spec_dst= fl4->saddr;
-
- rth->dst.output=ip_output;
- rth->rt_genid = rt_genid(dev_net(dev_out));
+ rth->rt_peer_genid = 0;
+ rth->peer = NULL;
+ rth->fi = NULL;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2432,7 +2439,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
#ifdef CONFIG_IP_MROUTE
if (type == RTN_MULTICAST) {
if (IN_DEV_MFORWARD(in_dev) &&
- !ipv4_is_local_multicast(oldflp4->daddr)) {
+ !ipv4_is_local_multicast(fl4->daddr)) {
rth->dst.input = ip_mr_input;
rth->dst.output = ip_mc_output;
}
@@ -2440,9 +2447,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
#endif
}
- rt_set_nexthop(rth, oldflp4, res, fi, type, 0);
+ rt_set_nexthop(rth, fl4, res, fi, type, 0);
- rth->rt_flags = flags;
return rth;
}
@@ -2451,36 +2457,37 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
* called with rcu_read_lock();
*/
-static struct rtable *ip_route_output_slow(struct net *net,
- const struct flowi4 *oldflp4)
+static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
{
- u32 tos = RT_FL_TOS(oldflp4);
- struct flowi4 fl4;
- struct fib_result res;
- unsigned int flags = 0;
struct net_device *dev_out = NULL;
+ u32 tos = RT_FL_TOS(fl4);
+ unsigned int flags = 0;
+ struct fib_result res;
struct rtable *rth;
+ __be32 orig_daddr;
+ __be32 orig_saddr;
+ int orig_oif;
res.fi = NULL;
#ifdef CONFIG_IP_MULTIPLE_TABLES
res.r = NULL;
#endif
- fl4.flowi4_oif = oldflp4->flowi4_oif;
- fl4.flowi4_iif = net->loopback_dev->ifindex;
- fl4.flowi4_mark = oldflp4->flowi4_mark;
- fl4.daddr = oldflp4->daddr;
- fl4.saddr = oldflp4->saddr;
- fl4.flowi4_tos = tos & IPTOS_RT_MASK;
- fl4.flowi4_scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+ orig_daddr = fl4->daddr;
+ orig_saddr = fl4->saddr;
+ orig_oif = fl4->flowi4_oif;
+
+ fl4->flowi4_iif = net->loopback_dev->ifindex;
+ fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+ fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
rcu_read_lock();
- if (oldflp4->saddr) {
+ if (fl4->saddr) {
rth = ERR_PTR(-EINVAL);
- if (ipv4_is_multicast(oldflp4->saddr) ||
- ipv4_is_lbcast(oldflp4->saddr) ||
- ipv4_is_zeronet(oldflp4->saddr))
+ if (ipv4_is_multicast(fl4->saddr) ||
+ ipv4_is_lbcast(fl4->saddr) ||
+ ipv4_is_zeronet(fl4->saddr))
goto out;
/* I removed check for oif == dev_out->oif here.
@@ -2491,11 +2498,11 @@ static struct rtable *ip_route_output_slow(struct net *net,
of another iface. --ANK
*/
- if (oldflp4->flowi4_oif == 0 &&
- (ipv4_is_multicast(oldflp4->daddr) ||
- ipv4_is_lbcast(oldflp4->daddr))) {
+ if (fl4->flowi4_oif == 0 &&
+ (ipv4_is_multicast(fl4->daddr) ||
+ ipv4_is_lbcast(fl4->daddr))) {
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
- dev_out = __ip_dev_find(net, oldflp4->saddr, false);
+ dev_out = __ip_dev_find(net, fl4->saddr, false);
if (dev_out == NULL)
goto out;
@@ -2514,20 +2521,20 @@ static struct rtable *ip_route_output_slow(struct net *net,
Luckily, this hack is good workaround.
*/
- fl4.flowi4_oif = dev_out->ifindex;
+ fl4->flowi4_oif = dev_out->ifindex;
goto make_route;
}
- if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
+ if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
- if (!__ip_dev_find(net, oldflp4->saddr, false))
+ if (!__ip_dev_find(net, fl4->saddr, false))
goto out;
}
}
- if (oldflp4->flowi4_oif) {
- dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif);
+ if (fl4->flowi4_oif) {
+ dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
rth = ERR_PTR(-ENODEV);
if (dev_out == NULL)
goto out;
@@ -2537,37 +2544,37 @@ static struct rtable *ip_route_output_slow(struct net *net,
rth = ERR_PTR(-ENETUNREACH);
goto out;
}
- if (ipv4_is_local_multicast(oldflp4->daddr) ||
- ipv4_is_lbcast(oldflp4->daddr)) {
- if (!fl4.saddr)
- fl4.saddr = inet_select_addr(dev_out, 0,
- RT_SCOPE_LINK);
+ if (ipv4_is_local_multicast(fl4->daddr) ||
+ ipv4_is_lbcast(fl4->daddr)) {
+ if (!fl4->saddr)
+ fl4->saddr = inet_select_addr(dev_out, 0,
+ RT_SCOPE_LINK);
goto make_route;
}
- if (!fl4.saddr) {
- if (ipv4_is_multicast(oldflp4->daddr))
- fl4.saddr = inet_select_addr(dev_out, 0,
- fl4.flowi4_scope);
- else if (!oldflp4->daddr)
- fl4.saddr = inet_select_addr(dev_out, 0,
- RT_SCOPE_HOST);
+ if (fl4->saddr) {
+ if (ipv4_is_multicast(fl4->daddr))
+ fl4->saddr = inet_select_addr(dev_out, 0,
+ fl4->flowi4_scope);
+ else if (!fl4->daddr)
+ fl4->saddr = inet_select_addr(dev_out, 0,
+ RT_SCOPE_HOST);
}
}
- if (!fl4.daddr) {
- fl4.daddr = fl4.saddr;
- if (!fl4.daddr)
- fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK);
+ if (!fl4->daddr) {
+ fl4->daddr = fl4->saddr;
+ if (!fl4->daddr)
+ fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
dev_out = net->loopback_dev;
- fl4.flowi4_oif = net->loopback_dev->ifindex;
+ fl4->flowi4_oif = net->loopback_dev->ifindex;
res.type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
}
- if (fib_lookup(net, &fl4, &res)) {
+ if (fib_lookup(net, fl4, &res)) {
res.fi = NULL;
- if (oldflp4->flowi4_oif) {
+ if (fl4->flowi4_oif) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
@@ -2586,9 +2593,9 @@ static struct rtable *ip_route_output_slow(struct net *net,
likely IPv6, but we do not.
*/
- if (fl4.saddr == 0)
- fl4.saddr = inet_select_addr(dev_out, 0,
- RT_SCOPE_LINK);
+ if (fl4->saddr == 0)
+ fl4->saddr = inet_select_addr(dev_out, 0,
+ RT_SCOPE_LINK);
res.type = RTN_UNICAST;
goto make_route;
}
@@ -2597,42 +2604,45 @@ static struct rtable *ip_route_output_slow(struct net *net,
}
if (res.type == RTN_LOCAL) {
- if (!fl4.saddr) {
+ if (!fl4->saddr) {
if (res.fi->fib_prefsrc)
- fl4.saddr = res.fi->fib_prefsrc;
+ fl4->saddr = res.fi->fib_prefsrc;
else
- fl4.saddr = fl4.daddr;
+ fl4->saddr = fl4->daddr;
}
dev_out = net->loopback_dev;
- fl4.flowi4_oif = dev_out->ifindex;
+ fl4->flowi4_oif = dev_out->ifindex;
res.fi = NULL;
flags |= RTCF_LOCAL;
goto make_route;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0)
+ if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
fib_select_multipath(&res);
else
#endif
- if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif)
+ if (!res.prefixlen &&
+ res.table->tb_num_default > 1 &&
+ res.type == RTN_UNICAST && !fl4->flowi4_oif)
fib_select_default(&res);
- if (!fl4.saddr)
- fl4.saddr = FIB_RES_PREFSRC(net, res);
+ if (!fl4->saddr)
+ fl4->saddr = FIB_RES_PREFSRC(net, res);
dev_out = FIB_RES_DEV(res);
- fl4.flowi4_oif = dev_out->ifindex;
+ fl4->flowi4_oif = dev_out->ifindex;
make_route:
- rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags);
+ rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
+ dev_out, flags);
if (!IS_ERR(rth)) {
unsigned int hash;
- hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif,
+ hash = rt_hash(orig_daddr, orig_saddr, orig_oif,
rt_genid(dev_net(dev_out)));
- rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif);
+ rth = rt_intern_hash(hash, rth, NULL, orig_oif);
}
out:
@@ -2640,7 +2650,7 @@ out:
return rth;
}
-struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4)
+struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
{
struct rtable *rth;
unsigned int hash;
@@ -2658,13 +2668,17 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4)
rt_is_output_route(rth) &&
rth->rt_oif == flp4->flowi4_oif &&
rth->rt_mark == flp4->flowi4_mark &&
- !((rth->rt_tos ^ flp4->flowi4_tos) &
+ !((rth->rt_key_tos ^ flp4->flowi4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK)) &&
net_eq(dev_net(rth->dst.dev), net) &&
!rt_is_expired(rth)) {
dst_use(&rth->dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
+ if (!flp4->saddr)
+ flp4->saddr = rth->rt_src;
+ if (!flp4->daddr)
+ flp4->daddr = rth->rt_dst;
return rth;
}
RT_CACHE_STAT_INC(out_hlist_search);
@@ -2709,7 +2723,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1);
+ struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
struct rtable *ort = (struct rtable *) dst_orig;
if (rt) {
@@ -2726,7 +2740,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_key_dst = ort->rt_key_dst;
rt->rt_key_src = ort->rt_key_src;
- rt->rt_tos = ort->rt_tos;
+ rt->rt_key_tos = ort->rt_key_tos;
rt->rt_route_iif = ort->rt_route_iif;
rt->rt_iif = ort->rt_iif;
rt->rt_oif = ort->rt_oif;
@@ -2762,15 +2776,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (IS_ERR(rt))
return rt;
- if (flp4->flowi4_proto) {
- if (!flp4->saddr)
- flp4->saddr = rt->rt_src;
- if (!flp4->daddr)
- flp4->daddr = rt->rt_dst;
+ if (flp4->flowi4_proto)
rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
flowi4_to_flowi(flp4),
sk, 0);
- }
return rt;
}
@@ -2794,7 +2803,7 @@ static int rt_fill_info(struct net *net,
r->rtm_family = AF_INET;
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
- r->rtm_tos = rt->rt_tos;
+ r->rtm_tos = rt->rt_key_tos;
r->rtm_table = RT_TABLE_MAIN;
NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
r->rtm_type = rt->rt_type;
@@ -2848,7 +2857,9 @@ static int rt_fill_info(struct net *net,
if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
- int err = ipmr_get_route(net, skb, r, nowait);
+ int err = ipmr_get_route(net, skb,
+ rt->rt_src, rt->rt_dst,
+ r, nowait);
if (err <= 0) {
if (!nowait) {
if (err == 0)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8b44c6d2a79b..26461492a847 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
if (opt && opt->optlen) {
- int opt_size = sizeof(struct ip_options) + opt->optlen;
+ int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;
ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
- if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
+ if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
kfree(ireq->opt);
ireq->opt = NULL;
}
@@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* no easy way to do this.
*/
{
- struct flowi4 fl4 = {
- .flowi4_mark = sk->sk_mark,
- .daddr = ((opt && opt->srr) ?
- opt->faddr : ireq->rmt_addr),
- .saddr = ireq->loc_addr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
- .flowi4_proto = IPPROTO_TCP,
- .flowi4_flags = inet_sk_flowi_flags(sk),
- .fl4_sport = th->dest,
- .fl4_dport = th->source,
- };
+ struct flowi4 fl4;
+
+ flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
+ RT_SCOPE_UNIVERSE, IPPROTO_TCP,
+ inet_sk_flowi_flags(sk),
+ (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
+ ireq->loc_addr, th->source, th->dest);
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b22d45010545..054a59d21eb0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -999,7 +999,8 @@ new_segment:
/* We have some space in skb head. Superb! */
if (copy > skb_tailroom(skb))
copy = skb_tailroom(skb);
- if ((err = skb_add_data(skb, from, copy)) != 0)
+ err = skb_add_data_nocache(sk, skb, from, copy);
+ if (err)
goto do_fault;
} else {
int merge = 0;
@@ -1042,8 +1043,8 @@ new_segment:
/* Time to copy data. We are close to
* the end! */
- err = skb_copy_to_page(sk, from, skb, page,
- off, copy);
+ err = skb_copy_to_page_nocache(sk, from, skb,
+ page, off, copy);
if (err) {
/* If this page was new, give it to the
* socket so it does not get leaked.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f7e6c2c2d2bb..f3d16d8918c7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique);
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
+ struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
__be16 orig_sport, orig_dport;
- struct rtable *rt;
__be32 daddr, nexthop;
+ struct flowi4 fl4;
+ struct rtable *rt;
int err;
+ struct ip_options_rcu *inet_opt;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
@@ -161,15 +163,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -EAFNOSUPPORT;
nexthop = daddr = usin->sin_addr.s_addr;
- if (inet->opt && inet->opt->srr) {
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
+ if (inet_opt && inet_opt->opt.srr) {
if (!daddr)
return -EINVAL;
- nexthop = inet->opt->faddr;
+ nexthop = inet_opt->opt.faddr;
}
orig_sport = inet->inet_sport;
orig_dport = usin->sin_port;
- rt = ip_route_connect(nexthop, inet->inet_saddr,
+ rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
orig_sport, orig_dport, sk, true);
@@ -185,11 +189,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -ENETUNREACH;
}
- if (!inet->opt || !inet->opt->srr)
- daddr = rt->rt_dst;
+ if (!inet_opt || !inet_opt->opt.srr)
+ daddr = fl4.daddr;
if (!inet->inet_saddr)
- inet->inet_saddr = rt->rt_src;
+ inet->inet_saddr = fl4.saddr;
inet->inet_rcv_saddr = inet->inet_saddr;
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -200,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
if (tcp_death_row.sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
+ !tp->rx_opt.ts_recent_stamp && fl4.daddr == daddr) {
struct inet_peer *peer = rt_get_peer(rt);
/*
* VJ's idea. We save last timestamp seen from
@@ -221,8 +225,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr = daddr;
inet_csk(sk)->icsk_ext_hdr_len = 0;
- if (inet->opt)
- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
+ if (inet_opt)
+ inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
@@ -236,8 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err)
goto failure;
- rt = ip_route_newports(rt, IPPROTO_TCP,
- orig_sport, orig_dport,
+ rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -279,7 +282,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
/*
* This routine does path mtu discovery as defined in RFC1191.
*/
-static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
+static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
@@ -341,7 +344,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
- struct iphdr *iph = (struct iphdr *)icmp_skb->data;
+ const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
struct inet_connection_sock *icsk;
struct tcp_sock *tp;
@@ -820,17 +823,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
/*
* Save and compile IPv4 options into the request_sock if needed.
*/
-static struct ip_options *tcp_v4_save_options(struct sock *sk,
- struct sk_buff *skb)
+static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
+ struct sk_buff *skb)
{
- struct ip_options *opt = &(IPCB(skb)->opt);
- struct ip_options *dopt = NULL;
+ const struct ip_options *opt = &(IPCB(skb)->opt);
+ struct ip_options_rcu *dopt = NULL;
if (opt && opt->optlen) {
- int opt_size = optlength(opt);
+ int opt_size = sizeof(*dopt) + opt->optlen;
+
dopt = kmalloc(opt_size, GFP_ATOMIC);
if (dopt) {
- if (ip_options_echo(dopt, skb)) {
+ if (ip_options_echo(&dopt->opt, skb)) {
kfree(dopt);
dopt = NULL;
}
@@ -1411,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct ip_options_rcu *inet_opt;
if (sk_acceptq_is_full(sk))
goto exit_overflow;
@@ -1431,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->inet_daddr = ireq->rmt_addr;
newinet->inet_rcv_saddr = ireq->loc_addr;
newinet->inet_saddr = ireq->loc_addr;
- newinet->opt = ireq->opt;
+ inet_opt = ireq->opt;
+ rcu_assign_pointer(newinet->inet_opt, inet_opt);
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newinet->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
+ if (inet_opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
tcp_mtup_init(newsk);
@@ -2527,7 +2533,7 @@ void tcp4_proc_exit(void)
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
- struct iphdr *iph = skb_gro_network_header(skb);
+ const struct iphdr *iph = skb_gro_network_header(skb);
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
@@ -2548,7 +2554,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
int tcp4_gro_complete(struct sk_buff *skb)
{
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f87a8eb76f3b..544f435d1aff 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -578,7 +578,7 @@ found:
void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
{
struct inet_sock *inet;
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
@@ -804,6 +804,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sk_buff *skb;
+ struct ip_options_data opt_copy;
if (len > 0xFFFF)
return -EMSGSIZE;
@@ -877,22 +878,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
free = 1;
connected = 0;
}
- if (!ipc.opt)
- ipc.opt = inet->opt;
+ if (!ipc.opt) {
+ struct ip_options_rcu *inet_opt;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ memcpy(&opt_copy, inet_opt,
+ sizeof(*inet_opt) + inet_opt->opt.optlen);
+ ipc.opt = &opt_copy.opt;
+ }
+ rcu_read_unlock();
+ }
saddr = ipc.addr;
ipc.addr = faddr = daddr;
- if (ipc.opt && ipc.opt->srr) {
+ if (ipc.opt && ipc.opt->opt.srr) {
if (!daddr)
return -EINVAL;
- faddr = ipc.opt->faddr;
+ faddr = ipc.opt->opt.faddr;
connected = 0;
}
tos = RT_TOS(inet->tos);
if (sock_flag(sk, SOCK_LOCALROUTE) ||
(msg->msg_flags & MSG_DONTROUTE) ||
- (ipc.opt && ipc.opt->is_strictroute)) {
+ (ipc.opt && ipc.opt->opt.is_strictroute)) {
tos |= RTO_ONLINK;
connected = 0;
}
@@ -909,20 +920,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
rt = (struct rtable *)sk_dst_check(sk, 0);
if (rt == NULL) {
- struct flowi4 fl4 = {
- .flowi4_oif = ipc.oif,
- .flowi4_mark = sk->sk_mark,
- .daddr = faddr,
- .saddr = saddr,
- .flowi4_tos = tos,
- .flowi4_proto = sk->sk_protocol,
- .flowi4_flags = (inet_sk_flowi_flags(sk) |
- FLOWI_FLAG_CAN_SLEEP),
- .fl4_sport = inet->inet_sport,
- .fl4_dport = dport,
- };
+ struct flowi4 fl4;
struct net *net = sock_net(sk);
+ flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+ RT_SCOPE_UNIVERSE, sk->sk_protocol,
+ inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
+ faddr, saddr, dport, inet->inet_sport);
+
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d20a05e970d8..7ff973bd02dd 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -73,7 +73,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
rt->rt_key_dst = fl4->daddr;
rt->rt_key_src = fl4->saddr;
- rt->rt_tos = fl4->flowi4_tos;
+ rt->rt_key_tos = fl4->flowi4_tos;
rt->rt_route_iif = fl4->flowi4_iif;
rt->rt_iif = fl4->flowi4_iif;
rt->rt_oif = fl4->flowi4_oif;
@@ -102,7 +102,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
static void
_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
{
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 1717c64628d1..ea983ae96ae6 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -55,7 +55,7 @@ xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
int xfrm4_extract_header(struct sk_buff *skb)
{
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
XFRM_MODE_SKB_CB(skb)->id = iph->id;