diff options
Diffstat (limited to 'net/ipv4')
31 files changed, 646 insertions, 579 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 807d83c02ef6..7b91fa8bf83c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); - kfree(inet->opt); + kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); sk_refcnt_debug_dec(sk); } @@ -1103,14 +1103,18 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; + struct flowi4 fl4; struct rtable *rt; __be32 new_saddr; + struct ip_options_rcu *inet_opt; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; /* Query new route. */ - rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), + rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, inet->inet_sport, inet->inet_dport, sk, false); if (IS_ERR(rt)) @@ -1118,7 +1122,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) sk_setup_caps(sk, &rt->dst); - new_saddr = rt->rt_src; + new_saddr = fl4.saddr; if (new_saddr == old_saddr) return 0; @@ -1147,6 +1151,8 @@ int inet_sk_rebuild_header(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); __be32 daddr; + struct ip_options_rcu *inet_opt; + struct flowi4 fl4; int err; /* Route is OK, nothing to do. */ @@ -1154,10 +1160,13 @@ int inet_sk_rebuild_header(struct sock *sk) return 0; /* Reroute. */ + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); daddr = inet->inet_daddr; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + rcu_read_unlock(); + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); @@ -1186,7 +1195,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { - struct iphdr *iph; + const struct iphdr *iph; const struct net_protocol *ops; int proto; int ihl; @@ -1293,7 +1302,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, const struct net_protocol *ops; struct sk_buff **pp = NULL; struct sk_buff *p; - struct iphdr *iph; + const struct iphdr *iph; unsigned int hlen; unsigned int off; unsigned int id; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 4286fd3cc0e2..c1f4154552fc 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -73,7 +73,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, * into IP header for icv calculation. Options are already checked * for validity, so paranoia is not required. */ -static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) +static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) { unsigned char * optptr = (unsigned char*)(iph+1); int l = iph->ihl*4 - sizeof(struct iphdr); @@ -396,7 +396,7 @@ out: static void ah4_err(struct sk_buff *skb, u32 info) { struct net *net = dev_net(skb->dev); - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -404,7 +404,8 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a0af7ea87870..2b3c23c287cd 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, return CIPSO_V4_HDR_LEN + ret_val; } +static void opt_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_options_rcu, rcu)); +} + /** * cipso_v4_sock_setattr - Add a CIPSO option to a socket * @sk: the socket @@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk, unsigned char *buf = NULL; u32 buf_len; u32 opt_len; - struct ip_options *opt = NULL; + struct ip_options_rcu *old, *opt = NULL; struct inet_sock *sk_inet; struct inet_connection_sock *sk_conn; @@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk, ret_val = -ENOMEM; goto socket_setattr_failure; } - memcpy(opt->__data, buf, buf_len); - opt->optlen = opt_len; - opt->cipso = sizeof(struct iphdr); + memcpy(opt->opt.__data, buf, buf_len); + opt->opt.optlen = opt_len; + opt->opt.cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; sk_inet = inet_sk(sk); + + old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk)); if (sk_inet->is_icsk) { sk_conn = inet_csk(sk); - if (sk_inet->opt) - sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; - sk_conn->icsk_ext_hdr_len += opt->optlen; + if (old) + sk_conn->icsk_ext_hdr_len -= old->opt.optlen; + sk_conn->icsk_ext_hdr_len += opt->opt.optlen; sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); } - opt = xchg(&sk_inet->opt, opt); - kfree(opt); + rcu_assign_pointer(sk_inet->inet_opt, opt); + if (old) + call_rcu(&old->rcu, opt_kfree_rcu); return 0; @@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req, unsigned char *buf = NULL; u32 buf_len; u32 opt_len; - struct ip_options *opt = NULL; + struct ip_options_rcu *opt = NULL; struct inet_request_sock *req_inet; /* We allocate the maximum CIPSO option size here so we are probably @@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req, ret_val = -ENOMEM; goto req_setattr_failure; } - memcpy(opt->__data, buf, buf_len); - opt->optlen = opt_len; - opt->cipso = sizeof(struct iphdr); + memcpy(opt->opt.__data, buf, buf_len); + opt->opt.optlen = opt_len; + opt->opt.cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; req_inet = inet_rsk(req); opt = xchg(&req_inet->opt, opt); - kfree(opt); + if (opt) + call_rcu(&opt->rcu, opt_kfree_rcu); return 0; @@ -2016,34 +2025,34 @@ req_setattr_failure: * values on failure. * */ -static int cipso_v4_delopt(struct ip_options **opt_ptr) +static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) { int hdr_delta = 0; - struct ip_options *opt = *opt_ptr; + struct ip_options_rcu *opt = *opt_ptr; - if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { u8 cipso_len; u8 cipso_off; unsigned char *cipso_ptr; int iter; int optlen_new; - cipso_off = opt->cipso - sizeof(struct iphdr); - cipso_ptr = &opt->__data[cipso_off]; + cipso_off = opt->opt.cipso - sizeof(struct iphdr); + cipso_ptr = &opt->opt.__data[cipso_off]; cipso_len = cipso_ptr[1]; - if (opt->srr > opt->cipso) - opt->srr -= cipso_len; - if (opt->rr > opt->cipso) - opt->rr -= cipso_len; - if (opt->ts > opt->cipso) - opt->ts -= cipso_len; - if (opt->router_alert > opt->cipso) - opt->router_alert -= cipso_len; - opt->cipso = 0; + if (opt->opt.srr > opt->opt.cipso) + opt->opt.srr -= cipso_len; + if (opt->opt.rr > opt->opt.cipso) + opt->opt.rr -= cipso_len; + if (opt->opt.ts > opt->opt.cipso) + opt->opt.ts -= cipso_len; + if (opt->opt.router_alert > opt->opt.cipso) + opt->opt.router_alert -= cipso_len; + opt->opt.cipso = 0; memmove(cipso_ptr, cipso_ptr + cipso_len, - opt->optlen - cipso_off - cipso_len); + opt->opt.optlen - cipso_off - cipso_len); /* determining the new total option length is tricky because of * the padding necessary, the only thing i can think to do at @@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) * from there we can determine the new total option length */ iter = 0; optlen_new = 0; - while (iter < opt->optlen) - if (opt->__data[iter] != IPOPT_NOP) { - iter += opt->__data[iter + 1]; + while (iter < opt->opt.optlen) + if (opt->opt.__data[iter] != IPOPT_NOP) { + iter += opt->opt.__data[iter + 1]; optlen_new = iter; } else iter++; - hdr_delta = opt->optlen; - opt->optlen = (optlen_new + 3) & ~3; - hdr_delta -= opt->optlen; + hdr_delta = opt->opt.optlen; + opt->opt.optlen = (optlen_new + 3) & ~3; + hdr_delta -= opt->opt.optlen; } else { /* only the cipso option was present on the socket so we can * remove the entire option struct */ *opt_ptr = NULL; - hdr_delta = opt->optlen; - kfree(opt); + hdr_delta = opt->opt.optlen; + call_rcu(&opt->rcu, opt_kfree_rcu); } return hdr_delta; @@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) void cipso_v4_sock_delattr(struct sock *sk) { int hdr_delta; - struct ip_options *opt; + struct ip_options_rcu *opt; struct inet_sock *sk_inet; sk_inet = inet_sk(sk); - opt = sk_inet->opt; - if (opt == NULL || opt->cipso == 0) + opt = rcu_dereference_protected(sk_inet->inet_opt, 1); + if (opt == NULL || opt->opt.cipso == 0) return; - hdr_delta = cipso_v4_delopt(&sk_inet->opt); + hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); if (sk_inet->is_icsk && hdr_delta > 0) { struct inet_connection_sock *sk_conn = inet_csk(sk); sk_conn->icsk_ext_hdr_len -= hdr_delta; @@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk) */ void cipso_v4_req_delattr(struct request_sock *req) { - struct ip_options *opt; + struct ip_options_rcu *opt; struct inet_request_sock *req_inet; req_inet = inet_rsk(req); opt = req_inet->opt; - if (opt == NULL || opt->cipso == 0) + if (opt == NULL || opt->opt.cipso == 0) return; cipso_v4_delopt(&req_inet->opt); @@ -2184,14 +2193,18 @@ getattr_return: */ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { - struct ip_options *opt; + struct ip_options_rcu *opt; + int res = -ENOMSG; - opt = inet_sk(sk)->opt; - if (opt == NULL || opt->cipso == 0) - return -ENOMSG; - - return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), - secattr); + rcu_read_lock(); + opt = rcu_dereference(inet_sk(sk)->inet_opt); + if (opt && opt->opt.cipso) + res = cipso_v4_getattr(opt->opt.__data + + opt->opt.cipso - + sizeof(struct iphdr), + secattr); + rcu_read_unlock(); + return res; } /** diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 85bd24ca4f6d..d5a2e6995bae 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; + struct flowi4 fl4; struct rtable *rt; __be32 saddr; int oif; @@ -46,7 +47,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!saddr) saddr = inet->mc_addr; } - rt = ip_route_connect(usin->sin_addr.s_addr, saddr, + rt = ip_route_connect(&fl4, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, inet->inet_sport, usin->sin_port, sk, true); @@ -62,13 +63,13 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -EACCES; } if (!inet->inet_saddr) - inet->inet_saddr = rt->rt_src; /* Update source address */ + inet->inet_saddr = fl4.saddr; /* Update source address */ if (!inet->inet_rcv_saddr) { - inet->inet_rcv_saddr = rt->rt_src; + inet->inet_rcv_saddr = fl4.saddr; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } - inet->inet_daddr = rt->rt_dst; + inet->inet_daddr = fl4.daddr; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 03f994bcf7de..a5b413416da3 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -276,7 +276,7 @@ error: static int esp_input_done2(struct sk_buff *skb, int err) { - struct iphdr *iph; + const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); struct esp_data *esp = x->data; struct crypto_aead *aead = esp->aead; @@ -484,7 +484,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) static void esp4_err(struct sk_buff *skb, u32 info) { struct net *net = dev_net(skb->dev); - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -492,7 +492,8 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 451088330bbb..22524716fe70 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -44,6 +44,7 @@ #include <net/arp.h> #include <net/ip_fib.h> #include <net/rtnetlink.h> +#include <net/xfrm.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type); * - check, that packet arrived from expected physical interface. * called with rcu_read_lock() */ -int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, - struct net_device *dev, __be32 *spec_dst, - u32 *itag, u32 mark) +int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, + int oif, struct net_device *dev, __be32 *spec_dst, + u32 *itag) { struct in_device *in_dev; struct flowi4 fl4; @@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, fl4.flowi4_oif = 0; fl4.flowi4_iif = oif; - fl4.flowi4_mark = mark; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; @@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; - rpf = IN_DEV_RPFILTER(in_dev); + + /* Ignore rp_filter for packets protected by IPsec. */ + rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); + accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); - if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl4.flowi4_mark = 0; + fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; } if (in_dev == NULL) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5fe9b8b41df3..6375c1c5f642 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -126,7 +126,7 @@ struct tnode { struct work_struct work; struct tnode *tnode_free; }; - struct rt_trie_node *child[0]; + struct rt_trie_node __rcu *child[0]; }; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -151,7 +151,7 @@ struct trie_stat { }; struct trie { - struct rt_trie_node *trie; + struct rt_trie_node __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats stats; #endif @@ -177,16 +177,29 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -static inline struct tnode *node_parent(struct rt_trie_node *node) +/* + * caller must hold RTNL + */ +static inline struct tnode *node_parent(const struct rt_trie_node *node) { - return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); + unsigned long parent; + + parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); + + return (struct tnode *)(parent & ~NODE_TYPE_MASK); } -static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) +/* + * caller must hold RCU read lock or RTNL + */ +static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) { - struct tnode *ret = node_parent(node); + unsigned long parent; + + parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || + lockdep_rtnl_is_held()); - return rcu_dereference_rtnl(ret); + return (struct tnode *)(parent & ~NODE_TYPE_MASK); } /* Same as rcu_assign_pointer @@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) node->parent = (unsigned long)ptr | NODE_TYPE(node); } -static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) +/* + * caller must hold RTNL + */ +static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) { BUG_ON(i >= 1U << tn->bits); - return tn->child[i]; + return rtnl_dereference(tn->child[i]); } -static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) +/* + * caller must hold RCU read lock or RTNL + */ +static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) { - struct rt_trie_node *ret = tnode_get_child(tn, i); + BUG_ON(i >= 1U << tn->bits); - return rcu_dereference_rtnl(ret); + return rcu_dereference_rtnl(tn->child[i]); } static inline int tnode_child_length(const struct tnode *tn) @@ -487,7 +506,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull) { - struct rt_trie_node *chi = tn->child[i]; + struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); int isfull; BUG_ON(i >= 1<<tn->bits); @@ -665,7 +684,7 @@ one_child: for (i = 0; i < tnode_child_length(tn); i++) { struct rt_trie_node *n; - n = tn->child[i]; + n = rtnl_dereference(tn->child[i]); if (!n) continue; @@ -679,6 +698,20 @@ one_child: return (struct rt_trie_node *) tn; } + +static void tnode_clean_free(struct tnode *tn) +{ + int i; + struct tnode *tofree; + + for (i = 0; i < tnode_child_length(tn); i++) { + tofree = (struct tnode *)rtnl_dereference(tn->child[i]); + if (tofree) + tnode_free(tofree); + } + tnode_free(tn); +} + static struct tnode *inflate(struct trie *t, struct tnode *tn) { struct tnode *oldtnode = tn; @@ -755,8 +788,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode = (struct tnode *) node; if (inode->bits == 1) { - put_child(t, tn, 2*i, inode->child[0]); - put_child(t, tn, 2*i+1, inode->child[1]); + put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); + put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); tnode_free_safe(inode); continue; @@ -797,8 +830,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) size = tnode_child_length(left); for (j = 0; j < size; j++) { - put_child(t, left, j, inode->child[j]); - put_child(t, right, j, inode->child[j + size]); + put_child(t, left, j, rtnl_dereference(inode->child[j])); + put_child(t, right, j, rtnl_dereference(inode->child[j + size])); } put_child(t, tn, 2*i, resize(t, left)); put_child(t, tn, 2*i+1, resize(t, right)); @@ -808,18 +841,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) tnode_free_safe(oldtnode); return tn; nomem: - { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - return ERR_PTR(-ENOMEM); - } + tnode_clean_free(tn); + return ERR_PTR(-ENOMEM); } static struct tnode *halve(struct trie *t, struct tnode *tn) @@ -890,18 +913,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) tnode_free_safe(oldtnode); return tn; nomem: - { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - return ERR_PTR(-ENOMEM); - } + tnode_clean_free(tn); + return ERR_PTR(-ENOMEM); } /* readside must use rcu_read_lock currently dump routines @@ -1033,7 +1046,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) t_key cindex; pos = 0; - n = t->trie; + n = rtnl_dereference(t->trie); /* If we point to NULL, stop. Either the tree is empty and we should * just put a new leaf in if, or we have reached an empty child slot, @@ -1319,6 +1332,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) } } + if (!plen) + tb->tb_num_default++; + list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); @@ -1684,6 +1700,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) list_del_rcu(&fa->fa_list); + if (!plen) + tb->tb_num_default--; + if (list_empty(fa_head)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); @@ -1756,7 +1775,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) continue; if (IS_LEAF(c)) { - prefetch(p->child[idx]); + prefetch(rcu_dereference_rtnl(p->child[idx])); return (struct leaf *) c; } @@ -1974,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_id = id; tb->tb_default = -1; + tb->tb_num_default = 0; t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); @@ -2269,7 +2289,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* walk rest of this hash chain */ h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); - while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { + while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { tb = hlist_entry(tb_node, struct fib_table, tb_hlist); n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e5f8a71d3a2a..cfeca3c2152d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -108,8 +108,7 @@ struct icmp_bxm { __be32 times[3]; } data; int head_len; - struct ip_options replyopts; - unsigned char optbuf[40]; + struct ip_options_data replyopts; }; /* An array of errno for error messages from dest unreach. */ @@ -333,7 +332,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct inet_sock *inet; __be32 daddr; - if (ip_options_echo(&icmp_param->replyopts, skb)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; sk = icmp_xmit_lock(net); @@ -347,10 +346,10 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; ipc.tx_flags = 0; - if (icmp_param->replyopts.optlen) { - ipc.opt = &icmp_param->replyopts; - if (ipc.opt->srr) - daddr = icmp_param->replyopts.faddr; + if (icmp_param->replyopts.opt.opt.optlen) { + ipc.opt = &icmp_param->replyopts.opt; + if (ipc.opt->opt.srr) + daddr = icmp_param->replyopts.opt.opt.faddr; } { struct flowi4 fl4 = { @@ -373,14 +372,14 @@ out_unlock: } static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, - struct iphdr *iph, + const struct iphdr *iph, __be32 saddr, u8 tos, int type, int code, struct icmp_bxm *param) { struct flowi4 fl4 = { - .daddr = (param->replyopts.srr ? - param->replyopts.faddr : iph->saddr), + .daddr = (param->replyopts.opt.opt.srr ? + param->replyopts.opt.opt.faddr : iph->saddr), .saddr = saddr, .flowi4_tos = RT_TOS(tos), .flowi4_proto = IPPROTO_ICMP, @@ -581,7 +580,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_options_echo(&icmp_param.replyopts, skb_in)) + if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -597,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts; + ipc.opt = &icmp_param.replyopts.opt; ipc.tx_flags = 0; rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, @@ -613,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) room = dst_mtu(&rt->dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; + room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); icmp_param.data_len = skb_in->len - icmp_param.offset; @@ -637,7 +636,7 @@ EXPORT_SYMBOL(icmp_send); static void icmp_unreach(struct sk_buff *skb) { - struct iphdr *iph; + const struct iphdr *iph; struct icmphdr *icmph; int hash, protocol; const struct net_protocol *ipprot; @@ -656,7 +655,7 @@ static void icmp_unreach(struct sk_buff *skb) goto out_err; icmph = icmp_hdr(skb); - iph = (struct iphdr *)skb->data; + iph = (const struct iphdr *)skb->data; if (iph->ihl < 5) /* Mangled header, drop. */ goto out_err; @@ -729,7 +728,7 @@ static void icmp_unreach(struct sk_buff *skb) if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) goto out; - iph = (struct iphdr *)skb->data; + iph = (const struct iphdr *)skb->data; protocol = iph->protocol; /* @@ -758,7 +757,7 @@ out_err: static void icmp_redirect(struct sk_buff *skb) { - struct iphdr *iph; + const struct iphdr *iph; if (skb->len < sizeof(struct iphdr)) goto out_err; @@ -769,7 +768,7 @@ static void icmp_redirect(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; - iph = (struct iphdr *)skb->data; + iph = (const struct iphdr *)skb->data; switch (icmp_hdr(skb)->code & 7) { case ICMP_REDIR_NET: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 1fd3d9ce8398..ec03c2fda6ce 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -309,6 +309,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct iphdr *pip; struct igmpv3_report *pig; struct net *net = dev_net(dev); + struct flowi4 fl4; while (1) { skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), @@ -321,18 +322,13 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) } igmp_skb_size(skb) = size; - rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, + rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, 0, 0, IPPROTO_IGMP, 0, dev->ifindex); if (IS_ERR(rt)) { kfree_skb(skb); return NULL; } - if (rt->rt_src == 0) { - kfree_skb(skb); - ip_rt_put(rt); - return NULL; - } skb_dst_set(skb, &rt->dst); skb->dev = dev; @@ -348,8 +344,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->tos = 0xc0; pip->frag_off = htons(IP_DF); pip->ttl = 1; - pip->daddr = rt->rt_dst; - pip->saddr = rt->rt_src; + pip->daddr = fl4.daddr; + pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(pip, &rt->dst, NULL); @@ -655,6 +651,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct net_device *dev = in_dev->dev; struct net *net = dev_net(dev); __be32 group = pmc ? pmc->multiaddr : 0; + struct flowi4 fl4; __be32 dst; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) @@ -664,17 +661,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, else dst = group; - rt = ip_route_output_ports(net, NULL, dst, 0, + rt = ip_route_output_ports(net, &fl4, NULL, dst, 0, 0, 0, IPPROTO_IGMP, 0, dev->ifindex); if (IS_ERR(rt)) return -1; - if (rt->rt_src == 0) { - ip_rt_put(rt); - return -1; - } - skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); @@ -695,7 +687,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->daddr = dst; - iph->saddr = rt->rt_src; + iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; ip_select_ident(iph, &rt->dst, NULL); ((u8*)&iph[1])[0] = IPOPT_RA; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 38f23e721b80..54944da2f794 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -354,26 +354,20 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); - struct ip_options *opt = inet_rsk(req)->opt; - struct flowi4 fl4 = { - .flowi4_oif = sk->sk_bound_dev_if, - .flowi4_mark = sk->sk_mark, - .daddr = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .saddr = ireq->loc_addr, - .flowi4_tos = RT_CONN_FLAGS(sk), - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = inet_sk_flowi_flags(sk), - .fl4_sport = inet_sk(sk)->inet_sport, - .fl4_dport = ireq->rmt_port, - }; + struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); + struct flowi4 fl4; + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), + (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && fl4.daddr != rt->rt_gateway) goto route_err; return &rt->dst; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2ada17129fce..6ffe94ca5bc9 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -124,7 +124,7 @@ static int inet_csk_diag_fill(struct sock *sk, #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &np->rcv_saddr); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 47038cb6c138..85a0f75dae64 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -51,8 +51,8 @@ MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); * Basic tcp checks whether packet is suitable for LRO */ -static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, - int len, struct net_lro_desc *lro_desc) +static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph, + int len, const struct net_lro_desc *lro_desc) { /* check ip header: don't aggregate padded frames */ if (ntohs(iph->tot_len) != len) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index da5941f18c3c..8871067560db 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -413,11 +413,6 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, dev_net_set(dev, net); - if (strchr(name, '%')) { - if (dev_alloc_name(dev, name) < 0) - goto failed_free; - } - nt = netdev_priv(dev); nt->parms = *parms; dev->rtnl_link_ops = &ipgre_link_ops; @@ -462,7 +457,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) by themself??? */ - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; @@ -534,7 +529,7 @@ out: rcu_read_unlock(); } -static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) { if (INET_ECN_is_ce(iph->tos)) { if (skb->protocol == htons(ETH_P_IP)) { @@ -546,19 +541,19 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) } static inline u8 -ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) +ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) { u8 inner = 0; if (skb->protocol == htons(ETH_P_IP)) inner = old_iph->tos; else if (skb->protocol == htons(ETH_P_IPV6)) - inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); + inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); return INET_ECN_encapsulate(tos, inner); } static int ipgre_rcv(struct sk_buff *skb) { - struct iphdr *iph; + const struct iphdr *iph; u8 *h; __be16 flags; __sum16 csum = 0; @@ -697,8 +692,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev { struct ip_tunnel *tunnel = netdev_priv(dev); struct pcpu_tstats *tstats; - struct iphdr *old_iph = ip_hdr(skb); - struct iphdr *tiph; + const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *tiph; + struct flowi4 fl4; u8 tos; __be16 df; struct rtable *rt; /* Route to the other host */ @@ -714,7 +710,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - tiph = (struct iphdr *)skb->data; + tiph = (const struct iphdr *)skb->data; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; @@ -735,14 +731,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { - struct in6_addr *addr6; + const struct in6_addr *addr6; int addr_type; struct neighbour *neigh = skb_dst(skb)->neighbour; if (neigh == NULL) goto tx_error; - addr6 = (struct in6_addr *)&neigh->primary_key; + addr6 = (const struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { @@ -766,10 +762,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; else if (skb->protocol == htons(ETH_P_IPV6)) - tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); + tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); } - rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, + rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); if (IS_ERR(rt)) { @@ -873,15 +869,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->frag_off = df; iph->protocol = IPPROTO_GRE; iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); - iph->daddr = rt->rt_dst; - iph->saddr = rt->rt_src; + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; if ((iph->ttl = tiph->ttl) == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; + iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; #endif else iph->ttl = ip4_dst_hoplimit(&rt->dst); @@ -927,7 +923,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; struct ip_tunnel *tunnel; - struct iphdr *iph; + const struct iphdr *iph; int hlen = LL_MAX_HEADER; int mtu = ETH_DATA_LEN; int addend = sizeof(struct iphdr) + 4; @@ -938,12 +934,14 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { - struct rtable *rt = ip_route_output_gre(dev_net(dev), - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); - + struct flowi4 fl4; + struct rtable *rt; + + rt = ip_route_output_gre(dev_net(dev), &fl4, + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); @@ -1180,7 +1178,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) { - struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); + const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); memcpy(haddr, &iph->saddr, 4); return 4; } @@ -1196,13 +1194,15 @@ static int ipgre_open(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr)) { - struct rtable *rt = ip_route_output_gre(dev_net(dev), - t->parms.iph.daddr, - t->parms.iph.saddr, - t->parms.o_key, - RT_TOS(t->parms.iph.tos), - t->parms.link); - + struct flowi4 fl4; + struct rtable *rt; + + rt = ip_route_output_gre(dev_net(dev), &fl4, + t->parms.iph.daddr, + t->parms.iph.saddr, + t->parms.o_key, + RT_TOS(t->parms.iph.tos), + t->parms.link); if (IS_ERR(rt)) return -EADDRNOTAVAIL; dev = rt->dst.dev; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d7b2b0987a3b..c8f48efc5fd3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb) static inline int ip_rcv_options(struct sk_buff *skb) { struct ip_options *opt; - struct iphdr *iph; + const struct iphdr *iph; struct net_device *dev = skb->dev; /* It looks as overkill, because not all @@ -374,7 +374,7 @@ drop: */ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct iphdr *iph; + const struct iphdr *iph; u32 len; /* When the interface is in promisc. mode, drop all the crap diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 2391b24e8251..01fc40965848 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -36,7 +36,7 @@ * saddr is address of outgoing interface. */ -void ip_options_build(struct sk_buff * skb, struct ip_options * opt, +void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag) { unsigned char *iph = skb_network_header(skb); @@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, * NOTE: dopt cannot point to skb. */ -int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) +int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) { - struct ip_options *sopt; + const struct ip_options *sopt; unsigned char *sptr, *dptr; int soffset, doffset; int optlen; @@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) sopt = &(IPCB(skb)->opt); - if (sopt->optlen == 0) { - dopt->optlen = 0; + if (sopt->optlen == 0) return 0; - } sptr = skb_network_header(skb); dptr = dopt->__data; @@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->optlen += optlen; } if (sopt->srr) { - unsigned char * start = sptr+sopt->srr; + unsigned char *start = sptr+sopt->srr; __be32 faddr; optlen = start[1]; @@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt) } } -static struct ip_options *ip_options_get_alloc(const int optlen) +static struct ip_options_rcu *ip_options_get_alloc(const int optlen) { - return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), + return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), GFP_KERNEL); } -static int ip_options_get_finish(struct net *net, struct ip_options **optp, - struct ip_options *opt, int optlen) +static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, + struct ip_options_rcu *opt, int optlen) { while (optlen & 3) - opt->__data[optlen++] = IPOPT_END; - opt->optlen = optlen; - if (optlen && ip_options_compile(net, opt, NULL)) { + opt->opt.__data[optlen++] = IPOPT_END; + opt->opt.optlen = optlen; + if (optlen && ip_options_compile(net, &opt->opt, NULL)) { kfree(opt); return -EINVAL; } @@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp, return 0; } -int ip_options_get_from_user(struct net *net, struct ip_options **optp, +int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; - if (optlen && copy_from_user(opt->__data, data, optlen)) { + if (optlen && copy_from_user(opt->opt.__data, data, optlen)) { kfree(opt); return -EFAULT; } return ip_options_get_finish(net, optp, opt, optlen); } -int ip_options_get(struct net *net, struct ip_options **optp, +int ip_options_get(struct net *net, struct ip_options_rcu **optp, unsigned char *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; if (optlen) - memcpy(opt->__data, data, optlen); + memcpy(opt->opt.__data, data, optlen); return ip_options_get_finish(net, optp, opt, optlen); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 459c011b1d4a..db38c1822de8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) * */ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, - __be32 saddr, __be32 daddr, struct ip_options *opt) + __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = skb_rtable(skb); struct iphdr *iph; /* Build the IP header. */ - skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); + skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; @@ -158,14 +158,14 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, else iph->frag_off = 0; iph->ttl = ip_select_ttl(inet, &rt->dst); - iph->daddr = rt->rt_dst; - iph->saddr = rt->rt_src; + iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); + iph->saddr = saddr; iph->protocol = sk->sk_protocol; ip_select_ident(iph, &rt->dst, sk); - if (opt && opt->optlen) { - iph->ihl += opt->optlen>>2; - ip_options_build(skb, opt, daddr, rt, 0); + if (opt && opt->opt.optlen) { + iph->ihl += opt->opt.optlen>>2; + ip_options_build(skb, &opt->opt, daddr, rt, 0); } skb->priority = sk->sk_priority; @@ -316,7 +316,7 @@ int ip_queue_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); - struct ip_options *opt = inet->opt; + struct ip_options_rcu *inet_opt; struct rtable *rt; struct iphdr *iph; int res; @@ -325,6 +325,7 @@ int ip_queue_xmit(struct sk_buff *skb) * f.e. by something like SCTP. */ rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); rt = skb_rtable(skb); if (rt != NULL) goto packet_routed; @@ -332,18 +333,19 @@ int ip_queue_xmit(struct sk_buff *skb) /* Make sure we can route this packet. */ rt = (struct rtable *)__sk_dst_check(sk, 0); if (rt == NULL) { + struct flowi4 fl4; __be32 daddr; /* Use correct destination address if we have options. */ daddr = inet->inet_daddr; - if(opt && opt->srr) - daddr = opt->faddr; + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. */ - rt = ip_route_output_ports(sock_net(sk), sk, + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, @@ -357,11 +359,11 @@ int ip_queue_xmit(struct sk_buff *skb) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ - skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); + skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0)); skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); @@ -375,9 +377,9 @@ packet_routed: iph->daddr = rt->rt_dst; /* Transport layer set skb->h.foo itself. */ - if (opt && opt->optlen) { - iph->ihl += opt->optlen >> 2; - ip_options_build(skb, opt, inet->inet_daddr, rt, 0); + if (inet_opt && inet_opt->opt.optlen) { + iph->ihl += inet_opt->opt.optlen >> 2; + ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } ip_select_ident_more(iph, &rt->dst, sk, @@ -1033,7 +1035,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, struct ipcm_cookie *ipc, struct rtable **rtp) { struct inet_sock *inet = inet_sk(sk); - struct ip_options *opt; + struct ip_options_rcu *opt; struct rtable *rt; /* @@ -1047,7 +1049,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(cork->opt == NULL)) return -ENOBUFS; } - memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); + memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); cork->flags |= IPCORK_OPT; cork->addr = ipc->addr; } @@ -1451,39 +1453,34 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar unsigned int len) { struct inet_sock *inet = inet_sk(sk); - struct { - struct ip_options opt; - char data[40]; - } replyopts; + struct ip_options_data replyopts; struct ipcm_cookie ipc; __be32 daddr; struct rtable *rt = skb_rtable(skb); - if (ip_options_echo(&replyopts.opt, skb)) + if (ip_options_echo(&replyopts.opt.opt, skb)) return; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; ipc.tx_flags = 0; - if (replyopts.opt.optlen) { + if (replyopts.opt.opt.optlen) { ipc.opt = &replyopts.opt; - if (ipc.opt->srr) - daddr = replyopts.opt.faddr; + if (replyopts.opt.opt.srr) + daddr = replyopts.opt.opt.faddr; } { - struct flowi4 fl4 = { - .flowi4_oif = arg->bound_dev_if, - .daddr = daddr, - .saddr = rt->rt_spec_dst, - .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl4_sport = tcp_hdr(skb)->dest, - .fl4_dport = tcp_hdr(skb)->source, - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = ip_reply_arg_flowi_flags(arg), - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, arg->bound_dev_if, 0, + RT_TOS(ip_hdr(skb)->tos), + RT_SCOPE_UNIVERSE, sk->sk_protocol, + ip_reply_arg_flowi_flags(arg), + daddr, rt->rt_spec_dst, + tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3948c86e59ca..ab0c9efd1efa 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -131,7 +131,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); __be16 *ports = (__be16 *)skb_transport_header(skb); if (skb_transport_offset(skb) + 4 > skb->len) @@ -451,6 +451,11 @@ out: } +static void opt_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_options_rcu, rcu)); +} + /* * Socket option code for IP. This is the end of the line after any * TCP,UDP etc options on an IP socket. @@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, switch (optname) { case IP_OPTIONS: { - struct ip_options *opt = NULL; + struct ip_options_rcu *old, *opt = NULL; + if (optlen > 40) goto e_inval; err = ip_options_get_from_user(sock_net(sk), &opt, optval, optlen); if (err) break; + old = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level, (TCPF_LISTEN | TCPF_CLOSE)) && inet->inet_daddr != LOOPBACK4_IPV6)) { #endif - if (inet->opt) - icsk->icsk_ext_hdr_len -= inet->opt->optlen; + if (old) + icsk->icsk_ext_hdr_len -= old->opt.optlen; if (opt) - icsk->icsk_ext_hdr_len += opt->optlen; + icsk->icsk_ext_hdr_len += opt->opt.optlen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) } #endif } - opt = xchg(&inet->opt, opt); - kfree(opt); + rcu_assign_pointer(inet->inet_opt, opt); + if (old) + call_rcu(&old->rcu, opt_kfree_rcu); break; } case IP_PKTINFO: @@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; - struct ip_options * opt = (struct ip_options *)optbuf; + struct ip_options *opt = (struct ip_options *)optbuf; + struct ip_options_rcu *inet_opt; + + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); opt->optlen = 0; - if (inet->opt) - memcpy(optbuf, inet->opt, - sizeof(struct ip_options)+ - inet->opt->optlen); + if (inet_opt) + memcpy(optbuf, &inet_opt->opt, + sizeof(struct ip_options) + + inet_opt->opt.optlen); release_sock(sk); if (opt->optlen == 0) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 629067571f02..c857f6f49b03 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -27,7 +27,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) { struct net *net = dev_net(skb->dev); __be32 spi; - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index bfc17c5914e7..378b20b7ca6e 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -276,11 +276,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, dev_net_set(dev, net); - if (strchr(name, '%')) { - if (dev_alloc_name(dev, name) < 0) - goto failed_free; - } - nt = netdev_priv(dev); nt->parms = *parms; @@ -319,7 +314,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; @@ -433,15 +428,16 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct pcpu_tstats *tstats; - struct iphdr *tiph = &tunnel->parms.iph; + const struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph = ip_hdr(skb); struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; + struct flowi4 fl4; int mtu; if (skb->protocol != htons(ETH_P_IP)) @@ -460,7 +456,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } - rt = ip_route_output_ports(dev_net(dev), NULL, + rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, dst, tiph->saddr, 0, 0, IPPROTO_IPIP, RT_TOS(tos), @@ -549,8 +545,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->frag_off = df; iph->protocol = IPPROTO_IPIP; iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); - iph->daddr = rt->rt_dst; - iph->saddr = rt->rt_src; + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; if ((iph->ttl = tiph->ttl) == 0) iph->ttl = old_iph->ttl; @@ -572,19 +568,21 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; struct ip_tunnel *tunnel; - struct iphdr *iph; + const struct iphdr *iph; tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; if (iph->daddr) { - struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, - iph->daddr, iph->saddr, - 0, 0, - IPPROTO_IPIP, - RT_TOS(iph->tos), - tunnel->parms.link); - + struct rtable *rt; + struct flowi4 fl4; + + rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + iph->daddr, iph->saddr, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1f62eaeb6de4..30a7763c400e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1549,7 +1549,7 @@ static struct notifier_block ip_mr_notifier = { static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph; - struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph = ip_hdr(skb); skb_push(skb, sizeof(struct iphdr)); skb->transport_header = skb->network_header; @@ -1595,6 +1595,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *dev; struct rtable *rt; + struct flowi4 fl4; int encap = 0; if (vif->dev == NULL) @@ -1612,7 +1613,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, #endif if (vif->flags & VIFF_TUNNEL) { - rt = ip_route_output_ports(net, NULL, + rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, 0, 0, IPPROTO_IPIP, @@ -1621,7 +1622,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; encap = sizeof(struct iphdr); } else { - rt = ip_route_output_ports(net, NULL, iph->daddr, 0, + rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 0, 0, IPPROTO_IPIP, RT_TOS(iph->tos), vif->link); @@ -1788,12 +1789,14 @@ dont_forward: return 0; } -static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) +static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) { + struct rtable *rt = skb_rtable(skb); + struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { - .daddr = rt->rt_key_dst, - .saddr = rt->rt_key_src, - .flowi4_tos = rt->rt_tos, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowi4_tos = iph->tos, .flowi4_oif = rt->rt_oif, .flowi4_iif = rt->rt_iif, .flowi4_mark = rt->rt_mark, @@ -1825,7 +1828,7 @@ int ip_mr_input(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward; - mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) { kfree_skb(skb); return PTR_ERR(mrt); @@ -1957,7 +1960,7 @@ int pim_rcv_v1(struct sk_buff *skb) pim = igmp_hdr(skb); - mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; if (!mrt->mroute_do_pim || @@ -1989,7 +1992,7 @@ static int pim_rcv(struct sk_buff *skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; if (__pim_rcv(mrt, skb, sizeof(*pim))) { @@ -2038,20 +2041,20 @@ rtattr_failure: return -EMSGSIZE; } -int ipmr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ipmr_get_route(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr, + struct rtmsg *rtm, int nowait) { - int err; - struct mr_table *mrt; struct mfc_cache *cache; - struct rtable *rt = skb_rtable(skb); + struct mr_table *mrt; + int err; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); if (mrt == NULL) return -ENOENT; rcu_read_lock(); - cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); + cache = ipmr_cache_find(mrt, saddr, daddr); if (cache == NULL) { struct sk_buff *skb2; @@ -2084,8 +2087,8 @@ int ipmr_get_route(struct net *net, skb_reset_network_header(skb2); iph = ip_hdr(skb2); iph->ihl = sizeof(struct iphdr) >> 2; - iph->saddr = rt->rt_src; - iph->daddr = rt->rt_dst; + iph->saddr = saddr; + iph->daddr = daddr; iph->version = 0; err = ipmr_cache_unresolved(mrt, vif, skb2); read_unlock(&mrt_lock); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 89bc7e66d598..fd7a3f68917f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, void *table_base; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; table_base = private->entries[smp_processor_id()]; @@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); + xt_write_recseq_end(addend); + local_bh_enable(); if (acpar.hotdrop) return NF_DROP; @@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; @@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user, int ret = 0; void *loc_cpu_entry; struct arpt_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user, /* Choose the copy that is on our node */ curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 704915028009..764743843503 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info) } EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); -/* - We keep a set of rules for each CPU, so we can avoid write-locking - them in the softirq when updating the counters and therefore - only need to read-lock in the softirq; doing a write_lock_bh() in user - context stops packets coming through and allows user context to read - the counters or update the rules. - - Hence the start of any table is given by get_table() below. */ - /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb, unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; /* Initialization */ ip = ip_hdr(skb); @@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb, acpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); table_base = private->entries[cpu]; @@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb, pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); *stackptr = origptr; - xt_info_rdunlock_bh(); + xt_write_recseq_end(addend); + local_bh_enable(); + #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else @@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ @@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user, int ret = 0; void *loc_cpu_entry; struct ipt_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user, /* Choose the copy that is on our node */ curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 31427fb57aa8..99cfa28b6d38 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -153,7 +153,7 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); -static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data, +static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, int datalen, __sum16 *check, int oldlen) { struct rtable *rt = skb_rtable(skb); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bceaec42c37d..a8659e0c4a6e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -154,7 +154,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ -static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) +static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; @@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) } if (inet->recverr) { - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; u8 *payload = skb->data + (iph->ihl << 2); if (inet->hdrincl) @@ -265,7 +265,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { int hash; struct sock *raw_sk; - struct iphdr *iph; + const struct iphdr *iph; struct net *net; hash = protocol & (RAW_HTABLE_SIZE - 1); @@ -273,7 +273,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) read_lock(&raw_v4_hashinfo.lock); raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); if (raw_sk != NULL) { - iph = (struct iphdr *)skb->data; + iph = (const struct iphdr *)skb->data; net = dev_net(skb->dev); while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, @@ -281,7 +281,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) skb->dev->ifindex)) != NULL) { raw_err(raw_sk, skb, info); raw_sk = sk_next(raw_sk); - iph = (struct iphdr *)skb->data; + iph = (const struct iphdr *)skb->data; } } read_unlock(&raw_v4_hashinfo.lock); @@ -460,6 +460,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, __be32 saddr; u8 tos; int err; + struct ip_options_data opt_copy; err = -EMSGSIZE; if (len > 0xFFFF) @@ -520,8 +521,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, saddr = ipc.addr; ipc.addr = daddr; - if (!ipc.opt) - ipc.opt = inet->opt; + if (!ipc.opt) { + struct ip_options_rcu *inet_opt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + memcpy(&opt_copy, inet_opt, + sizeof(*inet_opt) + inet_opt->opt.optlen); + ipc.opt = &opt_copy.opt; + } + rcu_read_unlock(); + } if (ipc.opt) { err = -EINVAL; @@ -530,10 +541,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, */ if (inet->hdrincl) goto done; - if (ipc.opt->srr) { + if (ipc.opt->opt.srr) { if (!daddr) goto done; - daddr = ipc.opt->faddr; + daddr = ipc.opt->opt.faddr; } } tos = RT_CONN_FLAGS(sk); @@ -548,17 +559,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi4 fl4 = { - .flowi4_oif = ipc.oif, - .flowi4_mark = sk->sk_mark, - .daddr = daddr, - .saddr = saddr, - .flowi4_tos = tos, - .flowi4_proto = (inet->hdrincl ? - IPPROTO_RAW : - sk->sk_protocol), - .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + RT_SCOPE_UNIVERSE, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); + if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); if (err) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 99e6e4bb1c72..6a83840b16af 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -424,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_WINDOW), (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), - r->rt_tos, + r->rt_key_tos, r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, r->dst.hh ? (r->dst.hh->hh_output == dev_queue_xmit) : 0, @@ -724,7 +724,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | (rt1->rt_mark ^ rt2->rt_mark) | - (rt1->rt_tos ^ rt2->rt_tos) | + (rt1->rt_key_tos ^ rt2->rt_key_tos) | (rt1->rt_oif ^ rt2->rt_oif) | (rt1->rt_iif ^ rt2->rt_iif)) == 0; } @@ -1349,7 +1349,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", - &rt->rt_dst, rt->rt_tos); + &rt->rt_dst, rt->rt_key_tos); #endif rt_del(hash, rt); ret = NULL; @@ -1507,7 +1507,7 @@ static inline unsigned short guess_mtu(unsigned short old_mtu) return 68; } -unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, +unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, unsigned short new_mtu, struct net_device *dev) { @@ -1710,7 +1710,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) struct flowi4 fl4 = { .daddr = rt->rt_key_dst, .saddr = rt->rt_key_src, - .flowi4_tos = rt->rt_tos, + .flowi4_tos = rt->rt_key_tos, .flowi4_oif = rt->rt_oif, .flowi4_iif = rt->rt_iif, .flowi4_mark = rt->rt_mark, @@ -1767,7 +1767,7 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, +static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { struct inet_peer *peer; @@ -1776,7 +1776,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, /* If a peer entry exists for this destination, we must hook * it up in order to get at cached metrics. */ - if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) + if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); @@ -1803,7 +1803,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, } } -static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, +static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, const struct fib_result *res, struct fib_info *fi, u16 type, u32 itag) { @@ -1813,7 +1813,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - rt_init_metrics(rt, oldflp4, fi); + rt_init_metrics(rt, fl4, fi); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -1830,20 +1830,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, #endif set_class_tag(rt, itag); #endif - rt->rt_type = type; } -static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) +static struct rtable *rt_dst_alloc(struct net_device *dev, + bool nopolicy, bool noxfrm) { - struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); - if (rt) { - rt->dst.obsolete = -1; - - rt->dst.flags = DST_HOST | - (nopolicy ? DST_NOPOLICY : 0) | - (noxfrm ? DST_NOXFRM : 0); - } - return rt; + return dst_alloc(&ipv4_dst_ops, dev, 1, -1, + DST_HOST | + (nopolicy ? DST_NOPOLICY : 0) | + (noxfrm ? DST_NOXFRM : 0)); } /* called in rcu_read_lock() section */ @@ -1871,36 +1866,38 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else { - err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag, 0); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, + &itag); if (err < 0) goto e_err; } - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + rth = rt_dst_alloc(init_net.loopback_dev, + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; +#ifdef CONFIG_IP_ROUTE_CLASSID + rth->dst.tclassid = itag; +#endif rth->dst.output = ip_rt_bug; rth->rt_key_dst = daddr; - rth->rt_dst = daddr; - rth->rt_tos = tos; - rth->rt_mark = skb->mark; rth->rt_key_src = saddr; + rth->rt_genid = rt_genid(dev_net(dev)); + rth->rt_flags = RTCF_MULTICAST; + rth->rt_type = RTN_MULTICAST; + rth->rt_key_tos = tos; + rth->rt_dst = daddr; rth->rt_src = saddr; -#ifdef CONFIG_IP_ROUTE_CLASSID - rth->dst.tclassid = itag; -#endif rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->dst.dev = init_net.loopback_dev; - dev_hold(rth->dst.dev); rth->rt_oif = 0; + rth->rt_mark = skb->mark; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_genid = rt_genid(dev_net(dev)); - rth->rt_flags = RTCF_MULTICAST; - rth->rt_type = RTN_MULTICAST; + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1981,8 +1978,8 @@ static int __mkroute_input(struct sk_buff *skb, } - err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &spec_dst, &itag, skb->mark); + err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), + in_dev->dev, &spec_dst, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2013,7 +2010,8 @@ static int __mkroute_input(struct sk_buff *skb, } } - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + rth = rt_dst_alloc(out_dev->dev, + IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; @@ -2021,27 +2019,28 @@ static int __mkroute_input(struct sk_buff *skb, } rth->rt_key_dst = daddr; - rth->rt_dst = daddr; - rth->rt_tos = tos; - rth->rt_mark = skb->mark; rth->rt_key_src = saddr; + rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); + rth->rt_flags = flags; + rth->rt_type = res->type; + rth->rt_key_tos = tos; + rth->rt_dst = daddr; rth->rt_src = saddr; - rth->rt_gateway = daddr; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; - rth->dst.dev = (out_dev)->dev; - dev_hold(rth->dst.dev); rth->rt_oif = 0; + rth->rt_mark = skb->mark; + rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; rth->dst.input = ip_forward; rth->dst.output = ip_output; - rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); - rth->rt_flags = flags; - *result = rth; err = 0; cleanup: @@ -2150,9 +2149,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; if (res.type == RTN_LOCAL) { - err = fib_validate_source(saddr, daddr, tos, + err = fib_validate_source(skb, saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &spec_dst, &itag, skb->mark); + dev, &spec_dst, &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2176,8 +2175,8 @@ brd_input: if (ipv4_is_zeronet(saddr)) spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); else { - err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag, skb->mark); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, + &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2188,36 +2187,42 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + rth = rt_dst_alloc(net->loopback_dev, + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; + rth->dst.input= ip_local_deliver; rth->dst.output= ip_rt_bug; - rth->rt_genid = rt_genid(net); +#ifdef CONFIG_IP_ROUTE_CLASSID + rth->dst.tclassid = itag; +#endif rth->rt_key_dst = daddr; - rth->rt_dst = daddr; - rth->rt_tos = tos; - rth->rt_mark = skb->mark; rth->rt_key_src = saddr; + rth->rt_genid = rt_genid(net); + rth->rt_flags = flags|RTCF_LOCAL; + rth->rt_type = res.type; + rth->rt_key_tos = tos; + rth->rt_dst = daddr; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->dst.dev = net->loopback_dev; - dev_hold(rth->dst.dev); + rth->rt_oif = 0; + rth->rt_mark = skb->mark; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->dst.input= ip_local_deliver; - rth->rt_flags = flags|RTCF_LOCAL; + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; @@ -2288,7 +2293,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | (rth->rt_iif ^ iif) | rth->rt_oif | - (rth->rt_tos ^ tos)) == 0 && + (rth->rt_key_tos ^ tos)) == 0 && rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { @@ -2349,12 +2354,12 @@ EXPORT_SYMBOL(ip_route_input_common); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, const struct flowi4 *fl4, - const struct flowi4 *oldflp4, - struct net_device *dev_out, + __be32 orig_daddr, __be32 orig_saddr, + int orig_oif, struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; - u32 tos = RT_FL_TOS(oldflp4); + u32 tos = RT_FL_TOS(fl4); struct in_device *in_dev; u16 type = res->type; struct rtable *rth; @@ -2381,8 +2386,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; - if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, - oldflp4->flowi4_proto)) + if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, + fl4->flowi4_proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use * default one, but do not gateway in this case. @@ -2392,29 +2397,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + rth = rt_dst_alloc(dev_out, + IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); - rth->rt_key_dst = oldflp4->daddr; - rth->rt_tos = tos; - rth->rt_key_src = oldflp4->saddr; - rth->rt_oif = oldflp4->flowi4_oif; - rth->rt_mark = oldflp4->flowi4_mark; + rth->dst.output = ip_output; + + rth->rt_key_dst = orig_daddr; + rth->rt_key_src = orig_saddr; + rth->rt_genid = rt_genid(dev_net(dev_out)); + rth->rt_flags = flags; + rth->rt_type = type; + rth->rt_key_tos = tos; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; - rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; - /* get references to the devices that are to be hold by the routing - cache entry */ - rth->dst.dev = dev_out; - dev_hold(dev_out); + rth->rt_iif = orig_oif ? : dev_out->ifindex; + rth->rt_oif = orig_oif; + rth->rt_mark = fl4->flowi4_mark; rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; - - rth->dst.output=ip_output; - rth->rt_genid = rt_genid(dev_net(dev_out)); + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2432,7 +2439,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #ifdef CONFIG_IP_MROUTE if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && - !ipv4_is_local_multicast(oldflp4->daddr)) { + !ipv4_is_local_multicast(fl4->daddr)) { rth->dst.input = ip_mr_input; rth->dst.output = ip_mc_output; } @@ -2440,9 +2447,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #endif } - rt_set_nexthop(rth, oldflp4, res, fi, type, 0); + rt_set_nexthop(rth, fl4, res, fi, type, 0); - rth->rt_flags = flags; return rth; } @@ -2451,36 +2457,37 @@ static struct rtable *__mkroute_output(const struct fib_result *res, * called with rcu_read_lock(); */ -static struct rtable *ip_route_output_slow(struct net *net, - const struct flowi4 *oldflp4) +static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) { - u32 tos = RT_FL_TOS(oldflp4); - struct flowi4 fl4; - struct fib_result res; - unsigned int flags = 0; struct net_device *dev_out = NULL; + u32 tos = RT_FL_TOS(fl4); + unsigned int flags = 0; + struct fib_result res; struct rtable *rth; + __be32 orig_daddr; + __be32 orig_saddr; + int orig_oif; res.fi = NULL; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - fl4.flowi4_oif = oldflp4->flowi4_oif; - fl4.flowi4_iif = net->loopback_dev->ifindex; - fl4.flowi4_mark = oldflp4->flowi4_mark; - fl4.daddr = oldflp4->daddr; - fl4.saddr = oldflp4->saddr; - fl4.flowi4_tos = tos & IPTOS_RT_MASK; - fl4.flowi4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + orig_daddr = fl4->daddr; + orig_saddr = fl4->saddr; + orig_oif = fl4->flowi4_oif; + + fl4->flowi4_iif = net->loopback_dev->ifindex; + fl4->flowi4_tos = tos & IPTOS_RT_MASK; + fl4->flowi4_scope = ((tos & RTO_ONLINK) ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); rcu_read_lock(); - if (oldflp4->saddr) { + if (fl4->saddr) { rth = ERR_PTR(-EINVAL); - if (ipv4_is_multicast(oldflp4->saddr) || - ipv4_is_lbcast(oldflp4->saddr) || - ipv4_is_zeronet(oldflp4->saddr)) + if (ipv4_is_multicast(fl4->saddr) || + ipv4_is_lbcast(fl4->saddr) || + ipv4_is_zeronet(fl4->saddr)) goto out; /* I removed check for oif == dev_out->oif here. @@ -2491,11 +2498,11 @@ static struct rtable *ip_route_output_slow(struct net *net, of another iface. --ANK */ - if (oldflp4->flowi4_oif == 0 && - (ipv4_is_multicast(oldflp4->daddr) || - ipv4_is_lbcast(oldflp4->daddr))) { + if (fl4->flowi4_oif == 0 && + (ipv4_is_multicast(fl4->daddr) || + ipv4_is_lbcast(fl4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = __ip_dev_find(net, oldflp4->saddr, false); + dev_out = __ip_dev_find(net, fl4->saddr, false); if (dev_out == NULL) goto out; @@ -2514,20 +2521,20 @@ static struct rtable *ip_route_output_slow(struct net *net, Luckily, this hack is good workaround. */ - fl4.flowi4_oif = dev_out->ifindex; + fl4->flowi4_oif = dev_out->ifindex; goto make_route; } - if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { + if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - if (!__ip_dev_find(net, oldflp4->saddr, false)) + if (!__ip_dev_find(net, fl4->saddr, false)) goto out; } } - if (oldflp4->flowi4_oif) { - dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); + if (fl4->flowi4_oif) { + dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); rth = ERR_PTR(-ENODEV); if (dev_out == NULL) goto out; @@ -2537,37 +2544,37 @@ static struct rtable *ip_route_output_slow(struct net *net, rth = ERR_PTR(-ENETUNREACH); goto out; } - if (ipv4_is_local_multicast(oldflp4->daddr) || - ipv4_is_lbcast(oldflp4->daddr)) { - if (!fl4.saddr) - fl4.saddr = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (ipv4_is_local_multicast(fl4->daddr) || + ipv4_is_lbcast(fl4->daddr)) { + if (!fl4->saddr) + fl4->saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); goto make_route; } - if (!fl4.saddr) { - if (ipv4_is_multicast(oldflp4->daddr)) - fl4.saddr = inet_select_addr(dev_out, 0, - fl4.flowi4_scope); - else if (!oldflp4->daddr) - fl4.saddr = inet_select_addr(dev_out, 0, - RT_SCOPE_HOST); + if (fl4->saddr) { + if (ipv4_is_multicast(fl4->daddr)) + fl4->saddr = inet_select_addr(dev_out, 0, + fl4->flowi4_scope); + else if (!fl4->daddr) + fl4->saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_HOST); } } - if (!fl4.daddr) { - fl4.daddr = fl4.saddr; - if (!fl4.daddr) - fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); + if (!fl4->daddr) { + fl4->daddr = fl4->saddr; + if (!fl4->daddr) + fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl4.flowi4_oif = net->loopback_dev->ifindex; + fl4->flowi4_oif = net->loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } - if (fib_lookup(net, &fl4, &res)) { + if (fib_lookup(net, fl4, &res)) { res.fi = NULL; - if (oldflp4->flowi4_oif) { + if (fl4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2586,9 +2593,9 @@ static struct rtable *ip_route_output_slow(struct net *net, likely IPv6, but we do not. */ - if (fl4.saddr == 0) - fl4.saddr = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (fl4->saddr == 0) + fl4->saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); res.type = RTN_UNICAST; goto make_route; } @@ -2597,42 +2604,45 @@ static struct rtable *ip_route_output_slow(struct net *net, } if (res.type == RTN_LOCAL) { - if (!fl4.saddr) { + if (!fl4->saddr) { if (res.fi->fib_prefsrc) - fl4.saddr = res.fi->fib_prefsrc; + fl4->saddr = res.fi->fib_prefsrc; else - fl4.saddr = fl4.daddr; + fl4->saddr = fl4->daddr; } dev_out = net->loopback_dev; - fl4.flowi4_oif = dev_out->ifindex; + fl4->flowi4_oif = dev_out->ifindex; res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) + if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) fib_select_multipath(&res); else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) + if (!res.prefixlen && + res.table->tb_num_default > 1 && + res.type == RTN_UNICAST && !fl4->flowi4_oif) fib_select_default(&res); - if (!fl4.saddr) - fl4.saddr = FIB_RES_PREFSRC(net, res); + if (!fl4->saddr) + fl4->saddr = FIB_RES_PREFSRC(net, res); dev_out = FIB_RES_DEV(res); - fl4.flowi4_oif = dev_out->ifindex; + fl4->flowi4_oif = dev_out->ifindex; make_route: - rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); + rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, + dev_out, flags); if (!IS_ERR(rth)) { unsigned int hash; - hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, + hash = rt_hash(orig_daddr, orig_saddr, orig_oif, rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); + rth = rt_intern_hash(hash, rth, NULL, orig_oif); } out: @@ -2640,7 +2650,7 @@ out: return rth; } -struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) +struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) { struct rtable *rth; unsigned int hash; @@ -2658,13 +2668,17 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) rt_is_output_route(rth) && rth->rt_oif == flp4->flowi4_oif && rth->rt_mark == flp4->flowi4_mark && - !((rth->rt_tos ^ flp4->flowi4_tos) & + !((rth->rt_key_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); + if (!flp4->saddr) + flp4->saddr = rth->rt_src; + if (!flp4->daddr) + flp4->daddr = rth->rt_dst; return rth; } RT_CACHE_STAT_INC(out_hlist_search); @@ -2709,7 +2723,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); + struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); struct rtable *ort = (struct rtable *) dst_orig; if (rt) { @@ -2726,7 +2740,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_key_dst = ort->rt_key_dst; rt->rt_key_src = ort->rt_key_src; - rt->rt_tos = ort->rt_tos; + rt->rt_key_tos = ort->rt_key_tos; rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; @@ -2762,15 +2776,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, if (IS_ERR(rt)) return rt; - if (flp4->flowi4_proto) { - if (!flp4->saddr) - flp4->saddr = rt->rt_src; - if (!flp4->daddr) - flp4->daddr = rt->rt_dst; + if (flp4->flowi4_proto) rt = (struct rtable *) xfrm_lookup(net, &rt->dst, flowi4_to_flowi(flp4), sk, 0); - } return rt; } @@ -2794,7 +2803,7 @@ static int rt_fill_info(struct net *net, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = rt->rt_tos; + r->rtm_tos = rt->rt_key_tos; r->rtm_table = RT_TABLE_MAIN; NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; @@ -2848,7 +2857,9 @@ static int rt_fill_info(struct net *net, if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { - int err = ipmr_get_route(net, skb, r, nowait); + int err = ipmr_get_route(net, skb, + rt->rt_src, rt->rt_dst, + r, nowait); if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8b44c6d2a79b..26461492a847 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * the ACK carries the same options again (see RFC1122 4.2.3.8) */ if (opt && opt->optlen) { - int opt_size = sizeof(struct ip_options) + opt->optlen; + int opt_size = sizeof(struct ip_options_rcu) + opt->optlen; ireq->opt = kmalloc(opt_size, GFP_ATOMIC); - if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) { + if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) { kfree(ireq->opt); ireq->opt = NULL; } @@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi4 fl4 = { - .flowi4_mark = sk->sk_mark, - .daddr = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .saddr = ireq->loc_addr, - .flowi4_tos = RT_CONN_FLAGS(sk), - .flowi4_proto = IPPROTO_TCP, - .flowi4_flags = inet_sk_flowi_flags(sk), - .fl4_sport = th->dest, - .fl4_dport = th->source, - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + RT_SCOPE_UNIVERSE, IPPROTO_TCP, + inet_sk_flowi_flags(sk), + (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, + ireq->loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d45010545..054a59d21eb0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -999,7 +999,8 @@ new_segment: /* We have some space in skb head. Superb! */ if (copy > skb_tailroom(skb)) copy = skb_tailroom(skb); - if ((err = skb_add_data(skb, from, copy)) != 0) + err = skb_add_data_nocache(sk, skb, from, copy); + if (err) goto do_fault; } else { int merge = 0; @@ -1042,8 +1043,8 @@ new_segment: /* Time to copy data. We are close to * the end! */ - err = skb_copy_to_page(sk, from, skb, page, - off, copy); + err = skb_copy_to_page_nocache(sk, from, skb, + page, off, copy); if (err) { /* If this page was new, give it to the * socket so it does not get leaked. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f7e6c2c2d2bb..f3d16d8918c7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique); /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; __be16 orig_sport, orig_dport; - struct rtable *rt; __be32 daddr, nexthop; + struct flowi4 fl4; + struct rtable *rt; int err; + struct ip_options_rcu *inet_opt; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -161,15 +163,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; - if (inet->opt && inet->opt->srr) { + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); + if (inet_opt && inet_opt->opt.srr) { if (!daddr) return -EINVAL; - nexthop = inet->opt->faddr; + nexthop = inet_opt->opt.faddr; } orig_sport = inet->inet_sport; orig_dport = usin->sin_port; - rt = ip_route_connect(nexthop, inet->inet_saddr, + rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport, orig_dport, sk, true); @@ -185,11 +189,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -ENETUNREACH; } - if (!inet->opt || !inet->opt->srr) - daddr = rt->rt_dst; + if (!inet_opt || !inet_opt->opt.srr) + daddr = fl4.daddr; if (!inet->inet_saddr) - inet->inet_saddr = rt->rt_src; + inet->inet_saddr = fl4.saddr; inet->inet_rcv_saddr = inet->inet_saddr; if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { @@ -200,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (tcp_death_row.sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { + !tp->rx_opt.ts_recent_stamp && fl4.daddr == daddr) { struct inet_peer *peer = rt_get_peer(rt); /* * VJ's idea. We save last timestamp seen from @@ -221,8 +225,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; - if (inet->opt) - inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; + if (inet_opt) + inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; @@ -236,8 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; - rt = ip_route_newports(rt, IPPROTO_TCP, - orig_sport, orig_dport, + rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); @@ -279,7 +282,7 @@ EXPORT_SYMBOL(tcp_v4_connect); /* * This routine does path mtu discovery as defined in RFC1191. */ -static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) +static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); @@ -341,7 +344,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) { - struct iphdr *iph = (struct iphdr *)icmp_skb->data; + const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); struct inet_connection_sock *icsk; struct tcp_sock *tp; @@ -820,17 +823,18 @@ static void syn_flood_warning(const struct sk_buff *skb) /* * Save and compile IPv4 options into the request_sock if needed. */ -static struct ip_options *tcp_v4_save_options(struct sock *sk, - struct sk_buff *skb) +static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, + struct sk_buff *skb) { - struct ip_options *opt = &(IPCB(skb)->opt); - struct ip_options *dopt = NULL; + const struct ip_options *opt = &(IPCB(skb)->opt); + struct ip_options_rcu *dopt = NULL; if (opt && opt->optlen) { - int opt_size = optlength(opt); + int opt_size = sizeof(*dopt) + opt->optlen; + dopt = kmalloc(opt_size, GFP_ATOMIC); if (dopt) { - if (ip_options_echo(dopt, skb)) { + if (ip_options_echo(&dopt->opt, skb)) { kfree(dopt); dopt = NULL; } @@ -1411,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif + struct ip_options_rcu *inet_opt; if (sk_acceptq_is_full(sk)) goto exit_overflow; @@ -1431,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->inet_daddr = ireq->rmt_addr; newinet->inet_rcv_saddr = ireq->loc_addr; newinet->inet_saddr = ireq->loc_addr; - newinet->opt = ireq->opt; + inet_opt = ireq->opt; + rcu_assign_pointer(newinet->inet_opt, inet_opt); ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newinet->opt) - inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; + if (inet_opt) + inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; tcp_mtup_init(newsk); @@ -2527,7 +2533,7 @@ void tcp4_proc_exit(void) struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - struct iphdr *iph = skb_gro_network_header(skb); + const struct iphdr *iph = skb_gro_network_header(skb); switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -2548,7 +2554,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) int tcp4_gro_complete(struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f87a8eb76f3b..544f435d1aff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -578,7 +578,7 @@ found: void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -804,6 +804,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; + struct ip_options_data opt_copy; if (len > 0xFFFF) return -EMSGSIZE; @@ -877,22 +878,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, free = 1; connected = 0; } - if (!ipc.opt) - ipc.opt = inet->opt; + if (!ipc.opt) { + struct ip_options_rcu *inet_opt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + memcpy(&opt_copy, inet_opt, + sizeof(*inet_opt) + inet_opt->opt.optlen); + ipc.opt = &opt_copy.opt; + } + rcu_read_unlock(); + } saddr = ipc.addr; ipc.addr = faddr = daddr; - if (ipc.opt && ipc.opt->srr) { + if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) return -EINVAL; - faddr = ipc.opt->faddr; + faddr = ipc.opt->opt.faddr; connected = 0; } tos = RT_TOS(inet->tos); if (sock_flag(sk, SOCK_LOCALROUTE) || (msg->msg_flags & MSG_DONTROUTE) || - (ipc.opt && ipc.opt->is_strictroute)) { + (ipc.opt && ipc.opt->opt.is_strictroute)) { tos |= RTO_ONLINK; connected = 0; } @@ -909,20 +920,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi4 fl4 = { - .flowi4_oif = ipc.oif, - .flowi4_mark = sk->sk_mark, - .daddr = faddr, - .saddr = saddr, - .flowi4_tos = tos, - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = (inet_sk_flowi_flags(sk) | - FLOWI_FLAG_CAN_SLEEP), - .fl4_sport = inet->inet_sport, - .fl4_dport = dport, - }; + struct flowi4 fl4; struct net *net = sock_net(sk); + flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + RT_SCOPE_UNIVERSE, sk->sk_protocol, + inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + faddr, saddr, dport, inet->inet_sport); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index d20a05e970d8..7ff973bd02dd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -73,7 +73,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, rt->rt_key_dst = fl4->daddr; rt->rt_key_src = fl4->saddr; - rt->rt_tos = fl4->flowi4_tos; + rt->rt_key_tos = fl4->flowi4_tos; rt->rt_route_iif = fl4->flowi4_iif; rt->rt_iif = fl4->flowi4_iif; rt->rt_oif = fl4->flowi4_oif; @@ -102,7 +102,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, static void _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1717c64628d1..ea983ae96ae6 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -55,7 +55,7 @@ xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, int xfrm4_extract_header(struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = iph->id; |