summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/ah4.c4
-rw-r--r--net/ipv4/cipso_ipv4.c1607
-rw-r--r--net/ipv4/devinet.c246
-rw-r--r--net/ipv4/esp4.c15
-rw-r--r--net/ipv4/fib_frontend.c472
-rw-r--r--net/ipv4/fib_hash.c126
-rw-r--r--net/ipv4/fib_lookup.h13
-rw-r--r--net/ipv4/fib_rules.c620
-rw-r--r--net/ipv4/fib_semantics.c518
-rw-r--r--net/ipv4/fib_trie.c110
-rw-r--r--net/ipv4/icmp.c16
-rw-r--r--net/ipv4/igmp.c6
-rw-r--r--net/ipv4/inet_connection_sock.c3
-rw-r--r--net/ipv4/inet_hashtables.c33
-rw-r--r--net/ipv4/inetpeer.c5
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_gre.c27
-rw-r--r--net/ipv4/ip_options.c20
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/ipcomp.c8
-rw-r--r--net/ipv4/ipconfig.c1
-rw-r--r--net/ipv4/ipip.c22
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c8
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/netfilter/Kconfig22
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/arp_tables.c18
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c4
-rw-r--r--net/ipv4/netfilter/arptable_filter.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c215
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c516
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c76
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c52
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c14
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c31
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_sip.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c5
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c56
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c63
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c188
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c27
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c8
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c7
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c15
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c14
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c13
-rw-r--r--net/ipv4/netfilter/ip_queue.c16
-rw-r--r--net/ipv4/netfilter/ip_tables.c184
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c7
-rw-r--r--net/ipv4/netfilter/ipt_DSCP.c96
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c48
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c4
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c4
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c4
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c6
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c7
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c135
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c26
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c12
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c3
-rw-r--r--net/ipv4/netfilter/ipt_ah.c1
-rw-r--r--net/ipv4/netfilter/ipt_dscp.c54
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c3
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c33
-rw-r--r--net/ipv4/netfilter/ipt_owner.c1
-rw-r--r--net/ipv4/netfilter/ipt_recent.c13
-rw-r--r--net/ipv4/netfilter/iptable_filter.c4
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c4
-rw-r--r--net/ipv4/netfilter/iptable_raw.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c166
-rw-r--r--net/ipv4/syncookies.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c35
-rw-r--r--net/ipv4/tcp.c21
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_highspeed.c2
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_hybla.c2
-rw-r--r--net/ipv4/tcp_input.c40
-rw-r--r--net/ipv4/tcp_ipv4.c25
-rw-r--r--net/ipv4/tcp_lp.c3
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c30
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/tcp_veno.c3
-rw-r--r--net/ipv4/tcp_westwood.c2
-rw-r--r--net/ipv4/udp.c120
-rw-r--r--net/ipv4/xfrm4_input.c2
-rw-r--r--net/ipv4/xfrm4_mode_transport.c4
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c3
-rw-r--r--net/ipv4/xfrm4_output.c10
-rw-r--r--net/ipv4/xfrm4_policy.c26
-rw-r--r--net/ipv4/xfrm4_state.c84
-rw-r--r--net/ipv4/xfrm4_tunnel.c2
108 files changed, 3898 insertions, 2707 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 3b5d504a74be..1650b64415aa 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -88,6 +88,7 @@ config IP_FIB_HASH
config IP_MULTIPLE_TABLES
bool "IP: policy routing"
depends on IP_ADVANCED_ROUTER
+ select FIB_RULES
---help---
Normally, a router decides what to do with a received packet based
solely on the packet's final destination address. If you say Y here,
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4878fc5be85f..f66049e28aeb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c84a32070f8d..fdd89e37b9aa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -67,7 +67,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -392,7 +391,7 @@ int inet_release(struct socket *sock)
}
/* It is off by default, see below. */
-int sysctl_ip_nonlocal_bind;
+int sysctl_ip_nonlocal_bind __read_mostly;
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
@@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
* Shall we try to damage output packets if routing dev changes?
*/
-int sysctl_ip_dynaddr;
+int sysctl_ip_dynaddr __read_mostly;
static int inet_sk_reselect_saddr(struct sock *sk)
{
@@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *sk)
},
};
+ security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, 0);
}
if (!err)
@@ -1254,10 +1254,7 @@ static int __init inet_init(void)
struct list_head *r;
int rc = -EINVAL;
- if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
- printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
- goto out;
- }
+ BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
rc = proto_register(&tcp_prot, 1);
if (rc)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 2b98943e6b02..99542977e47e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
switch (*optptr) {
case IPOPT_SEC:
case 0x85: /* Some "Extended Security" crap. */
- case 0x86: /* Another "Commercial Security" crap. */
+ case IPOPT_CIPSO:
case IPOPT_RA:
case 0x80|21: /* RFC1770 */
break;
@@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x)
goto error;
x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
x->data = ahp;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
new file mode 100644
index 000000000000..80a2a0911b49
--- /dev/null
+++ b/net/ipv4/cipso_ipv4.c
@@ -0,0 +1,1607 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188, copies of both documents can be found in the Documentation
+ * directory. While CIPSO never became a full IETF RFC standard many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+struct cipso_v4_domhsh_entry {
+ char *domain;
+ u32 valid;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+/* List of available DOI definitions */
+/* XXX - Updates should be minimal so having a single lock for the
+ * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
+ * okay. */
+/* XXX - This currently assumes a minimal number of different DOIs in use,
+ * if in practice there are a lot of different DOIs this list should
+ * probably be turned into a hash table or something similar so we
+ * can do quick lookups. */
+static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
+static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
+
+/* Label mapping cache */
+int cipso_v4_cache_enabled = 1;
+int cipso_v4_cache_bucketsize = 10;
+#define CIPSO_V4_CACHE_BUCKETBITS 7
+#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS)
+#define CIPSO_V4_CACHE_REORDERLIMIT 10
+struct cipso_v4_map_cache_bkt {
+ spinlock_t lock;
+ u32 size;
+ struct list_head list;
+};
+struct cipso_v4_map_cache_entry {
+ u32 hash;
+ unsigned char *key;
+ size_t key_len;
+
+ struct netlbl_lsm_cache lsm_data;
+
+ u32 activity;
+ struct list_head list;
+};
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
+
+/* Restricted bitmap (tag #1) flags */
+int cipso_v4_rbm_optfmt = 0;
+int cipso_v4_rbm_strictvalid = 1;
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit
+ * @bitmap: the bitmap
+ * @bitmap_len: length in bits
+ * @offset: starting offset
+ * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit
+ *
+ * Description:
+ * Starting at @offset, walk the bitmap from left to right until either the
+ * desired bit is found or we reach the end. Return the bit offset, -1 if
+ * not found, or -2 if error.
+ */
+static int cipso_v4_bitmap_walk(const unsigned char *bitmap,
+ u32 bitmap_len,
+ u32 offset,
+ u8 state)
+{
+ u32 bit_spot;
+ u32 byte_offset;
+ unsigned char bitmask;
+ unsigned char byte;
+
+ /* gcc always rounds to zero when doing integer division */
+ byte_offset = offset / 8;
+ byte = bitmap[byte_offset];
+ bit_spot = offset;
+ bitmask = 0x80 >> (offset % 8);
+
+ while (bit_spot < bitmap_len) {
+ if ((state && (byte & bitmask) == bitmask) ||
+ (state == 0 && (byte & bitmask) == 0))
+ return bit_spot;
+
+ bit_spot++;
+ bitmask >>= 1;
+ if (bitmask == 0) {
+ byte = bitmap[++byte_offset];
+ bitmask = 0x80;
+ }
+ }
+
+ return -1;
+}
+
+/**
+ * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap
+ * @bitmap: the bitmap
+ * @bit: the bit
+ * @state: if non-zero, set the bit (1) else clear the bit (0)
+ *
+ * Description:
+ * Set a single bit in the bitmask. Returns zero on success, negative values
+ * on error.
+ */
+static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
+ u32 bit,
+ u8 state)
+{
+ u32 byte_spot;
+ u8 bitmask;
+
+ /* gcc always rounds to zero when doing integer division */
+ byte_spot = bit / 8;
+ bitmask = 0x80 >> (bit % 8);
+ if (state)
+ bitmap[byte_spot] |= bitmask;
+ else
+ bitmap[byte_spot] &= ~bitmask;
+}
+
+/**
+ * cipso_v4_doi_domhsh_free - Frees a domain list entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to a domain list entry can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
+{
+ struct cipso_v4_domhsh_entry *ptr;
+
+ ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
+ kfree(ptr->domain);
+ kfree(ptr);
+}
+
+/**
+ * cipso_v4_cache_entry_free - Frees a cache entry
+ * @entry: the entry to free
+ *
+ * Description:
+ * This function frees the memory associated with a cache entry.
+ *
+ */
+static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry)
+{
+ if (entry->lsm_data.free)
+ entry->lsm_data.free(entry->lsm_data.data);
+ kfree(entry->key);
+ kfree(entry);
+}
+
+/**
+ * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache
+ * @key: the hash key
+ * @key_len: the length of the key in bytes
+ *
+ * Description:
+ * The CIPSO tag hashing function. Returns a 32-bit hash value.
+ *
+ */
+static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
+{
+ return jhash(key, key_len, 0);
+}
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+/**
+ * cipso_v4_cache_init - Initialize the CIPSO cache
+ *
+ * Description:
+ * Initializes the CIPSO label mapping cache, this function should be called
+ * before any of the other functions defined in this file. Returns zero on
+ * success, negative values on error.
+ *
+ */
+static int cipso_v4_cache_init(void)
+{
+ u32 iter;
+
+ cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS,
+ sizeof(struct cipso_v4_map_cache_bkt),
+ GFP_KERNEL);
+ if (cipso_v4_cache == NULL)
+ return -ENOMEM;
+
+ for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+ spin_lock_init(&cipso_v4_cache[iter].lock);
+ cipso_v4_cache[iter].size = 0;
+ INIT_LIST_HEAD(&cipso_v4_cache[iter].list);
+ }
+
+ return 0;
+}
+
+/**
+ * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
+ *
+ * Description:
+ * Invalidates and frees any entries in the CIPSO cache. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+void cipso_v4_cache_invalidate(void)
+{
+ struct cipso_v4_map_cache_entry *entry, *tmp_entry;
+ u32 iter;
+
+ for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+ spin_lock(&cipso_v4_cache[iter].lock);
+ list_for_each_entry_safe(entry,
+ tmp_entry,
+ &cipso_v4_cache[iter].list, list) {
+ list_del(&entry->list);
+ cipso_v4_cache_entry_free(entry);
+ }
+ cipso_v4_cache[iter].size = 0;
+ spin_unlock(&cipso_v4_cache[iter].lock);
+ }
+
+ return;
+}
+
+/**
+ * cipso_v4_cache_check - Check the CIPSO cache for a label mapping
+ * @key: the buffer to check
+ * @key_len: buffer length in bytes
+ * @secattr: the security attribute struct to use
+ *
+ * Description:
+ * This function checks the cache to see if a label mapping already exists for
+ * the given key. If there is a match then the cache is adjusted and the
+ * @secattr struct is populated with the correct LSM security attributes. The
+ * cache is adjusted in the following manner if the entry is not already the
+ * first in the cache bucket:
+ *
+ * 1. The cache entry's activity counter is incremented
+ * 2. The previous (higher ranking) entry's activity counter is decremented
+ * 3. If the difference between the two activity counters is geater than
+ * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped
+ *
+ * Returns zero on success, -ENOENT for a cache miss, and other negative values
+ * on error.
+ *
+ */
+static int cipso_v4_cache_check(const unsigned char *key,
+ u32 key_len,
+ struct netlbl_lsm_secattr *secattr)
+{
+ u32 bkt;
+ struct cipso_v4_map_cache_entry *entry;
+ struct cipso_v4_map_cache_entry *prev_entry = NULL;
+ u32 hash;
+
+ if (!cipso_v4_cache_enabled)
+ return -ENOENT;
+
+ hash = cipso_v4_map_cache_hash(key, key_len);
+ bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+ spin_lock(&cipso_v4_cache[bkt].lock);
+ list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
+ if (entry->hash == hash &&
+ entry->key_len == key_len &&
+ memcmp(entry->key, key, key_len) == 0) {
+ entry->activity += 1;
+ secattr->cache.free = entry->lsm_data.free;
+ secattr->cache.data = entry->lsm_data.data;
+ if (prev_entry == NULL) {
+ spin_unlock(&cipso_v4_cache[bkt].lock);
+ return 0;
+ }
+
+ if (prev_entry->activity > 0)
+ prev_entry->activity -= 1;
+ if (entry->activity > prev_entry->activity &&
+ entry->activity - prev_entry->activity >
+ CIPSO_V4_CACHE_REORDERLIMIT) {
+ __list_del(entry->list.prev, entry->list.next);
+ __list_add(&entry->list,
+ prev_entry->list.prev,
+ &prev_entry->list);
+ }
+
+ spin_unlock(&cipso_v4_cache[bkt].lock);
+ return 0;
+ }
+ prev_entry = entry;
+ }
+ spin_unlock(&cipso_v4_cache[bkt].lock);
+
+ return -ENOENT;
+}
+
+/**
+ * cipso_v4_cache_add - Add an entry to the CIPSO cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add a new entry into the CIPSO label mapping cache. Add the new entry to
+ * head of the cache bucket's list, if the cache bucket is out of room remove
+ * the last entry in the list first. It is important to note that there is
+ * currently no checking for duplicate keys. Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int cipso_v4_cache_add(const struct sk_buff *skb,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -EPERM;
+ u32 bkt;
+ struct cipso_v4_map_cache_entry *entry = NULL;
+ struct cipso_v4_map_cache_entry *old_entry = NULL;
+ unsigned char *cipso_ptr;
+ u32 cipso_ptr_len;
+
+ if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+ return 0;
+
+ cipso_ptr = CIPSO_V4_OPTPTR(skb);
+ cipso_ptr_len = cipso_ptr[1];
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (entry == NULL)
+ return -ENOMEM;
+ entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
+ if (entry->key == NULL) {
+ ret_val = -ENOMEM;
+ goto cache_add_failure;
+ }
+ memcpy(entry->key, cipso_ptr, cipso_ptr_len);
+ entry->key_len = cipso_ptr_len;
+ entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
+ entry->lsm_data.free = secattr->cache.free;
+ entry->lsm_data.data = secattr->cache.data;
+
+ bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+ spin_lock(&cipso_v4_cache[bkt].lock);
+ if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+ list_add(&entry->list, &cipso_v4_cache[bkt].list);
+ cipso_v4_cache[bkt].size += 1;
+ } else {
+ old_entry = list_entry(cipso_v4_cache[bkt].list.prev,
+ struct cipso_v4_map_cache_entry, list);
+ list_del(&old_entry->list);
+ list_add(&entry->list, &cipso_v4_cache[bkt].list);
+ cipso_v4_cache_entry_free(old_entry);
+ }
+ spin_unlock(&cipso_v4_cache[bkt].lock);
+
+ return 0;
+
+cache_add_failure:
+ if (entry)
+ cipso_v4_cache_entry_free(entry);
+ return ret_val;
+}
+
+/*
+ * DOI List Functions
+ */
+
+/**
+ * cipso_v4_doi_search - Searches for a DOI definition
+ * @doi: the DOI to search for
+ *
+ * Description:
+ * Search the DOI definition list for a DOI definition with a DOI value that
+ * matches @doi. The caller is responsibile for calling rcu_read_[un]lock().
+ * Returns a pointer to the DOI definition on success and NULL on failure.
+ */
+static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
+{
+ struct cipso_v4_doi *iter;
+
+ list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+ if (iter->doi == doi && iter->valid)
+ return iter;
+ return NULL;
+}
+
+/**
+ * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine
+ * @doi_def: the DOI structure
+ *
+ * Description:
+ * The caller defines a new DOI for use by the CIPSO engine and calls this
+ * function to add it to the list of acceptable domains. The caller must
+ * ensure that the mapping table specified in @doi_def->map meets all of the
+ * requirements of the mapping type (see cipso_ipv4.h for details). Returns
+ * zero on success and non-zero on failure.
+ *
+ */
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+ if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
+ return -EINVAL;
+
+ doi_def->valid = 1;
+ INIT_RCU_HEAD(&doi_def->rcu);
+ INIT_LIST_HEAD(&doi_def->dom_list);
+
+ rcu_read_lock();
+ if (cipso_v4_doi_search(doi_def->doi) != NULL)
+ goto doi_add_failure_rlock;
+ spin_lock(&cipso_v4_doi_list_lock);
+ if (cipso_v4_doi_search(doi_def->doi) != NULL)
+ goto doi_add_failure_slock;
+ list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+
+ return 0;
+
+doi_add_failure_slock:
+ spin_unlock(&cipso_v4_doi_list_lock);
+doi_add_failure_rlock:
+ rcu_read_unlock();
+ return -EEXIST;
+}
+
+/**
+ * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
+ * @doi: the DOI value
+ * @callback: the DOI cleanup/free callback
+ *
+ * Description:
+ * Removes a DOI definition from the CIPSO engine, @callback is called to
+ * free any memory. The NetLabel routines will be called to release their own
+ * LSM domain mappings as well as our own domain list. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
+{
+ struct cipso_v4_doi *doi_def;
+ struct cipso_v4_domhsh_entry *dom_iter;
+
+ rcu_read_lock();
+ if (cipso_v4_doi_search(doi) != NULL) {
+ spin_lock(&cipso_v4_doi_list_lock);
+ doi_def = cipso_v4_doi_search(doi);
+ if (doi_def == NULL) {
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+ return -ENOENT;
+ }
+ doi_def->valid = 0;
+ list_del_rcu(&doi_def->list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
+ if (dom_iter->valid)
+ netlbl_domhsh_remove(dom_iter->domain);
+ cipso_v4_cache_invalidate();
+ rcu_read_unlock();
+
+ call_rcu(&doi_def->rcu, callback);
+ return 0;
+ }
+ rcu_read_unlock();
+
+ return -ENOENT;
+}
+
+/**
+ * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * @doi: the DOI value
+ *
+ * Description:
+ * Searches for a valid DOI definition and if one is found it is returned to
+ * the caller. Otherwise NULL is returned. The caller must ensure that
+ * rcu_read_lock() is held while accessing the returned definition.
+ *
+ */
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+ return cipso_v4_doi_search(doi);
+}
+
+/**
+ * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Dump a list of all the configured DOI values into a sk_buff. The returned
+ * sk_buff has room at the front of the sk_buff for @headroom bytes. See
+ * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This
+ * function may fail if another process is changing the DOI list at the same
+ * time. Returns a pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+{
+ struct sk_buff *skb = NULL;
+ struct cipso_v4_doi *iter;
+ u32 doi_cnt = 0;
+ ssize_t buf_len;
+
+ buf_len = NETLBL_LEN_U32;
+ rcu_read_lock();
+ list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+ if (iter->valid) {
+ doi_cnt += 1;
+ buf_len += 2 * NETLBL_LEN_U32;
+ }
+
+ skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+ if (skb == NULL)
+ goto doi_dump_all_failure;
+
+ if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
+ goto doi_dump_all_failure;
+ buf_len -= NETLBL_LEN_U32;
+ list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+ if (iter->valid) {
+ if (buf_len < 2 * NETLBL_LEN_U32)
+ goto doi_dump_all_failure;
+ if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
+ goto doi_dump_all_failure;
+ if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+ goto doi_dump_all_failure;
+ buf_len -= 2 * NETLBL_LEN_U32;
+ }
+ rcu_read_unlock();
+
+ return skb;
+
+doi_dump_all_failure:
+ rcu_read_unlock();
+ kfree(skb);
+ return NULL;
+}
+
+/**
+ * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
+ * @doi: the DOI value
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Lookup the DOI definition matching @doi and dump it's contents into a
+ * sk_buff. The returned sk_buff has room at the front of the sk_buff for
+ * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message
+ * format. This function may fail if another process is changing the DOI list
+ * at the same time. Returns a pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
+{
+ struct sk_buff *skb = NULL;
+ struct cipso_v4_doi *iter;
+ u32 tag_cnt = 0;
+ u32 lvl_cnt = 0;
+ u32 cat_cnt = 0;
+ ssize_t buf_len;
+ ssize_t tmp;
+
+ rcu_read_lock();
+ iter = cipso_v4_doi_getdef(doi);
+ if (iter == NULL)
+ goto doi_dump_failure;
+ buf_len = NETLBL_LEN_U32;
+ switch (iter->type) {
+ case CIPSO_V4_MAP_PASS:
+ buf_len += NETLBL_LEN_U32;
+ while(tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+ iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+ tag_cnt += 1;
+ buf_len += NETLBL_LEN_U8;
+ }
+ break;
+ case CIPSO_V4_MAP_STD:
+ buf_len += 3 * NETLBL_LEN_U32;
+ while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+ iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+ tag_cnt += 1;
+ buf_len += NETLBL_LEN_U8;
+ }
+ for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+ if (iter->map.std->lvl.local[tmp] !=
+ CIPSO_V4_INV_LVL) {
+ lvl_cnt += 1;
+ buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
+ }
+ for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+ if (iter->map.std->cat.local[tmp] !=
+ CIPSO_V4_INV_CAT) {
+ cat_cnt += 1;
+ buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
+ }
+ break;
+ }
+
+ skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+ if (skb == NULL)
+ goto doi_dump_failure;
+
+ if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+ goto doi_dump_failure;
+ buf_len -= NETLBL_LEN_U32;
+ if (iter != cipso_v4_doi_getdef(doi))
+ goto doi_dump_failure;
+ switch (iter->type) {
+ case CIPSO_V4_MAP_PASS:
+ if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+ goto doi_dump_failure;
+ buf_len -= NETLBL_LEN_U32;
+ for (tmp = 0;
+ tmp < CIPSO_V4_TAG_MAXCNT &&
+ iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+ tmp++) {
+ if (buf_len < NETLBL_LEN_U8)
+ goto doi_dump_failure;
+ if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+ goto doi_dump_failure;
+ buf_len -= NETLBL_LEN_U8;
+ }
+ break;
+ case CIPSO_V4_MAP_STD:
+ if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+ goto doi_dump_failure;
+ if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
+ goto doi_dump_failure;
+ if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
+ goto doi_dump_failure;
+ buf_len -= 3 * NETLBL_LEN_U32;
+ for (tmp = 0;
+ tmp < CIPSO_V4_TAG_MAXCNT &&
+ iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+ tmp++) {
+ if (buf_len < NETLBL_LEN_U8)
+ goto doi_dump_failure;
+ if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+ goto doi_dump_failure;
+ buf_len -= NETLBL_LEN_U8;
+ }
+ for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+ if (iter->map.std->lvl.local[tmp] !=
+ CIPSO_V4_INV_LVL) {
+ if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
+ goto doi_dump_failure;
+ if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+ goto doi_dump_failure;
+ if (nla_put_u8(skb,
+ NLA_U8,
+ iter->map.std->lvl.local[tmp]) != 0)
+ goto doi_dump_failure;
+ buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
+ }
+ for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+ if (iter->map.std->cat.local[tmp] !=
+ CIPSO_V4_INV_CAT) {
+ if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
+ goto doi_dump_failure;
+ if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+ goto doi_dump_failure;
+ if (nla_put_u16(skb,
+ NLA_U16,
+ iter->map.std->cat.local[tmp]) != 0)
+ goto doi_dump_failure;
+ buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
+ }
+ break;
+ }
+ rcu_read_unlock();
+
+ return skb;
+
+doi_dump_failure:
+ rcu_read_unlock();
+ kfree(skb);
+ return NULL;
+}
+
+/**
+ * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to add
+ *
+ * Description:
+ * Adds the @domain to the the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel). This function
+ * does allocate memory. Returns zero on success, negative values on failure.
+ *
+ */
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
+{
+ struct cipso_v4_domhsh_entry *iter;
+ struct cipso_v4_domhsh_entry *new_dom;
+
+ new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
+ if (new_dom == NULL)
+ return -ENOMEM;
+ if (domain) {
+ new_dom->domain = kstrdup(domain, GFP_KERNEL);
+ if (new_dom->domain == NULL) {
+ kfree(new_dom);
+ return -ENOMEM;
+ }
+ }
+ new_dom->valid = 1;
+ INIT_RCU_HEAD(&new_dom->rcu);
+
+ rcu_read_lock();
+ spin_lock(&cipso_v4_doi_list_lock);
+ list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+ if (iter->valid &&
+ ((domain != NULL && iter->domain != NULL &&
+ strcmp(iter->domain, domain) == 0) ||
+ (domain == NULL && iter->domain == NULL))) {
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+ kfree(new_dom->domain);
+ kfree(new_dom);
+ return -EEXIST;
+ }
+ list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to remove
+ *
+ * Description:
+ * Removes the @domain from the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel). Returns zero
+ * on success and negative values on error.
+ *
+ */
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+ const char *domain)
+{
+ struct cipso_v4_domhsh_entry *iter;
+
+ rcu_read_lock();
+ spin_lock(&cipso_v4_doi_list_lock);
+ list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+ if (iter->valid &&
+ ((domain != NULL && iter->domain != NULL &&
+ strcmp(iter->domain, domain) == 0) ||
+ (domain == NULL && iter->domain == NULL))) {
+ iter->valid = 0;
+ list_del_rcu(&iter->list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+ call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
+
+ return 0;
+ }
+ spin_unlock(&cipso_v4_doi_list_lock);
+ rcu_read_unlock();
+
+ return -ENOENT;
+}
+
+/*
+ * Label Mapping Functions
+ */
+
+/**
+ * cipso_v4_map_lvl_valid - Checks to see if the given level is understood
+ * @doi_def: the DOI definition
+ * @level: the level to check
+ *
+ * Description:
+ * Checks the given level against the given DOI definition and returns a
+ * negative value if the level does not have a valid mapping and a zero value
+ * if the level is defined by the DOI.
+ *
+ */
+static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
+{
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+ return 0;
+ break;
+ }
+
+ return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network
+ * @doi_def: the DOI definition
+ * @host_lvl: the host MLS level
+ * @net_lvl: the network/CIPSO MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS level to the correct
+ * CIPSO level using the given DOI definition. Returns zero on success,
+ * negative values otherwise.
+ *
+ */
+static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
+ u32 host_lvl,
+ u32 *net_lvl)
+{
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ *net_lvl = host_lvl;
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ if (host_lvl < doi_def->map.std->lvl.local_size) {
+ *net_lvl = doi_def->map.std->lvl.local[host_lvl];
+ return 0;
+ }
+ break;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host
+ * @doi_def: the DOI definition
+ * @net_lvl: the network/CIPSO MLS level
+ * @host_lvl: the host MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO level to the correct local MLS
+ * level using the given DOI definition. Returns zero on success, negative
+ * values otherwise.
+ *
+ */
+static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
+ u32 net_lvl,
+ u32 *host_lvl)
+{
+ struct cipso_v4_std_map_tbl *map_tbl;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ *host_lvl = net_lvl;
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ map_tbl = doi_def->map.std;
+ if (net_lvl < map_tbl->lvl.cipso_size &&
+ map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
+ *host_lvl = doi_def->map.std->lvl.cipso[net_lvl];
+ return 0;
+ }
+ break;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid
+ * @doi_def: the DOI definition
+ * @bitmap: category bitmap
+ * @bitmap_len: bitmap length in bytes
+ *
+ * Description:
+ * Checks the given category bitmap against the given DOI definition and
+ * returns a negative value if any of the categories in the bitmap do not have
+ * a valid mapping and a zero value if all of the categories are valid.
+ *
+ */
+static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
+ const unsigned char *bitmap,
+ u32 bitmap_len)
+{
+ int cat = -1;
+ u32 bitmap_len_bits = bitmap_len * 8;
+ u32 cipso_cat_size = doi_def->map.std->cat.cipso_size;
+ u32 *cipso_array = doi_def->map.std->cat.cipso;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ return 0;
+ case CIPSO_V4_MAP_STD:
+ for (;;) {
+ cat = cipso_v4_bitmap_walk(bitmap,
+ bitmap_len_bits,
+ cat + 1,
+ 1);
+ if (cat < 0)
+ break;
+ if (cat >= cipso_cat_size ||
+ cipso_array[cat] >= CIPSO_V4_INV_CAT)
+ return -EFAULT;
+ }
+
+ if (cat == -1)
+ return 0;
+ break;
+ }
+
+ return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @host_cat: the category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ * @net_cat: the zero'd out category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS category bitmap to the
+ * correct CIPSO bitmap using the given DOI definition. Returns the minimum
+ * size in bytes of the network bitmap on success, negative values otherwise.
+ *
+ */
+static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
+ const unsigned char *host_cat,
+ u32 host_cat_len,
+ unsigned char *net_cat,
+ u32 net_cat_len)
+{
+ int host_spot = -1;
+ u32 net_spot;
+ u32 net_spot_max = 0;
+ u32 host_clen_bits = host_cat_len * 8;
+ u32 net_clen_bits = net_cat_len * 8;
+ u32 host_cat_size = doi_def->map.std->cat.local_size;
+ u32 *host_cat_array = doi_def->map.std->cat.local;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ net_spot_max = host_cat_len - 1;
+ while (net_spot_max > 0 && host_cat[net_spot_max] == 0)
+ net_spot_max--;
+ if (net_spot_max > net_cat_len)
+ return -EINVAL;
+ memcpy(net_cat, host_cat, net_spot_max);
+ return net_spot_max;
+ case CIPSO_V4_MAP_STD:
+ for (;;) {
+ host_spot = cipso_v4_bitmap_walk(host_cat,
+ host_clen_bits,
+ host_spot + 1,
+ 1);
+ if (host_spot < 0)
+ break;
+ if (host_spot >= host_cat_size)
+ return -EPERM;
+
+ net_spot = host_cat_array[host_spot];
+ if (net_spot >= net_clen_bits)
+ return -ENOSPC;
+ cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
+
+ if (net_spot > net_spot_max)
+ net_spot_max = net_spot;
+ }
+
+ if (host_spot == -2)
+ return -EFAULT;
+
+ if (++net_spot_max % 8)
+ return net_spot_max / 8 + 1;
+ return net_spot_max / 8;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ * @host_cat: the zero'd out category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO bitmap to the correct local
+ * MLS category bitmap using the given DOI definition. Returns the minimum
+ * size in bytes of the host bitmap on success, negative values otherwise.
+ *
+ */
+static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
+ const unsigned char *net_cat,
+ u32 net_cat_len,
+ unsigned char *host_cat,
+ u32 host_cat_len)
+{
+ u32 host_spot;
+ u32 host_spot_max = 0;
+ int net_spot = -1;
+ u32 net_clen_bits = net_cat_len * 8;
+ u32 host_clen_bits = host_cat_len * 8;
+ u32 net_cat_size = doi_def->map.std->cat.cipso_size;
+ u32 *net_cat_array = doi_def->map.std->cat.cipso;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ if (net_cat_len > host_cat_len)
+ return -EINVAL;
+ memcpy(host_cat, net_cat, net_cat_len);
+ return net_cat_len;
+ case CIPSO_V4_MAP_STD:
+ for (;;) {
+ net_spot = cipso_v4_bitmap_walk(net_cat,
+ net_clen_bits,
+ net_spot + 1,
+ 1);
+ if (net_spot < 0)
+ break;
+ if (net_spot >= net_cat_size ||
+ net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
+ return -EPERM;
+
+ host_spot = net_cat_array[net_spot];
+ if (host_spot >= host_clen_bits)
+ return -ENOSPC;
+ cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
+
+ if (host_spot > host_spot_max)
+ host_spot_max = host_spot;
+ }
+
+ if (net_spot == -2)
+ return -EFAULT;
+
+ if (++host_spot_max % 8)
+ return host_spot_max / 8 + 1;
+ return host_spot_max / 8;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Protocol Handling Functions
+ */
+
+#define CIPSO_V4_HDR_LEN 6
+
+/**
+ * cipso_v4_gentag_hdr - Generate a CIPSO option header
+ * @doi_def: the DOI definition
+ * @len: the total tag length in bytes
+ * @buf: the CIPSO option buffer
+ *
+ * Description:
+ * Write a CIPSO header into the beginning of @buffer. Return zero on success,
+ * negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
+ u32 len,
+ unsigned char *buf)
+{
+ if (CIPSO_V4_HDR_LEN + len > 40)
+ return -ENOSPC;
+
+ buf[0] = IPOPT_CIPSO;
+ buf[1] = CIPSO_V4_HDR_LEN + len;
+ *(u32 *)&buf[2] = htonl(doi_def->doi);
+
+ return 0;
+}
+
+#define CIPSO_V4_TAG1_CAT_LEN 30
+
+/**
+ * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The
+ * actual buffer length may be larger than the indicated size due to
+ * translation between host and network category bitmaps. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char **buffer,
+ u32 *buffer_len)
+{
+ int ret_val = -EPERM;
+ unsigned char *buf = NULL;
+ u32 buf_len;
+ u32 level;
+
+ if (secattr->mls_cat) {
+ buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
+ GFP_ATOMIC);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
+ secattr->mls_cat,
+ secattr->mls_cat_len,
+ &buf[CIPSO_V4_HDR_LEN + 4],
+ CIPSO_V4_TAG1_CAT_LEN);
+ if (ret_val < 0)
+ goto gentag_failure;
+
+ /* This will send packets using the "optimized" format when
+ * possibile as specified in section 3.4.2.6 of the
+ * CIPSO draft. */
+ if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
+ ret_val = 10;
+
+ buf_len = 4 + ret_val;
+ } else {
+ buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
+ if (buf == NULL)
+ return -ENOMEM;
+ buf_len = 4;
+ }
+
+ ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
+ if (ret_val != 0)
+ goto gentag_failure;
+
+ ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
+ if (ret_val != 0)
+ goto gentag_failure;
+
+ buf[CIPSO_V4_HDR_LEN] = 0x01;
+ buf[CIPSO_V4_HDR_LEN + 1] = buf_len;
+ buf[CIPSO_V4_HDR_LEN + 3] = level;
+
+ *buffer = buf;
+ *buffer_len = CIPSO_V4_HDR_LEN + buf_len;
+
+ return 0;
+
+gentag_failure:
+ kfree(buf);
+ return ret_val;
+}
+
+/**
+ * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
+ * attributes in @secattr. Return zero on success, negatives values on
+ * failure.
+ *
+ */
+static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
+ const unsigned char *tag,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ u8 tag_len = tag[1];
+ u32 level;
+
+ ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
+ if (ret_val != 0)
+ return ret_val;
+ secattr->mls_lvl = level;
+ secattr->mls_lvl_vld = 1;
+
+ if (tag_len > 4) {
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ secattr->mls_cat_len = tag_len - 4;
+ break;
+ case CIPSO_V4_MAP_STD:
+ secattr->mls_cat_len =
+ doi_def->map.std->cat.local_size;
+ break;
+ }
+ secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
+ if (secattr->mls_cat == NULL)
+ return -ENOMEM;
+
+ ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
+ &tag[4],
+ tag_len - 4,
+ secattr->mls_cat,
+ secattr->mls_cat_len);
+ if (ret_val < 0) {
+ kfree(secattr->mls_cat);
+ return ret_val;
+ }
+ secattr->mls_cat_len = ret_val;
+ }
+
+ return 0;
+}
+
+/**
+ * cipso_v4_validate - Validate a CIPSO option
+ * @option: the start of the option, on error it is set to point to the error
+ *
+ * Description:
+ * This routine is called to validate a CIPSO option, it checks all of the
+ * fields to ensure that they are at least valid, see the draft snippet below
+ * for details. If the option is valid then a zero value is returned and
+ * the value of @option is unchanged. If the option is invalid then a
+ * non-zero value is returned and @option is adjusted to point to the
+ * offending portion of the option. From the IETF draft ...
+ *
+ * "If any field within the CIPSO options, such as the DOI identifier, is not
+ * recognized the IP datagram is discarded and an ICMP 'parameter problem'
+ * (type 12) is generated and returned. The ICMP code field is set to 'bad
+ * parameter' (code 0) and the pointer is set to the start of the CIPSO field
+ * that is unrecognized."
+ *
+ */
+int cipso_v4_validate(unsigned char **option)
+{
+ unsigned char *opt = *option;
+ unsigned char *tag;
+ unsigned char opt_iter;
+ unsigned char err_offset = 0;
+ u8 opt_len;
+ u8 tag_len;
+ struct cipso_v4_doi *doi_def = NULL;
+ u32 tag_iter;
+
+ /* caller already checks for length values that are too large */
+ opt_len = opt[1];
+ if (opt_len < 8) {
+ err_offset = 1;
+ goto validate_return;
+ }
+
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
+ if (doi_def == NULL) {
+ err_offset = 2;
+ goto validate_return_locked;
+ }
+
+ opt_iter = 6;
+ tag = opt + opt_iter;
+ while (opt_iter < opt_len) {
+ for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
+ if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
+ ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
+ err_offset = opt_iter;
+ goto validate_return_locked;
+ }
+
+ tag_len = tag[1];
+ if (tag_len > (opt_len - opt_iter)) {
+ err_offset = opt_iter + 1;
+ goto validate_return_locked;
+ }
+
+ switch (tag[0]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ if (tag_len < 4) {
+ err_offset = opt_iter + 1;
+ goto validate_return_locked;
+ }
+
+ /* We are already going to do all the verification
+ * necessary at the socket layer so from our point of
+ * view it is safe to turn these checks off (and less
+ * work), however, the CIPSO draft says we should do
+ * all the CIPSO validations here but it doesn't
+ * really specify _exactly_ what we need to validate
+ * ... so, just make it a sysctl tunable. */
+ if (cipso_v4_rbm_strictvalid) {
+ if (cipso_v4_map_lvl_valid(doi_def,
+ tag[3]) < 0) {
+ err_offset = opt_iter + 3;
+ goto validate_return_locked;
+ }
+ if (tag_len > 4 &&
+ cipso_v4_map_cat_rbm_valid(doi_def,
+ &tag[4],
+ tag_len - 4) < 0) {
+ err_offset = opt_iter + 4;
+ goto validate_return_locked;
+ }
+ }
+ break;
+ default:
+ err_offset = opt_iter;
+ goto validate_return_locked;
+ }
+
+ tag += tag_len;
+ opt_iter += tag_len;
+ }
+
+validate_return_locked:
+ rcu_read_unlock();
+validate_return:
+ *option = opt + err_offset;
+ return err_offset;
+}
+
+/**
+ * cipso_v4_error - Send the correct reponse for a bad packet
+ * @skb: the packet
+ * @error: the error code
+ * @gateway: CIPSO gateway flag
+ *
+ * Description:
+ * Based on the error code given in @error, send an ICMP error message back to
+ * the originating host. From the IETF draft ...
+ *
+ * "If the contents of the CIPSO [option] are valid but the security label is
+ * outside of the configured host or port label range, the datagram is
+ * discarded and an ICMP 'destination unreachable' (type 3) is generated and
+ * returned. The code field of the ICMP is set to 'communication with
+ * destination network administratively prohibited' (code 9) or to
+ * 'communication with destination host administratively prohibited'
+ * (code 10). The value of the code is dependent on whether the originator
+ * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The
+ * recipient of the ICMP message MUST be able to handle either value. The
+ * same procedure is performed if a CIPSO [option] can not be added to an
+ * IP packet because it is too large to fit in the IP options area."
+ *
+ * "If the error is triggered by receipt of an ICMP message, the message is
+ * discarded and no response is permitted (consistent with general ICMP
+ * processing rules)."
+ *
+ */
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
+{
+ if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+ return;
+
+ if (gateway)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+ else
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+}
+
+/**
+ * cipso_v4_socket_setattr - Add a CIPSO option to a socket
+ * @sock: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function. This function requires
+ * exclusive access to @sock->sk, which means it either needs to be in the
+ * process of being created or locked via lock_sock(sock->sk). Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_setattr(const struct socket *sock,
+ const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -EPERM;
+ u32 iter;
+ unsigned char *buf = NULL;
+ u32 buf_len = 0;
+ u32 opt_len;
+ struct ip_options *opt = NULL;
+ struct sock *sk;
+ struct inet_sock *sk_inet;
+ struct inet_connection_sock *sk_conn;
+
+ /* In the case of sock_create_lite(), the sock->sk field is not
+ * defined yet but it is not a problem as the only users of these
+ * "lite" PF_INET sockets are functions which do an accept() call
+ * afterwards so we will label the socket as part of the accept(). */
+ sk = sock->sk;
+ if (sk == NULL)
+ return 0;
+
+ /* XXX - This code assumes only one tag per CIPSO option which isn't
+ * really a good assumption to make but since we only support the MAC
+ * tags right now it is a safe assumption. */
+ iter = 0;
+ do {
+ switch (doi_def->tags[iter]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ ret_val = cipso_v4_gentag_rbm(doi_def,
+ secattr,
+ &buf,
+ &buf_len);
+ break;
+ default:
+ ret_val = -EPERM;
+ goto socket_setattr_failure;
+ }
+
+ iter++;
+ } while (ret_val != 0 &&
+ iter < CIPSO_V4_TAG_MAXCNT &&
+ doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
+ if (ret_val != 0)
+ goto socket_setattr_failure;
+
+ /* We can't use ip_options_get() directly because it makes a call to
+ * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
+ * we can't block here. */
+ opt_len = (buf_len + 3) & ~3;
+ opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
+ if (opt == NULL) {
+ ret_val = -ENOMEM;
+ goto socket_setattr_failure;
+ }
+ memcpy(opt->__data, buf, buf_len);
+ opt->optlen = opt_len;
+ opt->is_data = 1;
+ kfree(buf);
+ buf = NULL;
+ ret_val = ip_options_compile(opt, NULL);
+ if (ret_val != 0)
+ goto socket_setattr_failure;
+
+ sk_inet = inet_sk(sk);
+ if (sk_inet->is_icsk) {
+ sk_conn = inet_csk(sk);
+ if (sk_inet->opt)
+ sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
+ sk_conn->icsk_ext_hdr_len += opt->optlen;
+ sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+ }
+ opt = xchg(&sk_inet->opt, opt);
+ kfree(opt);
+
+ return 0;
+
+socket_setattr_failure:
+ kfree(buf);
+ kfree(opt);
+ return ret_val;
+}
+
+/**
+ * cipso_v4_socket_getattr - Get the security attributes from a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sock to see if there is a CIPSO option attached to the socket and if
+ * there is return the CIPSO security attributes in @secattr. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_getattr(const struct socket *sock,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -ENOMSG;
+ struct sock *sk;
+ struct inet_sock *sk_inet;
+ unsigned char *cipso_ptr;
+ u32 doi;
+ struct cipso_v4_doi *doi_def;
+
+ sk = sock->sk;
+ lock_sock(sk);
+ sk_inet = inet_sk(sk);
+ if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
+ goto socket_getattr_return;
+ cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
+ sizeof(struct iphdr);
+ ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
+ if (ret_val == 0)
+ goto socket_getattr_return;
+
+ doi = ntohl(*(u32 *)&cipso_ptr[2]);
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(doi);
+ if (doi_def == NULL) {
+ rcu_read_unlock();
+ goto socket_getattr_return;
+ }
+ switch (cipso_ptr[6]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ ret_val = cipso_v4_parsetag_rbm(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
+ }
+ rcu_read_unlock();
+
+socket_getattr_return:
+ release_sock(sk);
+ return ret_val;
+}
+
+/**
+ * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse the given packet's CIPSO option and return the security attributes.
+ * Returns zero on success and negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -ENOMSG;
+ unsigned char *cipso_ptr;
+ u32 doi;
+ struct cipso_v4_doi *doi_def;
+
+ if (!CIPSO_V4_OPTEXIST(skb))
+ return -ENOMSG;
+ cipso_ptr = CIPSO_V4_OPTPTR(skb);
+ if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
+ return 0;
+
+ doi = ntohl(*(u32 *)&cipso_ptr[2]);
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(doi);
+ if (doi_def == NULL)
+ goto skbuff_getattr_return;
+ switch (cipso_ptr[6]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ ret_val = cipso_v4_parsetag_rbm(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
+ }
+
+skbuff_getattr_return:
+ rcu_read_unlock();
+ return ret_val;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * cipso_v4_init - Initialize the CIPSO module
+ *
+ * Description:
+ * Initialize the CIPSO module and prepare it for use. Returns zero on success
+ * and negative values on failure.
+ *
+ */
+static int __init cipso_v4_init(void)
+{
+ int ret_val;
+
+ ret_val = cipso_v4_cache_init();
+ if (ret_val != 0)
+ panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n",
+ ret_val);
+
+ return 0;
+}
+
+subsys_initcall(cipso_v4_init);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a6cc31d911eb..8e8d1f17d77a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -43,6 +43,7 @@
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
@@ -62,6 +63,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
+#include <net/netlink.h>
struct ipv4_devconf ipv4_devconf = {
.accept_redirects = 1,
@@ -78,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
.accept_source_route = 1,
};
-static void rtmsg_ifa(int event, struct in_ifaddr *);
+static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
+ [IFA_LOCAL] = { .type = NLA_U32 },
+ [IFA_ADDRESS] = { .type = NLA_U32 },
+ [IFA_BROADCAST] = { .type = NLA_U32 },
+ [IFA_ANYCAST] = { .type = NLA_U32 },
+ [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+};
+
+static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -229,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
return 0;
}
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
- int destroy)
+static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy, struct nlmsghdr *nlh, u32 pid)
{
struct in_ifaddr *promote = NULL;
struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -263,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
if (!do_promote) {
*ifap1 = ifa->ifa_next;
- rtmsg_ifa(RTM_DELADDR, ifa);
+ rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_DOWN, ifa);
inet_free_ifa(ifa);
@@ -288,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
is valid, it will try to restore deleted routes... Grr.
So that, this order is correct.
*/
- rtmsg_ifa(RTM_DELADDR, ifa1);
+ rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
if (promote) {
@@ -300,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
promote->ifa_flags &= ~IFA_F_SECONDARY;
- rtmsg_ifa(RTM_NEWADDR, promote);
+ rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_UP, promote);
for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
@@ -319,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
}
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy)
+{
+ __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
+}
+
+static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
+ u32 pid)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -364,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
listeners of netlink will know about new ifaddr */
- rtmsg_ifa(RTM_NEWADDR, ifa);
+ rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
return 0;
}
+static int inet_insert_ifa(struct in_ifaddr *ifa)
+{
+ return __inet_insert_ifa(ifa, NULL, 0);
+}
+
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -421,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct rtattr **rta = arg;
+ struct nlattr *tb[IFA_MAX+1];
struct in_device *in_dev;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct ifaddrmsg *ifm;
struct in_ifaddr *ifa, **ifap;
+ int err = -EINVAL;
ASSERT_RTNL();
- if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
- goto out;
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ if (err < 0)
+ goto errout;
+
+ ifm = nlmsg_data(nlh);
+ in_dev = inetdev_by_index(ifm->ifa_index);
+ if (in_dev == NULL) {
+ err = -ENODEV;
+ goto errout;
+ }
+
__in_dev_put(in_dev);
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if ((rta[IFA_LOCAL - 1] &&
- memcmp(RTA_DATA(rta[IFA_LOCAL - 1]),
- &ifa->ifa_local, 4)) ||
- (rta[IFA_LABEL - 1] &&
- rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) ||
- (rta[IFA_ADDRESS - 1] &&
- (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
- !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]),
- ifa))))
+ if (tb[IFA_LOCAL] &&
+ ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL]))
+ continue;
+
+ if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
+ continue;
+
+ if (tb[IFA_ADDRESS] &&
+ (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
+ !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa)))
continue;
- inet_del_ifa(in_dev, ifap, 1);
+
+ __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
return 0;
}
-out:
- return -EADDRNOTAVAIL;
+
+ err = -EADDRNOTAVAIL;
+errout:
+ return err;
}
-static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
{
- struct rtattr **rta = arg;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in_ifaddr *ifa;
+ struct ifaddrmsg *ifm;
struct net_device *dev;
struct in_device *in_dev;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
- struct in_ifaddr *ifa;
- int rc = -EINVAL;
+ int err = -EINVAL;
- ASSERT_RTNL();
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ if (err < 0)
+ goto errout;
- if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1])
- goto out;
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
+ goto errout;
- rc = -ENODEV;
- if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
- goto out;
+ dev = __dev_get_by_index(ifm->ifa_index);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto errout;
+ }
- rc = -ENOBUFS;
- if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
+ in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev == NULL) {
in_dev = inetdev_init(dev);
- if (!in_dev)
- goto out;
+ if (in_dev == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
}
- if ((ifa = inet_alloc_ifa()) == NULL)
- goto out;
+ ifa = inet_alloc_ifa();
+ if (ifa == NULL) {
+ /*
+ * A potential indev allocation can be left alive, it stays
+ * assigned to its device and is destroy with it.
+ */
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ in_dev_hold(in_dev);
+
+ if (tb[IFA_ADDRESS] == NULL)
+ tb[IFA_ADDRESS] = tb[IFA_LOCAL];
- if (!rta[IFA_ADDRESS - 1])
- rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
- memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
- memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
- if (rta[IFA_BROADCAST - 1])
- memcpy(&ifa->ifa_broadcast,
- RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
- if (rta[IFA_ANYCAST - 1])
- memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
ifa->ifa_flags = ifm->ifa_flags;
ifa->ifa_scope = ifm->ifa_scope;
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
- if (rta[IFA_LABEL - 1])
- rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ);
+ ifa->ifa_dev = in_dev;
+
+ ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]);
+ ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]);
+
+ if (tb[IFA_BROADCAST])
+ ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]);
+
+ if (tb[IFA_ANYCAST])
+ ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]);
+
+ if (tb[IFA_LABEL])
+ nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
- rc = inet_insert_ifa(ifa);
-out:
- return rc;
+ return ifa;
+
+errout:
+ return ERR_PTR(err);
+}
+
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct in_ifaddr *ifa;
+
+ ASSERT_RTNL();
+
+ ifa = rtm_to_ifaddr(nlh);
+ if (IS_ERR(ifa))
+ return PTR_ERR(ifa);
+
+ return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
}
/*
@@ -1056,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
- ifm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+
if (ifa->ifa_address)
- RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
+ NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address);
+
if (ifa->ifa_local)
- RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
+ NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local);
+
if (ifa->ifa_broadcast)
- RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
+ NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
+
if (ifa->ifa_anycast)
- RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
+ NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast);
+
if (ifa->ifa_label[0])
- RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1127,19 +1201,27 @@ done:
return skb->len;
}
-static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
+static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
+ u32 pid)
{
- int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128);
- struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+ struct sk_buff *skb;
+ u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
- if (!skb)
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
- else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+ err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
+ if (err < 0) {
kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
- } else {
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
+ goto errout;
}
+
+ err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
}
static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -1151,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
[RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
.dumpit = inet_dump_fib, },
#ifdef CONFIG_IP_MULTIPLE_TABLES
- [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, },
- [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, },
- [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, },
+ [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
#endif
};
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b428489f6ccd..13b29360d102 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -95,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esph->seq_no = htonl(++x->replay.oseq);
xfrm_aevent_doreplay(x);
- if (esp->conf.ivlen)
+ if (esp->conf.ivlen) {
+ if (unlikely(!esp->conf.ivinitted)) {
+ get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+ esp->conf.ivinitted = 1;
+ }
crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
+ }
do {
struct scatterlist *sg = &esp->sgbuf[0];
@@ -248,7 +253,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
* as per draft-ietf-ipsec-udp-encaps-06,
* section 3.1.2
*/
- if (!x->props.mode)
+ if (x->props.mode == XFRM_MODE_TRANSPORT)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -267,7 +272,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
mtu = ALIGN(mtu + 2, blksize);
} else {
/* The worst case. */
@@ -378,12 +383,12 @@ static int esp_init_state(struct xfrm_state *x)
esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
if (unlikely(esp->conf.ivec == NULL))
goto error;
- get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+ esp->conf.ivinitted = 0;
}
if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
goto error;
x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ba2a70745a63..cfb527c060e4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,10 +32,12 @@
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>
+#include <linux/list.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -50,48 +52,67 @@
#ifndef CONFIG_IP_MULTIPLE_TABLES
-#define RT_TABLE_MIN RT_TABLE_MAIN
-
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;
-#else
+#define FIB_TABLE_HASHSZ 1
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-#define RT_TABLE_MIN 1
+#else
-struct fib_table *fib_tables[RT_TABLE_MAX+1];
+#define FIB_TABLE_HASHSZ 256
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-struct fib_table *__fib_new_table(int id)
+struct fib_table *fib_new_table(u32 id)
{
struct fib_table *tb;
+ unsigned int h;
+ if (id == 0)
+ id = RT_TABLE_MAIN;
+ tb = fib_get_table(id);
+ if (tb)
+ return tb;
tb = fib_hash_init(id);
if (!tb)
return NULL;
- fib_tables[id] = tb;
+ h = id & (FIB_TABLE_HASHSZ - 1);
+ hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
return tb;
}
+struct fib_table *fib_get_table(u32 id)
+{
+ struct fib_table *tb;
+ struct hlist_node *node;
+ unsigned int h;
+ if (id == 0)
+ id = RT_TABLE_MAIN;
+ h = id & (FIB_TABLE_HASHSZ - 1);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
+ if (tb->tb_id == id) {
+ rcu_read_unlock();
+ return tb;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
-
static void fib_flush(void)
{
int flushed = 0;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_table *tb;
- int id;
+ struct hlist_node *node;
+ unsigned int h;
- for (id = RT_TABLE_MAX; id>0; id--) {
- if ((tb = fib_get_table(id))==NULL)
- continue;
- flushed += tb->tb_flush(tb);
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
+ flushed += tb->tb_flush(tb);
}
-#else /* CONFIG_IP_MULTIPLE_TABLES */
- flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
- flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
-#endif /* CONFIG_IP_MULTIPLE_TABLES */
if (flushed)
rt_cache_flush(-1);
@@ -232,42 +253,190 @@ e_inval:
#ifndef CONFIG_IP_NOSIOCRT
+static inline u32 sk_extract_addr(struct sockaddr *addr)
+{
+ return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+}
+
+static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
+{
+ struct nlattr *nla;
+
+ nla = (struct nlattr *) ((char *) mx + len);
+ nla->nla_type = type;
+ nla->nla_len = nla_attr_size(4);
+ *(u32 *) nla_data(nla) = value;
+
+ return len + nla_total_size(4);
+}
+
+static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
+ struct fib_config *cfg)
+{
+ u32 addr;
+ int plen;
+
+ memset(cfg, 0, sizeof(*cfg));
+
+ if (rt->rt_dst.sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ /*
+ * Check mask for validity:
+ * a) it must be contiguous.
+ * b) destination must have all host bits clear.
+ * c) if application forgot to set correct family (AF_INET),
+ * reject request unless it is absolutely clear i.e.
+ * both family and mask are zero.
+ */
+ plen = 32;
+ addr = sk_extract_addr(&rt->rt_dst);
+ if (!(rt->rt_flags & RTF_HOST)) {
+ u32 mask = sk_extract_addr(&rt->rt_genmask);
+
+ if (rt->rt_genmask.sa_family != AF_INET) {
+ if (mask || rt->rt_genmask.sa_family)
+ return -EAFNOSUPPORT;
+ }
+
+ if (bad_mask(mask, addr))
+ return -EINVAL;
+
+ plen = inet_mask_len(mask);
+ }
+
+ cfg->fc_dst_len = plen;
+ cfg->fc_dst = addr;
+
+ if (cmd != SIOCDELRT) {
+ cfg->fc_nlflags = NLM_F_CREATE;
+ cfg->fc_protocol = RTPROT_BOOT;
+ }
+
+ if (rt->rt_metric)
+ cfg->fc_priority = rt->rt_metric - 1;
+
+ if (rt->rt_flags & RTF_REJECT) {
+ cfg->fc_scope = RT_SCOPE_HOST;
+ cfg->fc_type = RTN_UNREACHABLE;
+ return 0;
+ }
+
+ cfg->fc_scope = RT_SCOPE_NOWHERE;
+ cfg->fc_type = RTN_UNICAST;
+
+ if (rt->rt_dev) {
+ char *colon;
+ struct net_device *dev;
+ char devname[IFNAMSIZ];
+
+ if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
+ return -EFAULT;
+
+ devname[IFNAMSIZ-1] = 0;
+ colon = strchr(devname, ':');
+ if (colon)
+ *colon = 0;
+ dev = __dev_get_by_name(devname);
+ if (!dev)
+ return -ENODEV;
+ cfg->fc_oif = dev->ifindex;
+ if (colon) {
+ struct in_ifaddr *ifa;
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ if (!in_dev)
+ return -ENODEV;
+ *colon = ':';
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+ if (strcmp(ifa->ifa_label, devname) == 0)
+ break;
+ if (ifa == NULL)
+ return -ENODEV;
+ cfg->fc_prefsrc = ifa->ifa_local;
+ }
+ }
+
+ addr = sk_extract_addr(&rt->rt_gateway);
+ if (rt->rt_gateway.sa_family == AF_INET && addr) {
+ cfg->fc_gw = addr;
+ if (rt->rt_flags & RTF_GATEWAY &&
+ inet_addr_type(addr) == RTN_UNICAST)
+ cfg->fc_scope = RT_SCOPE_UNIVERSE;
+ }
+
+ if (cmd == SIOCDELRT)
+ return 0;
+
+ if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+ return -EINVAL;
+
+ if (cfg->fc_scope == RT_SCOPE_NOWHERE)
+ cfg->fc_scope = RT_SCOPE_LINK;
+
+ if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
+ struct nlattr *mx;
+ int len = 0;
+
+ mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
+ if (mx == NULL)
+ return -ENOMEM;
+
+ if (rt->rt_flags & RTF_MTU)
+ len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
+
+ if (rt->rt_flags & RTF_WINDOW)
+ len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
+
+ if (rt->rt_flags & RTF_IRTT)
+ len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
+
+ cfg->fc_mx = mx;
+ cfg->fc_mx_len = len;
+ }
+
+ return 0;
+}
+
/*
* Handle IP routing ioctl calls. These are used to manipulate the routing tables
*/
int ip_rt_ioctl(unsigned int cmd, void __user *arg)
{
+ struct fib_config cfg;
+ struct rtentry rt;
int err;
- struct kern_rta rta;
- struct rtentry r;
- struct {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
- } req;
switch (cmd) {
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+
+ if (copy_from_user(&rt, arg, sizeof(rt)))
return -EFAULT;
+
rtnl_lock();
- err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
+ err = rtentry_to_fib_config(cmd, &rt, &cfg);
if (err == 0) {
+ struct fib_table *tb;
+
if (cmd == SIOCDELRT) {
- struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
- err = -ESRCH;
+ tb = fib_get_table(cfg.fc_table);
if (tb)
- err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+ err = tb->tb_delete(tb, &cfg);
+ else
+ err = -ESRCH;
} else {
- struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
- err = -ENOBUFS;
+ tb = fib_new_table(cfg.fc_table);
if (tb)
- err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+ err = tb->tb_insert(tb, &cfg);
+ else
+ err = -ENOBUFS;
}
- kfree(rta.rta_mx);
+
+ /* allocated by rtentry_to_fib_config() */
+ kfree(cfg.fc_mx);
}
rtnl_unlock();
return err;
@@ -284,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
#endif
-static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
+struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+ [RTA_DST] = { .type = NLA_U32 },
+ [RTA_SRC] = { .type = NLA_U32 },
+ [RTA_IIF] = { .type = NLA_U32 },
+ [RTA_OIF] = { .type = NLA_U32 },
+ [RTA_GATEWAY] = { .type = NLA_U32 },
+ [RTA_PRIORITY] = { .type = NLA_U32 },
+ [RTA_PREFSRC] = { .type = NLA_U32 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+ [RTA_PROTOINFO] = { .type = NLA_U32 },
+ [RTA_FLOW] = { .type = NLA_U32 },
+ [RTA_MP_ALGO] = { .type = NLA_U32 },
+};
+
+static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct fib_config *cfg)
{
- int i;
-
- for (i=1; i<=RTA_MAX; i++, rta++) {
- struct rtattr *attr = *rta;
- if (attr) {
- if (RTA_PAYLOAD(attr) < 4)
- return -EINVAL;
- if (i != RTA_MULTIPATH && i != RTA_METRICS)
- *rta = (struct rtattr*)RTA_DATA(attr);
+ struct nlattr *attr;
+ int err, remaining;
+ struct rtmsg *rtm;
+
+ err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
+ if (err < 0)
+ goto errout;
+
+ memset(cfg, 0, sizeof(*cfg));
+
+ rtm = nlmsg_data(nlh);
+ cfg->fc_family = rtm->rtm_family;
+ cfg->fc_dst_len = rtm->rtm_dst_len;
+ cfg->fc_src_len = rtm->rtm_src_len;
+ cfg->fc_tos = rtm->rtm_tos;
+ cfg->fc_table = rtm->rtm_table;
+ cfg->fc_protocol = rtm->rtm_protocol;
+ cfg->fc_scope = rtm->rtm_scope;
+ cfg->fc_type = rtm->rtm_type;
+ cfg->fc_flags = rtm->rtm_flags;
+ cfg->fc_nlflags = nlh->nlmsg_flags;
+
+ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.nlh = nlh;
+
+ nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
+ switch (attr->nla_type) {
+ case RTA_DST:
+ cfg->fc_dst = nla_get_u32(attr);
+ break;
+ case RTA_SRC:
+ cfg->fc_src = nla_get_u32(attr);
+ break;
+ case RTA_OIF:
+ cfg->fc_oif = nla_get_u32(attr);
+ break;
+ case RTA_GATEWAY:
+ cfg->fc_gw = nla_get_u32(attr);
+ break;
+ case RTA_PRIORITY:
+ cfg->fc_priority = nla_get_u32(attr);
+ break;
+ case RTA_PREFSRC:
+ cfg->fc_prefsrc = nla_get_u32(attr);
+ break;
+ case RTA_METRICS:
+ cfg->fc_mx = nla_data(attr);
+ cfg->fc_mx_len = nla_len(attr);
+ break;
+ case RTA_MULTIPATH:
+ cfg->fc_mp = nla_data(attr);
+ cfg->fc_mp_len = nla_len(attr);
+ break;
+ case RTA_FLOW:
+ cfg->fc_flow = nla_get_u32(attr);
+ break;
+ case RTA_MP_ALGO:
+ cfg->fc_mp_alg = nla_get_u32(attr);
+ break;
+ case RTA_TABLE:
+ cfg->fc_table = nla_get_u32(attr);
+ break;
}
}
+
return 0;
+errout:
+ return err;
}
int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct fib_table * tb;
- struct rtattr **rta = arg;
- struct rtmsg *r = NLMSG_DATA(nlh);
+ struct fib_config cfg;
+ struct fib_table *tb;
+ int err;
- if (inet_check_attr(r, rta))
- return -EINVAL;
+ err = rtm_to_fib_config(skb, nlh, &cfg);
+ if (err < 0)
+ goto errout;
- tb = fib_get_table(r->rtm_table);
- if (tb)
- return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
- return -ESRCH;
+ tb = fib_get_table(cfg.fc_table);
+ if (tb == NULL) {
+ err = -ESRCH;
+ goto errout;
+ }
+
+ err = tb->tb_delete(tb, &cfg);
+errout:
+ return err;
}
int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct fib_table * tb;
- struct rtattr **rta = arg;
- struct rtmsg *r = NLMSG_DATA(nlh);
+ struct fib_config cfg;
+ struct fib_table *tb;
+ int err;
- if (inet_check_attr(r, rta))
- return -EINVAL;
+ err = rtm_to_fib_config(skb, nlh, &cfg);
+ if (err < 0)
+ goto errout;
- tb = fib_new_table(r->rtm_table);
- if (tb)
- return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
- return -ENOBUFS;
+ tb = fib_new_table(cfg.fc_table);
+ if (tb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ err = tb->tb_insert(tb, &cfg);
+errout:
+ return err;
}
int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- int t;
- int s_t;
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
struct fib_table *tb;
+ struct hlist_node *node;
+ int dumped = 0;
- if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
- ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+ if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
+ ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
return ip_rt_dump(skb, cb);
- s_t = cb->args[0];
- if (s_t == 0)
- s_t = cb->args[0] = RT_TABLE_MIN;
-
- for (t=s_t; t<=RT_TABLE_MAX; t++) {
- if (t < s_t) continue;
- if (t > s_t)
- memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
- if ((tb = fib_get_table(t))==NULL)
- continue;
- if (tb->tb_dump(tb, skb, cb) < 0)
- break;
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+
+ for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+ e = 0;
+ hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
+ if (e < s_e)
+ goto next;
+ if (dumped)
+ memset(&cb->args[2], 0, sizeof(cb->args) -
+ 2 * sizeof(cb->args[0]));
+ if (tb->tb_dump(tb, skb, cb) < 0)
+ goto out;
+ dumped = 1;
+next:
+ e++;
+ }
}
-
- cb->args[0] = t;
+out:
+ cb->args[1] = e;
+ cb->args[0] = h;
return skb->len;
}
@@ -366,17 +627,19 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
only when netlink is already locked.
*/
-static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, u32 dst, int dst_len,
+ struct in_ifaddr *ifa)
{
- struct fib_table * tb;
- struct {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
- } req;
- struct kern_rta rta;
-
- memset(&req.rtm, 0, sizeof(req.rtm));
- memset(&rta, 0, sizeof(rta));
+ struct fib_table *tb;
+ struct fib_config cfg = {
+ .fc_protocol = RTPROT_KERNEL,
+ .fc_type = type,
+ .fc_dst = dst,
+ .fc_dst_len = dst_len,
+ .fc_prefsrc = ifa->ifa_local,
+ .fc_oif = ifa->ifa_dev->dev->ifindex,
+ .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
+ };
if (type == RTN_UNICAST)
tb = fib_new_table(RT_TABLE_MAIN);
@@ -386,26 +649,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
if (tb == NULL)
return;
- req.nlh.nlmsg_len = sizeof(req);
- req.nlh.nlmsg_type = cmd;
- req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
- req.nlh.nlmsg_pid = 0;
- req.nlh.nlmsg_seq = 0;
+ cfg.fc_table = tb->tb_id;
- req.rtm.rtm_dst_len = dst_len;
- req.rtm.rtm_table = tb->tb_id;
- req.rtm.rtm_protocol = RTPROT_KERNEL;
- req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
- req.rtm.rtm_type = type;
-
- rta.rta_dst = &dst;
- rta.rta_prefsrc = &ifa->ifa_local;
- rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+ if (type != RTN_LOCAL)
+ cfg.fc_scope = RT_SCOPE_LINK;
+ else
+ cfg.fc_scope = RT_SCOPE_HOST;
if (cmd == RTM_NEWROUTE)
- tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+ tb->tb_insert(tb, &cfg);
else
- tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+ tb->tb_delete(tb, &cfg);
}
void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -652,11 +906,17 @@ static struct notifier_block fib_netdev_notifier = {
void __init ip_fib_init(void)
{
+ unsigned int i;
+
+ for (i = 0; i < FIB_TABLE_HASHSZ; i++)
+ INIT_HLIST_HEAD(&fib_table_hash[i]);
#ifndef CONFIG_IP_MULTIPLE_TABLES
ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+ hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
+ hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
#else
- fib_rules_init();
+ fib4_rules_init();
#endif
register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 72c633b357cf..88133b383dc5 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
return NULL;
}
-static int
-fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct fn_hash *table = (struct fn_hash *) tb->tb_data;
struct fib_node *new_f, *f;
struct fib_alias *fa, *new_fa;
struct fn_zone *fz;
struct fib_info *fi;
- int z = r->rtm_dst_len;
- int type = r->rtm_type;
- u8 tos = r->rtm_tos;
+ u8 tos = cfg->fc_tos;
u32 key;
int err;
- if (z > 32)
+ if (cfg->fc_dst_len > 32)
return -EINVAL;
- fz = table->fn_zones[z];
- if (!fz && !(fz = fn_new_zone(table, z)))
+
+ fz = table->fn_zones[cfg->fc_dst_len];
+ if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
return -ENOBUFS;
key = 0;
- if (rta->rta_dst) {
- u32 dst;
- memcpy(&dst, rta->rta_dst, 4);
- if (dst & ~FZ_MASK(fz))
+ if (cfg->fc_dst) {
+ if (cfg->fc_dst & ~FZ_MASK(fz))
return -EINVAL;
- key = fz_key(dst, fz);
+ key = fz_key(cfg->fc_dst, fz);
}
- if ((fi = fib_create_info(r, rta, n, &err)) == NULL)
- return err;
+ fi = fib_create_info(cfg);
+ if (IS_ERR(fi))
+ return PTR_ERR(fi);
if (fz->fz_nent > (fz->fz_divisor<<1) &&
fz->fz_divisor < FZ_MAX_DIVISOR &&
- (z==32 || (1<<z) > fz->fz_divisor))
+ (cfg->fc_dst_len == 32 ||
+ (1 << cfg->fc_dst_len) > fz->fz_divisor))
fn_rehash_zone(fz);
f = fib_find_node(fz, key);
@@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
struct fib_alias *fa_orig;
err = -EEXIST;
- if (n->nlmsg_flags & NLM_F_EXCL)
+ if (cfg->fc_nlflags & NLM_F_EXCL)
goto out;
- if (n->nlmsg_flags & NLM_F_REPLACE) {
+ if (cfg->fc_nlflags & NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
write_lock_bh(&fib_hash_lock);
fi_drop = fa->fa_info;
fa->fa_info = fi;
- fa->fa_type = type;
- fa->fa_scope = r->rtm_scope;
+ fa->fa_type = cfg->fc_type;
+ fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
fa->fa_state &= ~FA_S_ACCESSED;
fib_hash_genid++;
@@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
- if (fa->fa_type == type &&
- fa->fa_scope == r->rtm_scope &&
+ if (fa->fa_type == cfg->fc_type &&
+ fa->fa_scope == cfg->fc_scope &&
fa->fa_info == fi)
goto out;
}
- if (!(n->nlmsg_flags & NLM_F_APPEND))
+ if (!(cfg->fc_nlflags & NLM_F_APPEND))
fa = fa_orig;
}
err = -ENOENT;
- if (!(n->nlmsg_flags&NLM_F_CREATE))
+ if (!(cfg->fc_nlflags & NLM_F_CREATE))
goto out;
err = -ENOBUFS;
@@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
- new_fa->fa_type = type;
- new_fa->fa_scope = r->rtm_scope;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_scope = cfg->fc_scope;
new_fa->fa_state = 0;
/*
@@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
fz->fz_nent++;
rt_cache_flush(-1);
- rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req);
+ rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
+ &cfg->fc_nlinfo);
return 0;
out_free_new_fa:
@@ -537,30 +535,25 @@ out:
}
-static int
-fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
{
struct fn_hash *table = (struct fn_hash*)tb->tb_data;
struct fib_node *f;
struct fib_alias *fa, *fa_to_delete;
- int z = r->rtm_dst_len;
struct fn_zone *fz;
u32 key;
- u8 tos = r->rtm_tos;
- if (z > 32)
+ if (cfg->fc_dst_len > 32)
return -EINVAL;
- if ((fz = table->fn_zones[z]) == NULL)
+
+ if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
return -ESRCH;
key = 0;
- if (rta->rta_dst) {
- u32 dst;
- memcpy(&dst, rta->rta_dst, 4);
- if (dst & ~FZ_MASK(fz))
+ if (cfg->fc_dst) {
+ if (cfg->fc_dst & ~FZ_MASK(fz))
return -EINVAL;
- key = fz_key(dst, fz);
+ key = fz_key(cfg->fc_dst, fz);
}
f = fib_find_node(fz, key);
@@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
if (!f)
fa = NULL;
else
- fa = fib_find_alias(&f->fn_alias, tos, 0);
+ fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
if (!fa)
return -ESRCH;
@@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
struct fib_info *fi = fa->fa_info;
- if (fa->fa_tos != tos)
+ if (fa->fa_tos != cfg->fc_tos)
break;
- if ((!r->rtm_type ||
- fa->fa_type == r->rtm_type) &&
- (r->rtm_scope == RT_SCOPE_NOWHERE ||
- fa->fa_scope == r->rtm_scope) &&
- (!r->rtm_protocol ||
- fi->fib_protocol == r->rtm_protocol) &&
- fib_nh_match(r, n, rta, fi) == 0) {
+ if ((!cfg->fc_type ||
+ fa->fa_type == cfg->fc_type) &&
+ (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+ fa->fa_scope == cfg->fc_scope) &&
+ (!cfg->fc_protocol ||
+ fi->fib_protocol == cfg->fc_protocol) &&
+ fib_nh_match(cfg, fi) == 0) {
fa_to_delete = fa;
break;
}
@@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
int kill_fn;
fa = fa_to_delete;
- rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
+ rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
+ tb->tb_id, &cfg->fc_nlinfo);
kill_fn = 0;
write_lock_bh(&fib_hash_lock);
@@ -684,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
struct fib_node *f;
int i, s_i;
- s_i = cb->args[3];
+ s_i = cb->args[4];
i = 0;
hlist_for_each_entry(f, node, head, fn_hash) {
struct fib_alias *fa;
@@ -699,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
tb->tb_id,
fa->fa_type,
fa->fa_scope,
- &f->fn_key,
+ f->fn_key,
fz->fz_order,
fa->fa_tos,
fa->fa_info,
NLM_F_MULTI) < 0) {
- cb->args[3] = i;
+ cb->args[4] = i;
return -1;
}
next:
i++;
}
}
- cb->args[3] = i;
+ cb->args[4] = i;
return skb->len;
}
@@ -722,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
{
int h, s_h;
- s_h = cb->args[2];
+ s_h = cb->args[3];
for (h=0; h < fz->fz_divisor; h++) {
if (h < s_h) continue;
if (h > s_h)
- memset(&cb->args[3], 0,
- sizeof(cb->args) - 3*sizeof(cb->args[0]));
+ memset(&cb->args[4], 0,
+ sizeof(cb->args) - 4*sizeof(cb->args[0]));
if (fz->fz_hash == NULL ||
hlist_empty(&fz->fz_hash[h]))
continue;
if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
- cb->args[2] = h;
+ cb->args[3] = h;
return -1;
}
}
- cb->args[2] = h;
+ cb->args[3] = h;
return skb->len;
}
@@ -746,28 +740,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
struct fn_zone *fz;
struct fn_hash *table = (struct fn_hash*)tb->tb_data;
- s_m = cb->args[1];
+ s_m = cb->args[2];
read_lock(&fib_hash_lock);
for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
if (m < s_m) continue;
if (m > s_m)
- memset(&cb->args[2], 0,
- sizeof(cb->args) - 2*sizeof(cb->args[0]));
+ memset(&cb->args[3], 0,
+ sizeof(cb->args) - 3*sizeof(cb->args[0]));
if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
- cb->args[1] = m;
+ cb->args[2] = m;
read_unlock(&fib_hash_lock);
return -1;
}
}
read_unlock(&fib_hash_lock);
- cb->args[1] = m;
+ cb->args[2] = m;
return skb->len;
}
#ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
#else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
#endif
{
struct fib_table *tb;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ef6609ea0eb7..fd6f7769f8ab 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -23,19 +23,14 @@ extern int fib_semantic_match(struct list_head *head,
struct fib_result *res, __u32 zone, __u32 mask,
int prefixlen);
extern void fib_release_info(struct fib_info *);
-extern struct fib_info *fib_create_info(const struct rtmsg *r,
- struct kern_rta *rta,
- const struct nlmsghdr *,
- int *err);
-extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
- struct kern_rta *rta, struct fib_info *fi);
+extern struct fib_info *fib_create_info(struct fib_config *cfg);
+extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u8 tb_id, u8 type, u8 scope, void *dst,
+ u32 tb_id, u8 type, u8 scope, u32 dst,
int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
- int z, int tb_id,
- struct nlmsghdr *n, struct netlink_skb_parms *req);
+ int dst_len, u32 tb_id, struct nl_info *info);
extern struct fib_alias *fib_find_alias(struct list_head *fah,
u8 tos, u32 prio);
extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 79b04718bdfd..52b2adae4f22 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -5,9 +5,8 @@
*
* IPv4 Forwarding Information Base: policy rules.
*
- * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Thomas Graf <tgraf@suug.ch>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -19,463 +18,350 @@
* Marc Boucher : routing by fwmark
*/
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/inetdevice.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
#include <linux/netlink.h>
+#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
-
#include <net/ip.h>
-#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
-#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/fib_rules.h>
-#define FRprintk(a...)
+static struct fib_rules_ops fib4_rules_ops;
-struct fib_rule
+struct fib4_rule
{
- struct hlist_node hlist;
- atomic_t r_clntref;
- u32 r_preference;
- unsigned char r_table;
- unsigned char r_action;
- unsigned char r_dst_len;
- unsigned char r_src_len;
- u32 r_src;
- u32 r_srcmask;
- u32 r_dst;
- u32 r_dstmask;
- u32 r_srcmap;
- u8 r_flags;
- u8 r_tos;
+ struct fib_rule common;
+ u8 dst_len;
+ u8 src_len;
+ u8 tos;
+ u32 src;
+ u32 srcmask;
+ u32 dst;
+ u32 dstmask;
#ifdef CONFIG_IP_ROUTE_FWMARK
- u32 r_fwmark;
+ u32 fwmark;
+ u32 fwmask;
#endif
- int r_ifindex;
#ifdef CONFIG_NET_CLS_ROUTE
- __u32 r_tclassid;
+ u32 tclassid;
#endif
- char r_ifname[IFNAMSIZ];
- int r_dead;
- struct rcu_head rcu;
};
-static struct fib_rule default_rule = {
- .r_clntref = ATOMIC_INIT(2),
- .r_preference = 0x7FFF,
- .r_table = RT_TABLE_DEFAULT,
- .r_action = RTN_UNICAST,
+static struct fib4_rule default_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .pref = 0x7FFF,
+ .table = RT_TABLE_DEFAULT,
+ .action = FR_ACT_TO_TBL,
+ },
};
-static struct fib_rule main_rule = {
- .r_clntref = ATOMIC_INIT(2),
- .r_preference = 0x7FFE,
- .r_table = RT_TABLE_MAIN,
- .r_action = RTN_UNICAST,
+static struct fib4_rule main_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .pref = 0x7FFE,
+ .table = RT_TABLE_MAIN,
+ .action = FR_ACT_TO_TBL,
+ },
};
-static struct fib_rule local_rule = {
- .r_clntref = ATOMIC_INIT(2),
- .r_table = RT_TABLE_LOCAL,
- .r_action = RTN_UNICAST,
+static struct fib4_rule local_rule = {
+ .common = {
+ .refcnt = ATOMIC_INIT(2),
+ .table = RT_TABLE_LOCAL,
+ .action = FR_ACT_TO_TBL,
+ .flags = FIB_RULE_PERMANENT,
+ },
};
-static struct hlist_head fib_rules;
+static LIST_HEAD(fib4_rules);
-/* writer func called from netlink -- rtnl_sem hold*/
-
-static void rtmsg_rule(int, struct fib_rule *);
-
-int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+#ifdef CONFIG_NET_CLS_ROUTE
+u32 fib_rules_tclass(struct fib_result *res)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
- struct fib_rule *r;
- struct hlist_node *node;
- int err = -ESRCH;
-
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
- rtm->rtm_src_len == r->r_src_len &&
- rtm->rtm_dst_len == r->r_dst_len &&
- (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
- rtm->rtm_tos == r->r_tos &&
-#ifdef CONFIG_IP_ROUTE_FWMARK
- (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
- (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
- (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
- (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
- (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
- err = -EPERM;
- if (r == &local_rule)
- break;
-
- hlist_del_rcu(&r->hlist);
- r->r_dead = 1;
- rtmsg_rule(RTM_DELRULE, r);
- fib_rule_put(r);
- err = 0;
- break;
- }
- }
- return err;
+ return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
}
+#endif
-/* Allocate new unique table id */
-
-static struct fib_table *fib_empty_table(void)
+int fib_lookup(struct flowi *flp, struct fib_result *res)
{
- int id;
+ struct fib_lookup_arg arg = {
+ .result = res,
+ };
+ int err;
- for (id = 1; id <= RT_TABLE_MAX; id++)
- if (fib_tables[id] == NULL)
- return __fib_new_table(id);
- return NULL;
-}
+ err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
+ res->r = arg.rule;
-static inline void fib_rule_put_rcu(struct rcu_head *head)
-{
- struct fib_rule *r = container_of(head, struct fib_rule, rcu);
- kfree(r);
+ return err;
}
-void fib_rule_put(struct fib_rule *r)
+static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
{
- if (atomic_dec_and_test(&r->r_clntref)) {
- if (r->r_dead)
- call_rcu(&r->rcu, fib_rule_put_rcu);
- else
- printk("Freeing alive rule %p\n", r);
+ int err = -EAGAIN;
+ struct fib_table *tbl;
+
+ switch (rule->action) {
+ case FR_ACT_TO_TBL:
+ break;
+
+ case FR_ACT_UNREACHABLE:
+ err = -ENETUNREACH;
+ goto errout;
+
+ case FR_ACT_PROHIBIT:
+ err = -EACCES;
+ goto errout;
+
+ case FR_ACT_BLACKHOLE:
+ default:
+ err = -EINVAL;
+ goto errout;
}
+
+ if ((tbl = fib_get_table(rule->table)) == NULL)
+ goto errout;
+
+ err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+ if (err > 0)
+ err = -EAGAIN;
+errout:
+ return err;
}
-/* writer func called from netlink -- rtnl_sem hold*/
-int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
- struct fib_rule *r, *new_r, *last = NULL;
- struct hlist_node *node = NULL;
- unsigned char table_id;
-
- if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
- (rtm->rtm_tos & ~IPTOS_TOS_MASK))
- return -EINVAL;
-
- if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
- return -EINVAL;
-
- table_id = rtm->rtm_table;
- if (table_id == RT_TABLE_UNSPEC) {
- struct fib_table *table;
- if (rtm->rtm_type == RTN_UNICAST) {
- if ((table = fib_empty_table()) == NULL)
- return -ENOBUFS;
- table_id = table->tb_id;
- }
+ if (res->r && res->r->action == FR_ACT_TO_TBL &&
+ FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+ struct fib_table *tb;
+ if ((tb = fib_get_table(res->r->table)) != NULL)
+ tb->tb_select_default(tb, flp, res);
}
+}
- new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
- if (!new_r)
- return -ENOMEM;
-
- if (rta[RTA_SRC-1])
- memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
- if (rta[RTA_DST-1])
- memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
- if (rta[RTA_GATEWAY-1])
- memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
- new_r->r_src_len = rtm->rtm_src_len;
- new_r->r_dst_len = rtm->rtm_dst_len;
- new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
- new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
- new_r->r_tos = rtm->rtm_tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- if (rta[RTA_PROTOINFO-1])
- memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
-#endif
- new_r->r_action = rtm->rtm_type;
- new_r->r_flags = rtm->rtm_flags;
- if (rta[RTA_PRIORITY-1])
- memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
- new_r->r_table = table_id;
- if (rta[RTA_IIF-1]) {
- struct net_device *dev;
- rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
- new_r->r_ifindex = -1;
- dev = __dev_get_by_name(new_r->r_ifname);
- if (dev)
- new_r->r_ifindex = dev->ifindex;
- }
-#ifdef CONFIG_NET_CLS_ROUTE
- if (rta[RTA_FLOW-1])
- memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
-#endif
- r = container_of(fib_rules.first, struct fib_rule, hlist);
+static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+ struct fib4_rule *r = (struct fib4_rule *) rule;
+ u32 daddr = fl->fl4_dst;
+ u32 saddr = fl->fl4_src;
- if (!new_r->r_preference) {
- if (r && r->hlist.next != NULL) {
- r = container_of(r->hlist.next, struct fib_rule, hlist);
- if (r->r_preference)
- new_r->r_preference = r->r_preference - 1;
- }
- }
+ if (((saddr ^ r->src) & r->srcmask) ||
+ ((daddr ^ r->dst) & r->dstmask))
+ return 0;
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if (r->r_preference > new_r->r_preference)
- break;
- last = r;
- }
- atomic_inc(&new_r->r_clntref);
+ if (r->tos && (r->tos != fl->fl4_tos))
+ return 0;
- if (last)
- hlist_add_after_rcu(&last->hlist, &new_r->hlist);
- else
- hlist_add_before_rcu(&new_r->hlist, &r->hlist);
+#ifdef CONFIG_IP_ROUTE_FWMARK
+ if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask)
+ return 0;
+#endif
- rtmsg_rule(RTM_NEWRULE, new_r);
- return 0;
+ return 1;
}
-#ifdef CONFIG_NET_CLS_ROUTE
-u32 fib_rules_tclass(struct fib_result *res)
+static struct fib_table *fib_empty_table(void)
{
- if (res->r)
- return res->r->r_tclassid;
- return 0;
+ u32 id;
+
+ for (id = 1; id <= RT_TABLE_MAX; id++)
+ if (fib_get_table(id) == NULL)
+ return fib_new_table(id);
+ return NULL;
}
-#endif
-/* callers should hold rtnl semaphore */
+static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
+ [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+ [FRA_PRIORITY] = { .type = NLA_U32 },
+ [FRA_SRC] = { .type = NLA_U32 },
+ [FRA_DST] = { .type = NLA_U32 },
+ [FRA_FWMARK] = { .type = NLA_U32 },
+ [FRA_FWMASK] = { .type = NLA_U32 },
+ [FRA_FLOW] = { .type = NLA_U32 },
+ [FRA_TABLE] = { .type = NLA_U32 },
+};
-static void fib_rules_detach(struct net_device *dev)
+static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
{
- struct hlist_node *node;
- struct fib_rule *r;
+ int err = -EINVAL;
+ struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+
+ if (frh->src_len > 32 || frh->dst_len > 32 ||
+ (frh->tos & ~IPTOS_TOS_MASK))
+ goto errout;
+
+ if (rule->table == RT_TABLE_UNSPEC) {
+ if (rule->action == FR_ACT_TO_TBL) {
+ struct fib_table *table;
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if (r->r_ifindex == dev->ifindex)
- r->r_ifindex = -1;
+ table = fib_empty_table();
+ if (table == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+ rule->table = table->tb_id;
+ }
}
-}
-/* callers should hold rtnl semaphore */
+ if (tb[FRA_SRC])
+ rule4->src = nla_get_u32(tb[FRA_SRC]);
-static void fib_rules_attach(struct net_device *dev)
-{
- struct hlist_node *node;
- struct fib_rule *r;
+ if (tb[FRA_DST])
+ rule4->dst = nla_get_u32(tb[FRA_DST]);
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
- r->r_ifindex = dev->ifindex;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+ if (tb[FRA_FWMARK]) {
+ rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+ if (rule4->fwmark)
+ /* compatibility: if the mark value is non-zero all bits
+ * are compared unless a mask is explicitly specified.
+ */
+ rule4->fwmask = 0xFFFFFFFF;
}
+
+ if (tb[FRA_FWMASK])
+ rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
+
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (tb[FRA_FLOW])
+ rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
+#endif
+
+ rule4->src_len = frh->src_len;
+ rule4->srcmask = inet_make_mask(rule4->src_len);
+ rule4->dst_len = frh->dst_len;
+ rule4->dstmask = inet_make_mask(rule4->dst_len);
+ rule4->tos = frh->tos;
+
+ err = 0;
+errout:
+ return err;
}
-int fib_lookup(const struct flowi *flp, struct fib_result *res)
+static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
{
- int err;
- struct fib_rule *r, *policy;
- struct fib_table *tb;
- struct hlist_node *node;
+ struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- u32 daddr = flp->fl4_dst;
- u32 saddr = flp->fl4_src;
+ if (frh->src_len && (rule4->src_len != frh->src_len))
+ return 0;
-FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
- NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
+ if (frh->dst_len && (rule4->dst_len != frh->dst_len))
+ return 0;
- rcu_read_lock();
+ if (frh->tos && (rule4->tos != frh->tos))
+ return 0;
- hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
- if (((saddr^r->r_src) & r->r_srcmask) ||
- ((daddr^r->r_dst) & r->r_dstmask) ||
- (r->r_tos && r->r_tos != flp->fl4_tos) ||
#ifdef CONFIG_IP_ROUTE_FWMARK
- (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
+ if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+ return 0;
+
+ if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+ return 0;
#endif
- (r->r_ifindex && r->r_ifindex != flp->iif))
- continue;
-
-FRprintk("tb %d r %d ", r->r_table, r->r_action);
- switch (r->r_action) {
- case RTN_UNICAST:
- policy = r;
- break;
- case RTN_UNREACHABLE:
- rcu_read_unlock();
- return -ENETUNREACH;
- default:
- case RTN_BLACKHOLE:
- rcu_read_unlock();
- return -EINVAL;
- case RTN_PROHIBIT:
- rcu_read_unlock();
- return -EACCES;
- }
- if ((tb = fib_get_table(r->r_table)) == NULL)
- continue;
- err = tb->tb_lookup(tb, flp, res);
- if (err == 0) {
- res->r = policy;
- if (policy)
- atomic_inc(&policy->r_clntref);
- rcu_read_unlock();
- return 0;
- }
- if (err < 0 && err != -EAGAIN) {
- rcu_read_unlock();
- return err;
- }
- }
-FRprintk("FAILURE\n");
- rcu_read_unlock();
- return -ENETUNREACH;
-}
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
+ return 0;
+#endif
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
-{
- if (res->r && res->r->r_action == RTN_UNICAST &&
- FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
- struct fib_table *tb;
- if ((tb = fib_get_table(res->r->r_table)) != NULL)
- tb->tb_select_default(tb, flp, res);
- }
-}
+ if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC])))
+ return 0;
-static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct net_device *dev = ptr;
+ if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST])))
+ return 0;
- if (event == NETDEV_UNREGISTER)
- fib_rules_detach(dev);
- else if (event == NETDEV_REGISTER)
- fib_rules_attach(dev);
- return NOTIFY_DONE;
+ return 1;
}
+static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+ struct fib4_rule *rule4 = (struct fib4_rule *) rule;
-static struct notifier_block fib_rules_notifier = {
- .notifier_call =fib_rules_event,
-};
+ frh->family = AF_INET;
+ frh->dst_len = rule4->dst_len;
+ frh->src_len = rule4->src_len;
+ frh->tos = rule4->tos;
-static __inline__ int inet_fill_rule(struct sk_buff *skb,
- struct fib_rule *r,
- u32 pid, u32 seq, int event,
- unsigned int flags)
-{
- struct rtmsg *rtm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
-
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
- rtm = NLMSG_DATA(nlh);
- rtm->rtm_family = AF_INET;
- rtm->rtm_dst_len = r->r_dst_len;
- rtm->rtm_src_len = r->r_src_len;
- rtm->rtm_tos = r->r_tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
- if (r->r_fwmark)
- RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
+ if (rule4->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
+
+ if (rule4->fwmask || rule4->fwmark)
+ NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask);
#endif
- rtm->rtm_table = r->r_table;
- rtm->rtm_protocol = 0;
- rtm->rtm_scope = 0;
- rtm->rtm_type = r->r_action;
- rtm->rtm_flags = r->r_flags;
-
- if (r->r_dst_len)
- RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
- if (r->r_src_len)
- RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
- if (r->r_ifname[0])
- RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
- if (r->r_preference)
- RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
- if (r->r_srcmap)
- RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
+
+ if (rule4->dst_len)
+ NLA_PUT_U32(skb, FRA_DST, rule4->dst);
+
+ if (rule4->src_len)
+ NLA_PUT_U32(skb, FRA_SRC, rule4->src);
+
#ifdef CONFIG_NET_CLS_ROUTE
- if (r->r_tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid);
+ if (rule4->tclassid)
+ NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
#endif
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ return 0;
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+nla_put_failure:
+ return -ENOBUFS;
}
-/* callers should hold rtnl semaphore */
-
-static void rtmsg_rule(int event, struct fib_rule *r)
+int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128);
- struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
-
- if (!skb)
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
- else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
- kfree_skb(skb);
- netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
- } else {
- netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
- }
+ return fib_rules_dump(skb, cb, AF_INET);
}
-int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static u32 fib4_rule_default_pref(void)
{
- int idx = 0;
- int s_idx = cb->args[0];
- struct fib_rule *r;
- struct hlist_node *node;
-
- rcu_read_lock();
- hlist_for_each_entry(r, node, &fib_rules, hlist) {
- if (idx < s_idx)
- goto next;
- if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- RTM_NEWRULE, NLM_F_MULTI) < 0)
- break;
-next:
- idx++;
+ struct list_head *pos;
+ struct fib_rule *rule;
+
+ if (!list_empty(&fib4_rules)) {
+ pos = fib4_rules.next;
+ if (pos->next != &fib4_rules) {
+ rule = list_entry(pos->next, struct fib_rule, list);
+ if (rule->pref)
+ return rule->pref - 1;
+ }
}
- rcu_read_unlock();
- cb->args[0] = idx;
- return skb->len;
+ return 0;
}
-void __init fib_rules_init(void)
+static struct fib_rules_ops fib4_rules_ops = {
+ .family = AF_INET,
+ .rule_size = sizeof(struct fib4_rule),
+ .action = fib4_rule_action,
+ .match = fib4_rule_match,
+ .configure = fib4_rule_configure,
+ .compare = fib4_rule_compare,
+ .fill = fib4_rule_fill,
+ .default_pref = fib4_rule_default_pref,
+ .nlgroup = RTNLGRP_IPV4_RULE,
+ .policy = fib4_rule_policy,
+ .rules_list = &fib4_rules,
+ .owner = THIS_MODULE,
+};
+
+void __init fib4_rules_init(void)
{
- INIT_HLIST_HEAD(&fib_rules);
- hlist_add_head(&local_rule.hlist, &fib_rules);
- hlist_add_after(&local_rule.hlist, &main_rule.hlist);
- hlist_add_after(&main_rule.hlist, &default_rule.hlist);
- register_netdevice_notifier(&fib_rules_notifier);
+ list_add_tail(&local_rule.common.list, &fib4_rules);
+ list_add_tail(&main_rule.common.list, &fib4_rules);
+ list_add_tail(&default_rule.common.list, &fib4_rules);
+
+ fib_rules_register(&fib4_rules_ops);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 51738000f3dc..2ead09543f68 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -33,7 +33,6 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <linux/init.h>
#include <net/arp.h>
@@ -44,12 +43,14 @@
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip_mp_alg.h>
+#include <net/netlink.h>
+#include <net/nexthop.h>
#include "fib_lookup.h"
#define FSprintk(a...)
-static DEFINE_RWLOCK(fib_info_lock);
+static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
@@ -159,7 +160,7 @@ void free_fib_info(struct fib_info *fi)
void fib_release_info(struct fib_info *fi)
{
- write_lock_bh(&fib_info_lock);
+ spin_lock_bh(&fib_info_lock);
if (fi && --fi->fib_treeref == 0) {
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
@@ -172,7 +173,7 @@ void fib_release_info(struct fib_info *fi)
fi->fib_dead = 1;
fib_info_put(fi);
}
- write_unlock_bh(&fib_info_lock);
+ spin_unlock_bh(&fib_info_lock);
}
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -254,7 +255,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
struct fib_nh *nh;
unsigned int hash;
- read_lock(&fib_info_lock);
+ spin_lock(&fib_info_lock);
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
@@ -262,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
if (nh->nh_dev == dev &&
nh->nh_gw == gw &&
!(nh->nh_flags&RTNH_F_DEAD)) {
- read_unlock(&fib_info_lock);
+ spin_unlock(&fib_info_lock);
return 0;
}
}
- read_unlock(&fib_info_lock);
+ spin_unlock(&fib_info_lock);
return -1;
}
void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
- int z, int tb_id,
- struct nlmsghdr *n, struct netlink_skb_parms *req)
+ int dst_len, u32 tb_id, struct nl_info *info)
{
struct sk_buff *skb;
- u32 pid = req ? req->pid : n->nlmsg_pid;
- int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-
- skb = alloc_skb(size, GFP_KERNEL);
- if (!skb)
- return;
-
- if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
- fa->fa_type, fa->fa_scope, &key, z,
- fa->fa_tos,
- fa->fa_info, 0) < 0) {
+ int payload = sizeof(struct rtmsg) + 256;
+ u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
+
+ err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+ fa->fa_type, fa->fa_scope, key, dst_len,
+ fa->fa_tos, fa->fa_info, 0);
+ if (err < 0) {
kfree_skb(skb);
- return;
+ goto errout;
}
- NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
- if (n->nlmsg_flags&NLM_F_ECHO)
- atomic_inc(&skb->users);
- netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
- if (n->nlmsg_flags&NLM_F_ECHO)
- netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+ err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
+ info->nlh, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
}
/* Return the first fib alias matching TOS with
@@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
-{
- while (RTA_OK(attr,attrlen)) {
- if (attr->rta_type == type)
- return *(u32*)RTA_DATA(attr);
- attr = RTA_NEXT(attr, attrlen);
- }
- return 0;
-}
-
-static int
-fib_count_nexthops(struct rtattr *rta)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
int nhs = 0;
- struct rtnexthop *nhp = RTA_DATA(rta);
- int nhlen = RTA_PAYLOAD(rta);
- while (nhlen >= (int)sizeof(struct rtnexthop)) {
- if ((nhlen -= nhp->rtnh_len) < 0)
- return 0;
+ while (rtnh_ok(rtnh, remaining)) {
nhs++;
- nhp = RTNH_NEXT(nhp);
- };
- return nhs;
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ /* leftover implies invalid nexthop configuration, discard it */
+ return remaining > 0 ? 0 : nhs;
}
-static int
-fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+ int remaining, struct fib_config *cfg)
{
- struct rtnexthop *nhp = RTA_DATA(rta);
- int nhlen = RTA_PAYLOAD(rta);
-
change_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ int attrlen;
+
+ if (!rtnh_ok(rtnh, remaining))
return -EINVAL;
- nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
- nh->nh_oif = nhp->rtnh_ifindex;
- nh->nh_weight = nhp->rtnh_hops + 1;
- if (attrlen) {
- nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+
+ nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+ nh->nh_oif = rtnh->rtnh_ifindex;
+ nh->nh_weight = rtnh->rtnh_hops + 1;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ nh->nh_gw = nla ? nla_get_u32(nla) : 0;
#ifdef CONFIG_NET_CLS_ROUTE
- nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+ nla = nla_find(attrs, attrlen, RTA_FLOW);
+ nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
#endif
}
- nhp = RTNH_NEXT(nhp);
+
+ rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi);
+
return 0;
}
#endif
-int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
- struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- struct rtnexthop *nhp;
- int nhlen;
+ struct rtnexthop *rtnh;
+ int remaining;
#endif
- if (rta->rta_priority &&
- *rta->rta_priority != fi->fib_priority)
+ if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
- if (rta->rta_oif || rta->rta_gw) {
- if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
- (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+ if (cfg->fc_oif || cfg->fc_gw) {
+ if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
+ (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0;
return 1;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (rta->rta_mp == NULL)
+ if (cfg->fc_mp == NULL)
return 0;
- nhp = RTA_DATA(rta->rta_mp);
- nhlen = RTA_PAYLOAD(rta->rta_mp);
+
+ rtnh = cfg->fc_mp;
+ remaining = cfg->fc_mp_len;
for_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- u32 gw;
+ int attrlen;
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ if (!rtnh_ok(rtnh, remaining))
return -EINVAL;
- if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+
+ if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
return 1;
- if (attrlen) {
- gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
- if (gw && gw != nh->nh_gw)
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen < 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla && nla_get_u32(nla) != nh->nh_gw)
return 1;
#ifdef CONFIG_NET_CLS_ROUTE
- gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
- if (gw && gw != nh->nh_tclassid)
+ nla = nla_find(attrs, attrlen, RTA_FLOW);
+ if (nla && nla_get_u32(nla) != nh->nh_tclassid)
return 1;
#endif
}
- nhp = RTNH_NEXT(nhp);
+
+ rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi);
#endif
return 0;
@@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
|-> {local prefix} (terminal node)
*/
-static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+ struct fib_nh *nh)
{
int err;
@@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
if (nh->nh_flags&RTNH_F_ONLINK) {
struct net_device *dev;
- if (r->rtm_scope >= RT_SCOPE_LINK)
+ if (cfg->fc_scope >= RT_SCOPE_LINK)
return -EINVAL;
if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
return -EINVAL;
@@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
return 0;
}
{
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = nh->nh_gw,
- .scope = r->rtm_scope + 1 } },
- .oif = nh->nh_oif };
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = nh->nh_gw,
+ .scope = cfg->fc_scope + 1,
+ },
+ },
+ .oif = nh->nh_oif,
+ };
/* It is not necessary, but requires a bit of thinking */
if (fl.fl4_scope < RT_SCOPE_LINK)
@@ -598,7 +603,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
unsigned int old_size = fib_hash_size;
unsigned int i, bytes;
- write_lock_bh(&fib_info_lock);
+ spin_lock_bh(&fib_info_lock);
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_hash_size = new_size;
@@ -639,46 +644,35 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
}
fib_info_laddrhash = new_laddrhash;
- write_unlock_bh(&fib_info_lock);
+ spin_unlock_bh(&fib_info_lock);
bytes = old_size * sizeof(struct hlist_head *);
fib_hash_free(old_info_hash, bytes);
fib_hash_free(old_laddrhash, bytes);
}
-struct fib_info *
-fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
- const struct nlmsghdr *nlh, int *errp)
+struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
struct fib_info *fi = NULL;
struct fib_info *ofi;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
int nhs = 1;
-#else
- const int nhs = 1;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- u32 mp_alg = IP_MP_ALG_NONE;
-#endif
/* Fast check to catch the most weird cases */
- if (fib_props[r->rtm_type].scope > r->rtm_scope)
+ if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
goto err_inval;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (rta->rta_mp) {
- nhs = fib_count_nexthops(rta->rta_mp);
+ if (cfg->fc_mp) {
+ nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
if (nhs == 0)
goto err_inval;
}
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (rta->rta_mp_alg) {
- mp_alg = *rta->rta_mp_alg;
-
- if (mp_alg < IP_MP_ALG_NONE ||
- mp_alg > IP_MP_ALG_MAX)
+ if (cfg->fc_mp_alg) {
+ if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+ cfg->fc_mp_alg > IP_MP_ALG_MAX)
goto err_inval;
}
#endif
@@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
goto failure;
fib_info_cnt++;
- fi->fib_protocol = r->rtm_protocol;
+ fi->fib_protocol = cfg->fc_protocol;
+ fi->fib_flags = cfg->fc_flags;
+ fi->fib_priority = cfg->fc_priority;
+ fi->fib_prefsrc = cfg->fc_prefsrc;
fi->fib_nhs = nhs;
change_nexthops(fi) {
nh->nh_parent = fi;
} endfor_nexthops(fi)
- fi->fib_flags = r->rtm_flags;
- if (rta->rta_priority)
- fi->fib_priority = *rta->rta_priority;
- if (rta->rta_mx) {
- int attrlen = RTA_PAYLOAD(rta->rta_mx);
- struct rtattr *attr = RTA_DATA(rta->rta_mx);
-
- while (RTA_OK(attr, attrlen)) {
- unsigned flavor = attr->rta_type;
- if (flavor) {
- if (flavor > RTAX_MAX)
+ if (cfg->fc_mx) {
+ struct nlattr *nla;
+ int remaining;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla->nla_type;
+
+ if (type) {
+ if (type > RTAX_MAX)
goto err_inval;
- fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+ fi->fib_metrics[type - 1] = nla_get_u32(nla);
}
- attr = RTA_NEXT(attr, attrlen);
}
}
- if (rta->rta_prefsrc)
- memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
- if (rta->rta_mp) {
+ if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+ err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+ if (err != 0)
goto failure;
- if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+ if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
goto err_inval;
- if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+ if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
- if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
+ if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
goto err_inval;
#endif
#else
@@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
#endif
} else {
struct fib_nh *nh = fi->fib_nh;
- if (rta->rta_oif)
- nh->nh_oif = *rta->rta_oif;
- if (rta->rta_gw)
- memcpy(&nh->nh_gw, rta->rta_gw, 4);
+
+ nh->nh_oif = cfg->fc_oif;
+ nh->nh_gw = cfg->fc_gw;
+ nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_NET_CLS_ROUTE
- if (rta->rta_flow)
- memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
+ nh->nh_tclassid = cfg->fc_flow;
#endif
- nh->nh_flags = r->rtm_flags;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight = 1;
#endif
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- fi->fib_mp_alg = mp_alg;
+ fi->fib_mp_alg = cfg->fc_mp_alg;
#endif
- if (fib_props[r->rtm_type].error) {
- if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+ if (fib_props[cfg->fc_type].error) {
+ if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
goto err_inval;
goto link_it;
}
- if (r->rtm_scope > RT_SCOPE_HOST)
+ if (cfg->fc_scope > RT_SCOPE_HOST)
goto err_inval;
- if (r->rtm_scope == RT_SCOPE_HOST) {
+ if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
@@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
goto failure;
} else {
change_nexthops(fi) {
- if ((err = fib_check_nh(r, fi, nh)) != 0)
+ if ((err = fib_check_nh(cfg, fi, nh)) != 0)
goto failure;
} endfor_nexthops(fi)
}
if (fi->fib_prefsrc) {
- if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
- memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+ if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+ fi->fib_prefsrc != cfg->fc_dst)
if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
goto err_inval;
}
@@ -820,7 +811,7 @@ link_it:
fi->fib_treeref++;
atomic_inc(&fi->fib_clntref);
- write_lock_bh(&fib_info_lock);
+ spin_lock_bh(&fib_info_lock);
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
if (fi->fib_prefsrc) {
@@ -839,19 +830,19 @@ link_it:
head = &fib_info_devhash[hash];
hlist_add_head(&nh->nh_hash, head);
} endfor_nexthops(fi)
- write_unlock_bh(&fib_info_lock);
+ spin_unlock_bh(&fib_info_lock);
return fi;
err_inval:
err = -EINVAL;
failure:
- *errp = err;
if (fi) {
fi->fib_dead = 1;
free_fib_info(fi);
}
- return NULL;
+
+ return ERR_PTR(err);
}
/* Note! fib_semantic_match intentionally uses RCU list functions. */
@@ -937,224 +928,89 @@ u32 __fib_res_prefsrc(struct fib_result *res)
return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}
-int
-fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
- struct fib_info *fi, unsigned int flags)
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+ u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
+ struct fib_info *fi, unsigned int flags)
{
+ struct nlmsghdr *nlh;
struct rtmsg *rtm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
- rtm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ rtm = nlmsg_data(nlh);
rtm->rtm_family = AF_INET;
rtm->rtm_dst_len = dst_len;
rtm->rtm_src_len = 0;
rtm->rtm_tos = tos;
rtm->rtm_table = tb_id;
+ NLA_PUT_U32(skb, RTA_TABLE, tb_id);
rtm->rtm_type = type;
rtm->rtm_flags = fi->fib_flags;
rtm->rtm_scope = scope;
- if (rtm->rtm_dst_len)
- RTA_PUT(skb, RTA_DST, 4, dst);
rtm->rtm_protocol = fi->fib_protocol;
+
+ if (rtm->rtm_dst_len)
+ NLA_PUT_U32(skb, RTA_DST, dst);
+
if (fi->fib_priority)
- RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
+ NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
+
if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
- goto rtattr_failure;
+ goto nla_put_failure;
+
if (fi->fib_prefsrc)
- RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+ NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
+
if (fi->fib_nhs == 1) {
if (fi->fib_nh->nh_gw)
- RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+ NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
+
if (fi->fib_nh->nh_oif)
- RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+ NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
#ifdef CONFIG_NET_CLS_ROUTE
if (fi->fib_nh[0].nh_tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+ NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
#endif
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
- struct rtnexthop *nhp;
- struct rtattr *mp_head;
- if (skb_tailroom(skb) <= RTA_SPACE(0))
- goto rtattr_failure;
- mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
+ struct rtnexthop *rtnh;
+ struct nlattr *mp;
+
+ mp = nla_nest_start(skb, RTA_MULTIPATH);
+ if (mp == NULL)
+ goto nla_put_failure;
for_nexthops(fi) {
- if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
- goto rtattr_failure;
- nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
- nhp->rtnh_flags = nh->nh_flags & 0xFF;
- nhp->rtnh_hops = nh->nh_weight-1;
- nhp->rtnh_ifindex = nh->nh_oif;
+ rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+ if (rtnh == NULL)
+ goto nla_put_failure;
+
+ rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+ rtnh->rtnh_hops = nh->nh_weight - 1;
+ rtnh->rtnh_ifindex = nh->nh_oif;
+
if (nh->nh_gw)
- RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+ NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
#ifdef CONFIG_NET_CLS_ROUTE
if (nh->nh_tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
+ NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
#endif
- nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+ /* length of rtnetlink header + attributes */
+ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
} endfor_nexthops(fi);
- mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb->tail - (u8*)mp_head;
- }
-#endif
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-#ifndef CONFIG_IP_NOSIOCRT
-
-int
-fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
- struct kern_rta *rta, struct rtentry *r)
-{
- int plen;
- u32 *ptr;
-
- memset(rtm, 0, sizeof(*rtm));
- memset(rta, 0, sizeof(*rta));
-
- if (r->rt_dst.sa_family != AF_INET)
- return -EAFNOSUPPORT;
-
- /* Check mask for validity:
- a) it must be contiguous.
- b) destination must have all host bits clear.
- c) if application forgot to set correct family (AF_INET),
- reject request unless it is absolutely clear i.e.
- both family and mask are zero.
- */
- plen = 32;
- ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
- if (!(r->rt_flags&RTF_HOST)) {
- u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
- if (r->rt_genmask.sa_family != AF_INET) {
- if (mask || r->rt_genmask.sa_family)
- return -EAFNOSUPPORT;
- }
- if (bad_mask(mask, *ptr))
- return -EINVAL;
- plen = inet_mask_len(mask);
- }
-
- nl->nlmsg_flags = NLM_F_REQUEST;
- nl->nlmsg_pid = 0;
- nl->nlmsg_seq = 0;
- nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
- if (cmd == SIOCDELRT) {
- nl->nlmsg_type = RTM_DELROUTE;
- nl->nlmsg_flags = 0;
- } else {
- nl->nlmsg_type = RTM_NEWROUTE;
- nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
- rtm->rtm_protocol = RTPROT_BOOT;
- }
-
- rtm->rtm_dst_len = plen;
- rta->rta_dst = ptr;
-
- if (r->rt_metric) {
- *(u32*)&r->rt_pad3 = r->rt_metric - 1;
- rta->rta_priority = (u32*)&r->rt_pad3;
- }
- if (r->rt_flags&RTF_REJECT) {
- rtm->rtm_scope = RT_SCOPE_HOST;
- rtm->rtm_type = RTN_UNREACHABLE;
- return 0;
- }
- rtm->rtm_scope = RT_SCOPE_NOWHERE;
- rtm->rtm_type = RTN_UNICAST;
-
- if (r->rt_dev) {
- char *colon;
- struct net_device *dev;
- char devname[IFNAMSIZ];
-
- if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
- return -EFAULT;
- devname[IFNAMSIZ-1] = 0;
- colon = strchr(devname, ':');
- if (colon)
- *colon = 0;
- dev = __dev_get_by_name(devname);
- if (!dev)
- return -ENODEV;
- rta->rta_oif = &dev->ifindex;
- if (colon) {
- struct in_ifaddr *ifa;
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
- if (!in_dev)
- return -ENODEV;
- *colon = ':';
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
- if (strcmp(ifa->ifa_label, devname) == 0)
- break;
- if (ifa == NULL)
- return -ENODEV;
- rta->rta_prefsrc = &ifa->ifa_local;
- }
- }
- ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
- if (r->rt_gateway.sa_family == AF_INET && *ptr) {
- rta->rta_gw = ptr;
- if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
- rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ nla_nest_end(skb, mp);
}
+#endif
+ return nlmsg_end(skb, nlh);
- if (cmd == SIOCDELRT)
- return 0;
-
- if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
- return -EINVAL;
-
- if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
- rtm->rtm_scope = RT_SCOPE_LINK;
-
- if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
- struct rtattr *rec;
- struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
- if (mx == NULL)
- return -ENOMEM;
- rta->rta_mx = mx;
- mx->rta_type = RTA_METRICS;
- mx->rta_len = RTA_LENGTH(0);
- if (r->rt_flags&RTF_MTU) {
- rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
- rec->rta_type = RTAX_ADVMSS;
- rec->rta_len = RTA_LENGTH(4);
- mx->rta_len += RTA_LENGTH(4);
- *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
- }
- if (r->rt_flags&RTF_WINDOW) {
- rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
- rec->rta_type = RTAX_WINDOW;
- rec->rta_len = RTA_LENGTH(4);
- mx->rta_len += RTA_LENGTH(4);
- *(u32*)RTA_DATA(rec) = r->rt_window;
- }
- if (r->rt_flags&RTF_IRTT) {
- rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
- rec->rta_type = RTAX_RTT;
- rec->rta_len = RTA_LENGTH(4);
- mx->rta_len += RTA_LENGTH(4);
- *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
- }
- }
- return 0;
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
-#endif
-
/*
Update FIB if:
- local address disappeared -> we must delete all the entries
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 01801c0f885d..9c3ff6ba6e21 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1124,17 +1124,14 @@ err:
return fa_head;
}
-static int
-fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
struct fib_alias *fa, *new_fa;
struct list_head *fa_head = NULL;
struct fib_info *fi;
- int plen = r->rtm_dst_len;
- int type = r->rtm_type;
- u8 tos = r->rtm_tos;
+ int plen = cfg->fc_dst_len;
+ u8 tos = cfg->fc_tos;
u32 key, mask;
int err;
struct leaf *l;
@@ -1142,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
if (plen > 32)
return -EINVAL;
- key = 0;
- if (rta->rta_dst)
- memcpy(&key, rta->rta_dst, 4);
-
- key = ntohl(key);
+ key = ntohl(cfg->fc_dst);
- pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
+ pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
mask = ntohl(inet_make_mask(plen));
@@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
key = key & mask;
- fi = fib_create_info(r, rta, nlhdr, &err);
-
- if (!fi)
+ fi = fib_create_info(cfg);
+ if (IS_ERR(fi)) {
+ err = PTR_ERR(fi);
goto err;
+ }
l = fib_find_node(t, key);
fa = NULL;
@@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
struct fib_alias *fa_orig;
err = -EEXIST;
- if (nlhdr->nlmsg_flags & NLM_F_EXCL)
+ if (cfg->fc_nlflags & NLM_F_EXCL)
goto out;
- if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
+ if (cfg->fc_nlflags & NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
@@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
fi_drop = fa->fa_info;
new_fa->fa_tos = fa->fa_tos;
new_fa->fa_info = fi;
- new_fa->fa_type = type;
- new_fa->fa_scope = r->rtm_scope;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
new_fa->fa_state &= ~FA_S_ACCESSED;
@@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
- if (fa->fa_type == type &&
- fa->fa_scope == r->rtm_scope &&
+ if (fa->fa_type == cfg->fc_type &&
+ fa->fa_scope == cfg->fc_scope &&
fa->fa_info == fi) {
goto out;
}
}
- if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
+ if (!(cfg->fc_nlflags & NLM_F_APPEND))
fa = fa_orig;
}
err = -ENOENT;
- if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
+ if (!(cfg->fc_nlflags & NLM_F_CREATE))
goto out;
err = -ENOBUFS;
@@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
- new_fa->fa_type = type;
- new_fa->fa_scope = r->rtm_scope;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_scope = cfg->fc_scope;
new_fa->fa_state = 0;
/*
* Insert new entry to the list.
@@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
(fa ? &fa->fa_list : fa_head));
rt_cache_flush(-1);
- rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
+ rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+ &cfg->fc_nlinfo);
succeeded:
return 0;
@@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key)
return 1;
}
-static int
-fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
- struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
u32 key, mask;
- int plen = r->rtm_dst_len;
- u8 tos = r->rtm_tos;
+ int plen = cfg->fc_dst_len;
+ u8 tos = cfg->fc_tos;
struct fib_alias *fa, *fa_to_delete;
struct list_head *fa_head;
struct leaf *l;
struct leaf_info *li;
-
if (plen > 32)
return -EINVAL;
- key = 0;
- if (rta->rta_dst)
- memcpy(&key, rta->rta_dst, 4);
-
- key = ntohl(key);
+ key = ntohl(cfg->fc_dst);
mask = ntohl(inet_make_mask(plen));
if (key & ~mask)
@@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
if (fa->fa_tos != tos)
break;
- if ((!r->rtm_type ||
- fa->fa_type == r->rtm_type) &&
- (r->rtm_scope == RT_SCOPE_NOWHERE ||
- fa->fa_scope == r->rtm_scope) &&
- (!r->rtm_protocol ||
- fi->fib_protocol == r->rtm_protocol) &&
- fib_nh_match(r, nlhdr, rta, fi) == 0) {
+ if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
+ (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+ fa->fa_scope == cfg->fc_scope) &&
+ (!cfg->fc_protocol ||
+ fi->fib_protocol == cfg->fc_protocol) &&
+ fib_nh_match(cfg, fi) == 0) {
fa_to_delete = fa;
break;
}
@@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
return -ESRCH;
fa = fa_to_delete;
- rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
+ rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
+ &cfg->fc_nlinfo);
l = fib_find_node(t, key);
li = find_leaf_info(l, plen);
@@ -1848,7 +1836,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
u32 xkey = htonl(key);
- s_i = cb->args[3];
+ s_i = cb->args[4];
i = 0;
/* rcu_read_lock is hold by caller */
@@ -1866,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
tb->tb_id,
fa->fa_type,
fa->fa_scope,
- &xkey,
+ xkey,
plen,
fa->fa_tos,
fa->fa_info, 0) < 0) {
- cb->args[3] = i;
+ cb->args[4] = i;
return -1;
}
i++;
}
- cb->args[3] = i;
+ cb->args[4] = i;
return skb->len;
}
@@ -1886,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
struct list_head *fa_head;
struct leaf *l = NULL;
- s_h = cb->args[2];
+ s_h = cb->args[3];
for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
if (h < s_h)
continue;
if (h > s_h)
- memset(&cb->args[3], 0,
- sizeof(cb->args) - 3*sizeof(cb->args[0]));
+ memset(&cb->args[4], 0,
+ sizeof(cb->args) - 4*sizeof(cb->args[0]));
fa_head = get_fa_head(l, plen);
@@ -1904,11 +1892,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
continue;
if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
- cb->args[2] = h;
+ cb->args[3] = h;
return -1;
}
}
- cb->args[2] = h;
+ cb->args[3] = h;
return skb->len;
}
@@ -1917,23 +1905,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
int m, s_m;
struct trie *t = (struct trie *) tb->tb_data;
- s_m = cb->args[1];
+ s_m = cb->args[2];
rcu_read_lock();
for (m = 0; m <= 32; m++) {
if (m < s_m)
continue;
if (m > s_m)
- memset(&cb->args[2], 0,
- sizeof(cb->args) - 2*sizeof(cb->args[0]));
+ memset(&cb->args[3], 0,
+ sizeof(cb->args) - 3*sizeof(cb->args[0]));
if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
- cb->args[1] = m;
+ cb->args[2] = m;
goto out;
}
}
rcu_read_unlock();
- cb->args[1] = m;
+ cb->args[2] = m;
return skb->len;
out:
rcu_read_unlock();
@@ -1943,9 +1931,9 @@ out:
/* Fix more generic FIB names for init later */
#ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
#else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
#endif
{
struct fib_table *tb;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4c86ac3d882d..c2ad07e48ab4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = {
};
/* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts = 1;
+int sysctl_icmp_echo_ignore_all __read_mostly;
+int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
/* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses = 1;
+int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
/*
* Configurable global rate limit.
@@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1;
* time exceeded (11), parameter problem (12)
*/
-int sysctl_icmp_ratelimit = 1 * HZ;
-int sysctl_icmp_ratemask = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr;
+int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
+int sysctl_icmp_ratemask __read_mostly = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
/*
* ICMP control array. This specifies what to do with each ICMP.
@@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
.saddr = rt->rt_spec_dst,
.tos = RT_TOS(skb->nh.iph->tos) } },
.proto = IPPROTO_ICMP };
+ security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
goto out_unlock;
}
@@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
}
}
};
+ security_skb_classify_flow(skb_in, &fl);
if (ip_route_output_key(&rt, &fl))
goto out_unlock;
}
@@ -928,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb)
ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (!(u16)csum_fold(skb->csum))
break;
/* fall through */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8e8117c19e4d..58be8227b0cb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb)
goto drop;
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (!(u16)csum_fold(skb->csum))
break;
/* fall through */
@@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
/*
* Join a socket to a group
*/
-int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
-int sysctl_igmp_max_msf = IP_MAX_MSF;
+int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
+int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e50a1bfd7ccc..07204391d083 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
{ .sport = inet_sk(sk)->sport,
.dport = ireq->rmt_port } } };
+ security_req_classify_flow(req, &fl);
if (ip_route_output_flow(&rt, &fl, sk, 0)) {
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
@@ -509,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
/* Deinitialize accept_queue to trap illegal accesses. */
memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+
+ security_inet_csk_clone(newsk, req);
}
return newsk;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 95fac5532994..fb296c9a7f3f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock);
* remote address for the connection. So always assume those are both
* wildcarded during the search since they can never be otherwise.
*/
-struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
- const unsigned short hnum, const int dif)
+static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+ const u32 daddr,
+ const unsigned short hnum,
+ const int dif)
{
struct sock *result = NULL, *sk;
const struct hlist_node *node;
@@ -159,6 +161,33 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
return result;
}
+/* Optimize the common listener case. */
+struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+ const u32 daddr, const unsigned short hnum,
+ const int dif)
+{
+ struct sock *sk = NULL;
+ const struct hlist_head *head;
+
+ read_lock(&hashinfo->lhash_lock);
+ head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+ if (!hlist_empty(head)) {
+ const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+ if (inet->num == hnum && !sk->sk_node.next &&
+ (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+ (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+ !sk->sk_bound_dev_if)
+ goto sherry_cache;
+ sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
+ }
+ if (sk) {
+sherry_cache:
+ sock_hold(sk);
+ }
+ read_unlock(&hashinfo->lhash_lock);
+ return sk;
+}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
/* called with local bh disabled */
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 03ff62ebcfeb..a675602ef295 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -126,12 +126,9 @@ void __init inet_initpeers(void)
peer_cachep = kmem_cache_create("inet_peer_cache",
sizeof(struct inet_peer),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!peer_cachep)
- panic("cannot create inet_peer_cache");
-
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
*/
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b84b53a47526..165d72859ddf 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,15 +54,15 @@
* even the most extreme cases without allowing an attacker to measurably
* harm machine performance.
*/
-int sysctl_ipfrag_high_thresh = 256*1024;
-int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-int sysctl_ipfrag_max_dist = 64;
+int sysctl_ipfrag_max_dist __read_mostly = 64;
/* Important NOTE! Fragment queue must be destroyed before MSL expires.
* RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
*/
-int sysctl_ipfrag_time = IP_FRAG_TIME;
+int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
struct ipfrag_skb_cb
{
@@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
}
static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
static void ipfrag_secret_rebuild(unsigned long dummy)
{
@@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_HW)
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
atomic_sub(fp->truesize, &ip_frag_mem);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0f9b3a31997b..f5fba051df3d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -393,7 +393,8 @@ out:
int code = skb->h.icmph->code;
int rel_type = 0;
int rel_code = 0;
- int rel_info = 0;
+ __be32 rel_info = 0;
+ __u32 n = 0;
u16 flags;
int grehlen = (iph->ihl<<2) + 4;
struct sk_buff *skb2;
@@ -422,14 +423,16 @@ out:
default:
return;
case ICMP_PARAMETERPROB:
- if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+ n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ if (n < (iph->ihl<<2))
return;
/* So... This guy found something strange INSIDE encapsulated
packet. Well, he is fool, but what can we do ?
*/
rel_type = ICMP_PARAMETERPROB;
- rel_info = skb->h.icmph->un.gateway - grehlen;
+ n -= grehlen;
+ rel_info = htonl(n << 24);
break;
case ICMP_DEST_UNREACH:
@@ -440,13 +443,14 @@ out:
return;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- rel_info = ntohs(skb->h.icmph->un.frag.mtu);
- if (rel_info < grehlen+68)
+ n = ntohs(skb->h.icmph->un.frag.mtu);
+ if (n < grehlen+68)
return;
- rel_info -= grehlen;
+ n -= grehlen;
/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
- if (rel_info > ntohs(eiph->tot_len))
+ if (n > ntohs(eiph->tot_len))
return;
+ rel_info = htonl(n);
break;
default:
/* All others are translated to HOST_UNREACH.
@@ -508,12 +512,11 @@ out:
/* change mtu on this route */
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (rel_info > dst_mtu(skb2->dst)) {
+ if (n > dst_mtu(skb2->dst)) {
kfree_skb(skb2);
return;
}
- skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
- rel_info = htonl(rel_info);
+ skb2->dst->ops->update_pmtu(skb2->dst, n);
} else if (type == ICMP_TIME_EXCEEDED) {
struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
@@ -576,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb)
if (flags&GRE_CSUM) {
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
csum = (u16)csum_fold(skb->csum);
if (!csum)
break;
@@ -584,7 +587,7 @@ static int ipgre_rcv(struct sk_buff *skb)
case CHECKSUM_NONE:
skb->csum = 0;
csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_COMPLETE;
}
offset += 4;
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 406056edc02b..e7437c091326 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -24,6 +24,7 @@
#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>
+#include <net/cipso_ipv4.h>
/*
* Write options to IP header, record destination address to
@@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
dopt->is_strictroute = sopt->is_strictroute;
}
}
+ if (sopt->cipso) {
+ optlen = sptr[sopt->cipso+1];
+ dopt->cipso = dopt->optlen+sizeof(struct iphdr);
+ memcpy(dptr, sptr+sopt->cipso, optlen);
+ dptr += optlen;
+ dopt->optlen += optlen;
+ }
while (dopt->optlen & 3) {
*dptr++ = IPOPT_END;
dopt->optlen++;
@@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
if (optptr[2] == 0 && optptr[3] == 0)
opt->router_alert = optptr - iph;
break;
+ case IPOPT_CIPSO:
+ if (opt->cipso) {
+ pp_ptr = optptr;
+ goto error;
+ }
+ opt->cipso = optptr - iph;
+ if (cipso_v4_validate(&optptr)) {
+ pp_ptr = optptr;
+ goto error;
+ }
+ break;
case IPOPT_SEC:
case IPOPT_SID:
default:
@@ -506,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp,
opt->__data[optlen++] = IPOPT_END;
opt->optlen = optlen;
opt->is_data = 1;
- opt->is_setbyuser = 1;
if (optlen && ip_options_compile(opt, NULL)) {
kfree(opt);
return -EINVAL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a2ede167e045..97aee76fb746 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,7 +83,7 @@
#include <linux/netlink.h>
#include <linux/tcp.h>
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
@@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
* keep trying until route appears or the connection times
* itself out.
*/
+ security_sk_classify_flow(sk, &fl);
if (ip_route_output_flow(&rt, &fl, sk, 0))
goto no_route;
}
@@ -425,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
int ptr;
struct net_device *dev;
struct sk_buff *skb2;
- unsigned int mtu, hlen, left, len, ll_rs;
+ unsigned int mtu, hlen, left, len, ll_rs, pad;
int offset;
__be16 not_last_frag;
struct rtable *rt = (struct rtable*)skb->dst;
@@ -555,14 +556,13 @@ slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = raw + hlen; /* Where to start from */
-#ifdef CONFIG_BRIDGE_NETFILTER
/* for bridged IP traffic encapsulated inside f.e. a vlan header,
- * we need to make room for the encapsulating header */
- ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
- mtu -= nf_bridge_pad(skb);
-#else
- ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
-#endif
+ * we need to make room for the encapsulating header
+ */
+ pad = nf_bridge_pad(skb);
+ ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+ mtu -= pad;
+
/*
* Fragment the datagram.
*/
@@ -679,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
{
struct iovec *iov = from;
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (memcpy_fromiovecend(to, iov, offset, len) < 0)
return -EFAULT;
} else {
@@ -735,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
/* initialize protocol header pointer */
skb->h.raw = skb->data + fragheaderlen;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
sk->sk_sndmsg_off = 0;
}
@@ -843,7 +843,7 @@ int ip_append_data(struct sock *sk,
length + fragheaderlen <= mtu &&
rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
!exthdrlen)
- csummode = CHECKSUM_HW;
+ csummode = CHECKSUM_PARTIAL;
inet->cork.length += length;
if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
@@ -1366,6 +1366,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
{ .sport = skb->h.th->dest,
.dport = skb->h.th->source } },
.proto = sk->sk_protocol };
+ security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
return;
}
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5bb9c9f03fb6..17342430a843 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
out_ok:
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
ip_send_check(iph);
return 0;
}
@@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->id.daddr.a4 = x->id.daddr.a4;
memcpy(&t->sel, &x->sel, sizeof(t->sel));
t->props.family = AF_INET;
- t->props.mode = 1;
+ t->props.mode = XFRM_MODE_TUNNEL;
t->props.saddr.a4 = x->props.saddr.a4;
t->props.flags = x->props.flags;
@@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
goto out;
x->props.header_len = 0;
- if (x->props.mode)
+ if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
mutex_lock(&ipcomp_resource_mutex);
@@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
goto error;
mutex_unlock(&ipcomp_resource_mutex);
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
err = ipcomp_tunnel_attach(x);
if (err)
goto error_tunnel;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cb8a92f18ef6..1fbb38415b19 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -31,7 +31,6 @@
* -- Josef Siemes <jsiemes@web.de>, Aug 2002
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 76ab50b0d6ef..0c4556529228 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -341,7 +341,8 @@ out:
int code = skb->h.icmph->code;
int rel_type = 0;
int rel_code = 0;
- int rel_info = 0;
+ __be32 rel_info = 0;
+ __u32 n = 0;
struct sk_buff *skb2;
struct flowi fl;
struct rtable *rt;
@@ -354,14 +355,15 @@ out:
default:
return 0;
case ICMP_PARAMETERPROB:
- if (skb->h.icmph->un.gateway < hlen)
+ n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ if (n < hlen)
return 0;
/* So... This guy found something strange INSIDE encapsulated
packet. Well, he is fool, but what can we do ?
*/
rel_type = ICMP_PARAMETERPROB;
- rel_info = skb->h.icmph->un.gateway - hlen;
+ rel_info = htonl((n - hlen) << 24);
break;
case ICMP_DEST_UNREACH:
@@ -372,13 +374,14 @@ out:
return 0;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- rel_info = ntohs(skb->h.icmph->un.frag.mtu);
- if (rel_info < hlen+68)
+ n = ntohs(skb->h.icmph->un.frag.mtu);
+ if (n < hlen+68)
return 0;
- rel_info -= hlen;
+ n -= hlen;
/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
- if (rel_info > ntohs(eiph->tot_len))
+ if (n > ntohs(eiph->tot_len))
return 0;
+ rel_info = htonl(n);
break;
default:
/* All others are translated to HOST_UNREACH.
@@ -440,12 +443,11 @@ out:
/* change mtu on this route */
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (rel_info > dst_mtu(skb2->dst)) {
+ if (n > dst_mtu(skb2->dst)) {
kfree_skb(skb2);
return 0;
}
- skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
- rel_info = htonl(rel_info);
+ skb2->dst->ops->update_pmtu(skb2->dst, n);
} else if (type == ICMP_TIME_EXCEEDED) {
struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 85893eef6b16..ba49588da242 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
e = NLMSG_DATA(nlh);
e->error = -ETIMEDOUT;
memset(&e->msg, 0, sizeof(e->msg));
- netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+ rtnl_unicast(skb, NETLINK_CB(skb).pid);
} else
kfree_skb(skb);
}
@@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
if (skb->nh.iph->version == 0) {
- int err;
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
e->error = -EMSGSIZE;
memset(&e->msg, 0, sizeof(e->msg));
}
- err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+ rtnl_unicast(skb, NETLINK_CB(skb).pid);
} else
ip_mr_forward(skb, c, 0);
}
@@ -1899,11 +1900,8 @@ void __init ip_mr_init(void)
{
mrt_cachep = kmem_cache_create("ip_mrt_cache",
sizeof(struct mfc_cache),
- 0, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!mrt_cachep)
- panic("cannot allocate ip_mrt_cache");
-
init_timer(&ipmr_expire_timer);
ipmr_expire_timer.function=ipmr_expire_process;
register_netdevice_notifier(&ip_mr_notifier);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bc28b1160a3a..820e8318d10d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
skb->len - tcphoff,
skb->nh.iph->protocol, skb->csum)) {
@@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
}
break;
default:
- /* CHECKSUM_UNNECESSARY */
+ /* No need to checksum. */
break;
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 89d9175d8f28..90c8166c0ec1 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_HW)
+ if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
(*pskb)->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (csum_tcpudp_magic(skb->nh.iph->saddr,
skb->nh.iph->daddr,
skb->len - udphoff,
@@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
}
break;
default:
- /* CHECKSUM_UNNECESSARY */
+ /* No need to checksum. */
break;
}
}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6a9e34b794bc..f88347de21a9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int csum = 0;
switch (skb->ip_summed) {
- case CHECKSUM_HW:
+ case CHECKSUM_COMPLETE:
if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
break;
if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ef0b5aac5838..a55b8ff70ded 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_MATCH_DSCP
- tristate "DSCP match support"
- depends on IP_NF_IPTABLES
- help
- This option adds a `DSCP' match, which allows you to match against
- the IPv4 header DSCP field (DSCP codepoint).
-
- The DSCP codepoint can have any value between 0x0 and 0x4f.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_MATCH_AH
tristate "AH match support"
depends on IP_NF_IPTABLES
@@ -568,17 +557,6 @@ config IP_NF_TARGET_ECN
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_DSCP
- tristate "DSCP target support"
- depends on IP_NF_MANGLE
- help
- This option adds a `DSCP' match, which allows you to match against
- the IPv4 header DSCP field (DSCP codepoint).
-
- The DSCP codepoint can have any value between 0x0 and 0x4f.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_TARGET_TTL
tristate 'TTL target support'
depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3ded4a3af59c..09aaed1a8063 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
@@ -68,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8d1d7a6e72a5..85f0d73ebfb4 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -56,8 +56,6 @@ do { \
#define ARP_NF_ASSERT(x)
#endif
-#include <linux/netfilter_ipv4/listhelp.h>
-
static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
char *hdr_addr, int len)
{
@@ -208,8 +206,7 @@ static unsigned int arpt_error(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
if (net_ratelimit())
printk("arp_tables: error: '%s'\n", (char *)targinfo);
@@ -226,8 +223,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct arpt_table *table,
- void *userdata)
+ struct arpt_table *table)
{
static const char nulldevname[IFNAMSIZ];
unsigned int verdict = NF_DROP;
@@ -302,8 +298,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
in, out,
hook,
t->u.kernel.target,
- t->data,
- userdata);
+ t->data);
/* Target might have changed stuff. */
arp = (*pskb)->nh.arph;
@@ -490,12 +485,10 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
if (t->u.kernel.target == &arpt_standard_target) {
if (!standard_check(t, size)) {
ret = -EINVAL;
- goto out;
+ goto err;
}
} else if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, e, target, t->data,
- t->u.target_size
- - sizeof(*t),
e->comefrom)) {
duprintf("arp_tables: check failed for `%s'.\n",
t->u.kernel.target->name);
@@ -562,8 +555,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
t = arpt_get_target(e);
if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data,
- t->u.target_size - sizeof(*t));
+ t->u.kernel.target->destroy(t->u.kernel.target, t->data);
module_put(t->u.kernel.target->me);
return 0;
}
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a58325c1ceb9..d12b1df252a1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -11,7 +11,7 @@ static unsigned int
target(struct sk_buff **pskb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
const struct arpt_mangle *mangle = targinfo;
struct arphdr *arp;
@@ -67,7 +67,7 @@ target(struct sk_buff **pskb,
static int
checkentry(const char *tablename, const void *e, const struct xt_target *target,
- void *targinfo, unsigned int targinfosize, unsigned int hook_mask)
+ void *targinfo, unsigned int hook_mask)
{
const struct arpt_mangle *mangle = targinfo;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index d7c472faa53b..7edea2a1696c 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return arpt_do_table(pskb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index aa459177c3f8..c432b3163609 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -47,7 +47,6 @@
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#define IP_CONNTRACK_VERSION "2.4"
@@ -64,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0);
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size = 0;
-int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
+unsigned int ip_conntrack_htable_size __read_mostly = 0;
+int ip_conntrack_max __read_mostly;
+struct list_head *ip_conntrack_hash __read_mostly;
static kmem_cache_t *ip_conntrack_cachep __read_mostly;
static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid;
+unsigned int ip_ct_log_invalid __read_mostly;
static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc;
+static int ip_conntrack_vmalloc __read_mostly;
static unsigned int ip_conntrack_next_id;
static unsigned int ip_conntrack_expect_next_id;
@@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct)
static void
clean_from_lists(struct ip_conntrack *ct)
{
- unsigned int ho, hr;
-
DEBUGP("clean_from_lists(%p)\n", ct);
ASSERT_WRITE_LOCK(&ip_conntrack_lock);
-
- ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
- LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+ list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
/* Destroy all pending expectations */
ip_ct_remove_expectations(ct);
@@ -313,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
{
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
struct ip_conntrack_protocol *proto;
+ struct ip_conntrack_helper *helper;
DEBUGP("destroy_conntrack(%p)\n", ct);
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -321,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
ip_conntrack_event(IPCT_DESTROY, ct);
set_bit(IPS_DYING_BIT, &ct->status);
+ helper = ct->helper;
+ if (helper && helper->destroy)
+ helper->destroy(ct);
+
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to ip_conntrack_lock!!! -HW */
@@ -367,16 +366,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
ip_conntrack_put(ct);
}
-static inline int
-conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- ASSERT_READ_LOCK(&ip_conntrack_lock);
- return tuplehash_to_ctrack(i) != ignored_conntrack
- && ip_ct_tuple_equal(tuple, &i->tuple);
-}
-
struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack)
@@ -386,7 +375,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
ASSERT_READ_LOCK(&ip_conntrack_lock);
list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
- if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+ if (tuplehash_to_ctrack(h) != ignored_conntrack &&
+ ip_ct_tuple_equal(tuple, &h->tuple)) {
CONNTRACK_STAT_INC(found);
return h;
}
@@ -417,10 +407,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
unsigned int repl_hash)
{
ct->id = ++ip_conntrack_next_id;
- list_prepend(&ip_conntrack_hash[hash],
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- list_prepend(&ip_conntrack_hash[repl_hash],
- &ct->tuplehash[IP_CT_DIR_REPLY].list);
+ list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+ &ip_conntrack_hash[hash]);
+ list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+ &ip_conntrack_hash[repl_hash]);
}
void ip_conntrack_hash_insert(struct ip_conntrack *ct)
@@ -440,6 +430,7 @@ int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
unsigned int hash, repl_hash;
+ struct ip_conntrack_tuple_hash *h;
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
@@ -470,43 +461,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
- if (!LIST_FIND(&ip_conntrack_hash[hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
- && !LIST_FIND(&ip_conntrack_hash[repl_hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
- /* Remove from unconfirmed list */
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+ list_for_each_entry(h, &ip_conntrack_hash[hash], list)
+ if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &h->tuple))
+ goto out;
+ list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
+ if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &h->tuple))
+ goto out;
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- /* Timer relative to confirmation time, not original
- setting time, otherwise we'd get timer wrap in
- weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
- atomic_inc(&ct->ct_general.use);
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
- CONNTRACK_STAT_INC(insert);
- write_unlock_bh(&ip_conntrack_lock);
- if (ct->helper)
- ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+ /* Remove from unconfirmed list */
+ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+ __ip_conntrack_hash_insert(ct, hash, repl_hash);
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+ weird delay cases. */
+ ct->timeout.expires += jiffies;
+ add_timer(&ct->timeout);
+ atomic_inc(&ct->ct_general.use);
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ CONNTRACK_STAT_INC(insert);
+ write_unlock_bh(&ip_conntrack_lock);
+ if (ct->helper)
+ ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+ if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+ test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+ ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
- ip_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
+ ip_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, *pskb);
- return NF_ACCEPT;
- }
+ return NF_ACCEPT;
+out:
CONNTRACK_STAT_INC(insert_failed);
write_unlock_bh(&ip_conntrack_lock);
-
return NF_DROP;
}
@@ -527,23 +518,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
-{
- return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
-}
-
static int early_drop(struct list_head *chain)
{
/* Traverse backwards: gives us oldest, which is roughly LRU */
struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct = NULL;
+ struct ip_conntrack *ct = NULL, *tmp;
int dropped = 0;
read_lock_bh(&ip_conntrack_lock);
- h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
- if (h) {
- ct = tuplehash_to_ctrack(h);
- atomic_inc(&ct->ct_general.use);
+ list_for_each_entry_reverse(h, chain, list) {
+ tmp = tuplehash_to_ctrack(h);
+ if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+ ct = tmp;
+ atomic_inc(&ct->ct_general.use);
+ break;
+ }
}
read_unlock_bh(&ip_conntrack_lock);
@@ -559,18 +548,16 @@ static int early_drop(struct list_head *chain)
return dropped;
}
-static inline int helper_cmp(const struct ip_conntrack_helper *i,
- const struct ip_conntrack_tuple *rtuple)
-{
- return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
static struct ip_conntrack_helper *
__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
{
- return LIST_FIND(&helpers, helper_cmp,
- struct ip_conntrack_helper *,
- tuple);
+ struct ip_conntrack_helper *h;
+
+ list_for_each_entry(h, &helpers, list) {
+ if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+ return h;
+ }
+ return NULL;
}
struct ip_conntrack_helper *
@@ -640,11 +627,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
ip_conntrack_hash_rnd_initted = 1;
}
+ /* We don't want any race condition at early drop stage */
+ atomic_inc(&ip_conntrack_count);
+
if (ip_conntrack_max
- && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+ && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
unsigned int hash = hash_conntrack(orig);
/* Try dropping from this hash chain. */
if (!early_drop(&ip_conntrack_hash[hash])) {
+ atomic_dec(&ip_conntrack_count);
if (net_ratelimit())
printk(KERN_WARNING
"ip_conntrack: table full, dropping"
@@ -656,6 +647,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
if (!conntrack) {
DEBUGP("Can't allocate conntrack.\n");
+ atomic_dec(&ip_conntrack_count);
return ERR_PTR(-ENOMEM);
}
@@ -669,8 +661,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
- atomic_inc(&ip_conntrack_count);
-
return conntrack;
}
@@ -1062,7 +1052,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
BUG_ON(me->timeout == 0);
write_lock_bh(&ip_conntrack_lock);
- list_prepend(&helpers, me);
+ list_add(&me->list, &helpers);
write_unlock_bh(&ip_conntrack_lock);
return 0;
@@ -1081,24 +1071,24 @@ __ip_conntrack_helper_find_byname(const char *name)
return NULL;
}
-static inline int unhelp(struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_helper *me)
+static inline void unhelp(struct ip_conntrack_tuple_hash *i,
+ const struct ip_conntrack_helper *me)
{
if (tuplehash_to_ctrack(i)->helper == me) {
ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
tuplehash_to_ctrack(i)->helper = NULL;
}
- return 0;
}
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
unsigned int i;
+ struct ip_conntrack_tuple_hash *h;
struct ip_conntrack_expect *exp, *tmp;
/* Need write lock here, to delete helper. */
write_lock_bh(&ip_conntrack_lock);
- LIST_DELETE(&helpers, me);
+ list_del(&me->list);
/* Get rid of expectations */
list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
@@ -1108,10 +1098,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
}
}
/* Get rid of expecteds, set helpers to NULL. */
- LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
- for (i = 0; i < ip_conntrack_htable_size; i++)
- LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
- struct ip_conntrack_tuple_hash *, me);
+ list_for_each_entry(h, &unconfirmed, list)
+ unhelp(h, me);
+ for (i = 0; i < ip_conntrack_htable_size; i++) {
+ list_for_each_entry(h, &ip_conntrack_hash[i], list)
+ unhelp(h, me);
+ }
write_unlock_bh(&ip_conntrack_lock);
/* Someone could be still looking at the helper in a bh. */
@@ -1237,46 +1229,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
nf_conntrack_get(nskb->nfct);
}
-static inline int
-do_iter(const struct ip_conntrack_tuple_hash *i,
- int (*iter)(struct ip_conntrack *i, void *data),
- void *data)
-{
- return iter(tuplehash_to_ctrack(i), data);
-}
-
/* Bring out ya dead! */
-static struct ip_conntrack_tuple_hash *
+static struct ip_conntrack *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
void *data, unsigned int *bucket)
{
- struct ip_conntrack_tuple_hash *h = NULL;
+ struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct;
write_lock_bh(&ip_conntrack_lock);
for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
- h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- break;
+ list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
+ ct = tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
+ }
+ list_for_each_entry(h, &unconfirmed, list) {
+ ct = tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
}
- if (!h)
- h = LIST_FIND_W(&unconfirmed, do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
write_unlock_bh(&ip_conntrack_lock);
+ return NULL;
- return h;
+found:
+ atomic_inc(&ct->ct_general.use);
+ write_unlock_bh(&ip_conntrack_lock);
+ return ct;
}
void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
- struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct;
unsigned int bucket = 0;
- while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
- struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
death_by_timeout((unsigned long)ct);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index b020a33e65e9..fb0aee691721 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -20,11 +20,11 @@
* - We can only support one single call within each session
*
* TODO:
- * - testing of incoming PPTP calls
+ * - testing of incoming PPTP calls
*
- * Changes:
+ * Changes:
* 2002-02-05 - Version 1.3
- * - Call ip_conntrack_unexpect_related() from
+ * - Call ip_conntrack_unexpect_related() from
* pptp_destroy_siblings() to destroy expectations in case
* CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
* (Philip Craig <philipc@snapgear.com>)
@@ -80,7 +80,7 @@ int
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq);
-int
+void
(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
struct ip_conntrack_expect *expect_reply);
@@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct,
invert_tuplepr(&inv_t, &exp->tuple);
DEBUGP("trying to unexpect other dir: ");
DUMP_TUPLE(&inv_t);
-
+
exp_other = ip_conntrack_expect_find(&inv_t);
if (exp_other) {
/* delete other expectation. */
@@ -194,15 +194,16 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
{
struct ip_conntrack_tuple t;
- /* Since ct->sibling_list has literally rusted away in 2.6.11,
+ ip_ct_gre_keymap_destroy(ct);
+ /* Since ct->sibling_list has literally rusted away in 2.6.11,
* we now need another way to find out about our sibling
* contrack and expects... -HW */
/* try original (pns->pac) tuple */
memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
- t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+ t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
+ t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
if (!destroy_sibling_or_exp(&t))
DEBUGP("failed to timeout original pns->pac ct/exp\n");
@@ -210,8 +211,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
/* try reply (pac->pns) tuple */
memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
- t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+ t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
+ t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
if (!destroy_sibling_or_exp(&t))
DEBUGP("failed to timeout reply pac->pns ct/exp\n");
@@ -219,94 +220,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
static inline int
-exp_gre(struct ip_conntrack *master,
- u_int32_t seq,
+exp_gre(struct ip_conntrack *ct,
__be16 callid,
__be16 peer_callid)
{
- struct ip_conntrack_tuple inv_tuple;
- struct ip_conntrack_tuple exp_tuples[] = {
- /* tuple in original direction, PNS->PAC */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
- .u = { .gre = { .key = peer_callid } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
- .u = { .gre = { .key = callid } },
- .protonum = IPPROTO_GRE
- },
- },
- /* tuple in reply direction, PAC->PNS */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
- .u = { .gre = { .key = callid } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
- .u = { .gre = { .key = peer_callid } },
- .protonum = IPPROTO_GRE
- },
- }
- };
struct ip_conntrack_expect *exp_orig, *exp_reply;
int ret = 1;
- exp_orig = ip_conntrack_expect_alloc(master);
+ exp_orig = ip_conntrack_expect_alloc(ct);
if (exp_orig == NULL)
goto out;
- exp_reply = ip_conntrack_expect_alloc(master);
+ exp_reply = ip_conntrack_expect_alloc(ct);
if (exp_reply == NULL)
goto out_put_orig;
- memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
+ /* original direction, PNS->PAC */
+ exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+ exp_orig->tuple.src.u.gre.key = peer_callid;
+ exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+ exp_orig->tuple.dst.u.gre.key = callid;
+ exp_orig->tuple.dst.protonum = IPPROTO_GRE;
exp_orig->mask.src.ip = 0xffffffff;
exp_orig->mask.src.u.all = 0;
- exp_orig->mask.dst.u.all = 0;
exp_orig->mask.dst.u.gre.key = htons(0xffff);
exp_orig->mask.dst.ip = 0xffffffff;
exp_orig->mask.dst.protonum = 0xff;
-
- exp_orig->master = master;
+
+ exp_orig->master = ct;
exp_orig->expectfn = pptp_expectfn;
exp_orig->flags = 0;
/* both expectations are identical apart from tuple */
memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
- memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
- if (ip_nat_pptp_hook_exp_gre)
- ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
- else {
-
- DEBUGP("calling expect_related PNS->PAC");
- DUMP_TUPLE(&exp_orig->tuple);
-
- if (ip_conntrack_expect_related(exp_orig) != 0) {
- DEBUGP("cannot expect_related()\n");
- goto out_put_both;
- }
+ /* reply direction, PAC->PNS */
+ exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+ exp_reply->tuple.src.u.gre.key = callid;
+ exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+ exp_reply->tuple.dst.u.gre.key = peer_callid;
+ exp_reply->tuple.dst.protonum = IPPROTO_GRE;
- DEBUGP("calling expect_related PAC->PNS");
- DUMP_TUPLE(&exp_reply->tuple);
-
- if (ip_conntrack_expect_related(exp_reply) != 0) {
- DEBUGP("cannot expect_related()\n");
- goto out_unexpect_orig;
- }
-
- /* Add GRE keymap entries */
- if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
- DEBUGP("cannot keymap_add() exp\n");
- goto out_unexpect_both;
- }
-
- invert_tuplepr(&inv_tuple, &exp_reply->tuple);
- if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
- ip_ct_gre_keymap_destroy(master);
- DEBUGP("cannot keymap_add() exp_inv\n");
- goto out_unexpect_both;
- }
- ret = 0;
+ if (ip_nat_pptp_hook_exp_gre)
+ ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+ if (ip_conntrack_expect_related(exp_orig) != 0)
+ goto out_put_both;
+ if (ip_conntrack_expect_related(exp_reply) != 0)
+ goto out_unexpect_orig;
+
+ /* Add GRE keymap entries */
+ if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
+ goto out_unexpect_both;
+ if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
+ ip_ct_gre_keymap_destroy(ct);
+ goto out_unexpect_both;
}
+ ret = 0;
out_put_both:
ip_conntrack_expect_put(exp_reply);
@@ -322,73 +292,36 @@ out_unexpect_orig:
goto out_put_both;
}
-static inline int
+static inline int
pptp_inbound_pkt(struct sk_buff **pskb,
- struct tcphdr *tcph,
- unsigned int nexthdr_off,
- unsigned int datalen,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
u_int16_t msg;
- __be16 *cid, *pcid;
- u_int32_t seq;
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
+ __be16 cid = 0, pcid = 0;
msg = ntohs(ctlh->messageType);
DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
switch (msg) {
case PPTP_START_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.srep)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server confirms new control session */
- if (info->sstate < PPTP_SESSION_REQUESTED) {
- DEBUGP("%s without START_SESS_REQUEST\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate < PPTP_SESSION_REQUESTED)
+ goto invalid;
if (pptpReq->srep.resultCode == PPTP_START_OK)
info->sstate = PPTP_SESSION_CONFIRMED;
- else
+ else
info->sstate = PPTP_SESSION_ERROR;
break;
case PPTP_STOP_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.strep)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server confirms end of control session */
- if (info->sstate > PPTP_SESSION_STOPREQ) {
- DEBUGP("%s without STOP_SESS_REQUEST\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate > PPTP_SESSION_STOPREQ)
+ goto invalid;
if (pptpReq->strep.resultCode == PPTP_STOP_OK)
info->sstate = PPTP_SESSION_NONE;
else
@@ -396,116 +329,64 @@ pptp_inbound_pkt(struct sk_buff **pskb,
break;
case PPTP_OUT_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.ocack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server accepted call, we now expect GRE frames */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
if (info->cstate != PPTP_CALL_OUT_REQ &&
- info->cstate != PPTP_CALL_OUT_CONF) {
- DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
+ info->cstate != PPTP_CALL_OUT_CONF)
+ goto invalid;
+
+ cid = pptpReq->ocack.callID;
+ pcid = pptpReq->ocack.peersCallID;
+ if (info->pns_call_id != pcid)
+ goto invalid;
+ DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
+ info->cstate = PPTP_CALL_OUT_CONF;
+ info->pac_call_id = cid;
+ exp_gre(ct, cid, pcid);
+ } else
info->cstate = PPTP_CALL_NONE;
- break;
- }
-
- cid = &pptpReq->ocack.callID;
- pcid = &pptpReq->ocack.peersCallID;
-
- info->pac_call_id = ntohs(*cid);
-
- if (htons(info->pns_call_id) != *pcid) {
- DEBUGP("%s for unknown callid %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
-
- DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
- ntohs(*cid), ntohs(*pcid));
-
- info->cstate = PPTP_CALL_OUT_CONF;
-
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
break;
case PPTP_IN_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server tells us about incoming call request */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+
+ cid = pptpReq->icreq.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
info->cstate = PPTP_CALL_IN_REQ;
- info->pac_call_id = ntohs(*pcid);
+ info->pac_call_id = cid;
break;
case PPTP_IN_CALL_CONNECT:
- if (reqlen < sizeof(_pptpReq.iccon)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server tells us about incoming call established */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
- if (info->cstate != PPTP_CALL_IN_REP
- && info->cstate != PPTP_CALL_IN_CONF) {
- DEBUGP("%s but never sent IN_CALL_REPLY\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ if (info->cstate != PPTP_CALL_IN_REP &&
+ info->cstate != PPTP_CALL_IN_CONF)
+ goto invalid;
- pcid = &pptpReq->iccon.peersCallID;
- cid = &info->pac_call_id;
+ pcid = pptpReq->iccon.peersCallID;
+ cid = info->pac_call_id;
- if (info->pns_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown CallID %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
+ if (info->pns_call_id != pcid)
+ goto invalid;
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+ DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
info->cstate = PPTP_CALL_IN_CONF;
/* we expect a GRE connection from PAC to PNS */
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
-
+ exp_gre(ct, cid, pcid);
break;
case PPTP_CALL_DISCONNECT_NOTIFY:
- if (reqlen < sizeof(_pptpReq.disc)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* server confirms disconnect */
- cid = &pptpReq->disc.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+ cid = pptpReq->disc.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
info->cstate = PPTP_CALL_NONE;
/* untrack this call id, unexpect GRE packets */
@@ -513,54 +394,39 @@ pptp_inbound_pkt(struct sk_buff **pskb,
break;
case PPTP_WAN_ERROR_NOTIFY:
- break;
-
case PPTP_ECHO_REQUEST:
case PPTP_ECHO_REPLY:
/* I don't have to explain these ;) */
break;
default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
- ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
- break;
+ goto invalid;
}
-
if (ip_nat_pptp_hook_inbound)
return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
pptpReq);
-
return NF_ACCEPT;
+invalid:
+ DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+ return NF_ACCEPT;
}
static inline int
pptp_outbound_pkt(struct sk_buff **pskb,
- struct tcphdr *tcph,
- unsigned int nexthdr_off,
- unsigned int datalen,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
u_int16_t msg;
- __be16 *cid, *pcid;
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh)
- return NF_ACCEPT;
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq)
- return NF_ACCEPT;
+ __be16 cid = 0, pcid = 0;
msg = ntohs(ctlh->messageType);
DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
@@ -568,10 +434,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
switch (msg) {
case PPTP_START_SESSION_REQUEST:
/* client requests for new control session */
- if (info->sstate != PPTP_SESSION_NONE) {
- DEBUGP("%s but we already have one",
- pptp_msg_name[msg]);
- }
+ if (info->sstate != PPTP_SESSION_NONE)
+ goto invalid;
info->sstate = PPTP_SESSION_REQUESTED;
break;
case PPTP_STOP_SESSION_REQUEST:
@@ -580,123 +444,115 @@ pptp_outbound_pkt(struct sk_buff **pskb,
break;
case PPTP_OUT_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.ocreq)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- /* FIXME: break; */
- }
-
/* client initiating connection to server */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n",
- pptp_msg_name[msg]);
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
info->cstate = PPTP_CALL_OUT_REQ;
/* track PNS call id */
- cid = &pptpReq->ocreq.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
- info->pns_call_id = ntohs(*cid);
+ cid = pptpReq->ocreq.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ info->pns_call_id = cid;
break;
case PPTP_IN_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
/* client answers incoming call */
- if (info->cstate != PPTP_CALL_IN_REQ
- && info->cstate != PPTP_CALL_IN_REP) {
- DEBUGP("%s without incall_req\n",
- pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
+ if (info->cstate != PPTP_CALL_IN_REQ &&
+ info->cstate != PPTP_CALL_IN_REP)
+ goto invalid;
+
+ cid = pptpReq->icack.callID;
+ pcid = pptpReq->icack.peersCallID;
+ if (info->pac_call_id != pcid)
+ goto invalid;
+ DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
+ /* part two of the three-way handshake */
+ info->cstate = PPTP_CALL_IN_REP;
+ info->pns_call_id = cid;
+ } else
info->cstate = PPTP_CALL_NONE;
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- if (info->pac_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown call %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
- /* part two of the three-way handshake */
- info->cstate = PPTP_CALL_IN_REP;
- info->pns_call_id = ntohs(pptpReq->icack.callID);
break;
case PPTP_CALL_CLEAR_REQUEST:
/* client requests hangup of call */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("CLEAR_CALL but no session\n");
- break;
- }
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
/* FUTURE: iterate over all calls and check if
* call ID is valid. We don't do this without newnat,
* because we only know about last call */
info->cstate = PPTP_CALL_CLEAR_REQ;
break;
case PPTP_SET_LINK_INFO:
- break;
case PPTP_ECHO_REQUEST:
case PPTP_ECHO_REPLY:
/* I don't have to explain these ;) */
break;
default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0], msg);
- /* unknown: no need to create GRE masq table entry */
- break;
+ goto invalid;
}
-
+
if (ip_nat_pptp_hook_outbound)
return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
pptpReq);
+ return NF_ACCEPT;
+invalid:
+ DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
return NF_ACCEPT;
}
+static const unsigned int pptp_msg_size[] = {
+ [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
+ [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
+ [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
+ [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
+ [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
+ [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
+ [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
+ [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
+ [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
+ [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
+ [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+ [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
+ [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
+};
/* track caller id inside control connection, call expect_related */
-static int
+static int
conntrack_pptp_help(struct sk_buff **pskb,
struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
{
- struct pptp_pkt_hdr _pptph, *pptph;
- struct tcphdr _tcph, *tcph;
- u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
- u_int32_t datalen;
int dir = CTINFO2DIR(ctinfo);
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- unsigned int nexthdr_off;
-
+ struct tcphdr _tcph, *tcph;
+ struct pptp_pkt_hdr _pptph, *pptph;
+ struct PptpControlHeader _ctlh, *ctlh;
+ union pptp_ctrl_union _pptpReq, *pptpReq;
+ unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+ unsigned int datalen, reqlen, nexthdr_off;
int oldsstate, oldcstate;
int ret;
+ u_int16_t msg;
/* don't do any tracking before tcp handshake complete */
- if (ctinfo != IP_CT_ESTABLISHED
+ if (ctinfo != IP_CT_ESTABLISHED
&& ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
DEBUGP("ctinfo = %u, skipping\n", ctinfo);
return NF_ACCEPT;
}
-
+
nexthdr_off = (*pskb)->nh.iph->ihl*4;
tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
BUG_ON(!tcph);
nexthdr_off += tcph->doff * 4;
datalen = tcplen - tcph->doff * 4;
- if (tcph->fin || tcph->rst) {
- DEBUGP("RST/FIN received, timeouting GRE\n");
- /* can't do this after real newnat */
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_destroy_siblings(ct);
- }
-
pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
if (!pptph) {
DEBUGP("no full PPTP header, can't track\n");
@@ -712,6 +568,23 @@ conntrack_pptp_help(struct sk_buff **pskb,
return NF_ACCEPT;
}
+ ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ if (!ctlh)
+ return NF_ACCEPT;
+ nexthdr_off += sizeof(_ctlh);
+ datalen -= sizeof(_ctlh);
+
+ reqlen = datalen;
+ msg = ntohs(ctlh->messageType);
+ if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+ return NF_ACCEPT;
+ if (reqlen > sizeof(*pptpReq))
+ reqlen = sizeof(*pptpReq);
+
+ pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+ if (!pptpReq)
+ return NF_ACCEPT;
+
oldsstate = info->sstate;
oldcstate = info->cstate;
@@ -721,11 +594,11 @@ conntrack_pptp_help(struct sk_buff **pskb,
* established from PNS->PAC. However, RFC makes no guarantee */
if (dir == IP_CT_DIR_ORIGINAL)
/* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+ ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
ctinfo);
else
/* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+ ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
ctinfo);
DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
oldsstate, info->sstate, oldcstate, info->cstate);
@@ -735,30 +608,31 @@ conntrack_pptp_help(struct sk_buff **pskb,
}
/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
+static struct ip_conntrack_helper pptp = {
.list = { NULL, NULL },
- .name = "pptp",
+ .name = "pptp",
.me = THIS_MODULE,
.max_expected = 2,
.timeout = 5 * 60,
- .tuple = { .src = { .ip = 0,
- .u = { .tcp = { .port =
- __constant_htons(PPTP_CONTROL_PORT) } }
- },
- .dst = { .ip = 0,
+ .tuple = { .src = { .ip = 0,
+ .u = { .tcp = { .port =
+ __constant_htons(PPTP_CONTROL_PORT) } }
+ },
+ .dst = { .ip = 0,
.u = { .all = 0 },
.protonum = IPPROTO_TCP
- }
+ }
},
- .mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = __constant_htons(0xffff) } }
- },
- .dst = { .ip = 0,
+ .mask = { .src = { .ip = 0,
+ .u = { .tcp = { .port = __constant_htons(0xffff) } }
+ },
+ .dst = { .ip = 0,
.u = { .all = 0 },
- .protonum = 0xff
- }
+ .protonum = 0xff
+ }
},
- .help = conntrack_pptp_help
+ .help = conntrack_pptp_help,
+ .destroy = pptp_destroy_siblings,
};
extern void ip_ct_proto_gre_fini(void);
@@ -768,7 +642,7 @@ extern int __init ip_ct_proto_gre_init(void);
static int __init ip_conntrack_helper_pptp_init(void)
{
int retcode;
-
+
retcode = ip_ct_proto_gre_init();
if (retcode < 0)
return retcode;
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index a566a81325b2..3d0b438783db 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -21,6 +21,7 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
+#include <linux/if_addr.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <net/route.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 0d4cc92391fa..52eddea27e93 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
/* dump everything */
events = ~0UL;
group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS |
- IPCT_PROTOINFO |
- IPCT_HELPER |
- IPCT_HELPINFO |
- IPCT_NATINFO)) {
+ } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
@@ -385,6 +381,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
goto nfattr_failure;
+ if (events & IPCT_MARK
+ && ctnetlink_dump_mark(skb, ct) < 0)
+ goto nfattr_failure;
+
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, 0, group, 0);
return NOTIFY_DONE;
@@ -436,6 +436,11 @@ restart:
cb->args[1] = (unsigned long)ct;
goto out;
}
+#ifdef CONFIG_NF_CT_ACCT
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+ IPCTNL_MSG_CT_GET_CTRZERO)
+ memset(&ct->counters, 0, sizeof(ct->counters));
+#endif
}
if (cb->args[1]) {
cb->args[1] = 0;
@@ -451,46 +456,6 @@ out:
return skb->len;
}
-#ifdef CONFIG_IP_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack *ct = NULL;
- struct ip_conntrack_tuple_hash *h;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[1];
-
- DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
- cb->args[0], *id);
-
- write_lock_bh(&ip_conntrack_lock);
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = tuplehash_to_ctrack(h);
- if (ct->id <= *id)
- continue;
- if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0)
- goto out;
- *id = ct->id;
-
- memset(&ct->counters, 0, sizeof(ct->counters));
- }
- }
-out:
- write_unlock_bh(&ip_conntrack_lock);
-
- DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
- return skb->len;
-}
-#endif
-
static const size_t cta_min_ip[CTA_IP_MAX] = {
[CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
[CTA_IP_V4_DST-1] = sizeof(u_int32_t),
@@ -775,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (msg->nfgen_family != AF_INET)
return -EAFNOSUPPORT;
- if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_IP_NF_CT_ACCT
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table_w,
- ctnetlink_done)) != 0)
- return -EINVAL;
-#else
+#ifndef CONFIG_IP_NF_CT_ACCT
+ if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
return -ENOTSUPP;
#endif
- } else {
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
+ if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+ ctnetlink_dump_table,
+ ctnetlink_done)) != 0)
return -EINVAL;
- }
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
@@ -1253,6 +1210,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
} else
return NOTIFY_DONE;
+ if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+ return NOTIFY_DONE;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb)
return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index f891308b5e4c..36f2b5e5d80a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned int ip_ct_generic_timeout = 600*HZ;
+unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
static int generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 4ee016c427b4..5fe026f467d3 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -1,15 +1,15 @@
/*
- * ip_conntrack_proto_gre.c - Version 3.0
+ * ip_conntrack_proto_gre.c - Version 3.0
*
* Connection tracking protocol helper module for GRE.
*
* GRE is a generic encapsulation protocol, which is generally not very
* suited for NAT, as it has no protocol-specific part as port numbers.
*
- * It has an optional key field, which may help us distinguishing two
+ * It has an optional key field, which may help us distinguishing two
* connections between the same two hosts.
*
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
*
* PPTP is built on top of a modified version of GRE, and has a mandatory
* field called "CallID", which serves us for the same purpose as the key
@@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock);
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -62,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
#define DEBUGP(x, args...)
#define DUMP_TUPLE_GRE(x)
#endif
-
+
/* GRE KEYMAP HANDLING FUNCTIONS */
static LIST_HEAD(gre_keymap_list);
@@ -82,12 +81,14 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
__be16 key = 0;
read_lock_bh(&ip_ct_gre_lock);
- km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
- struct ip_ct_gre_keymap *, t);
- if (km)
- key = km->tuple.src.u.gre.key;
+ list_for_each_entry(km, &gre_keymap_list, list) {
+ if (gre_key_cmpfn(km, t)) {
+ key = km->tuple.src.u.gre.key;
+ break;
+ }
+ }
read_unlock_bh(&ip_ct_gre_lock);
-
+
DEBUGP("lookup src key 0x%x up key for ", key);
DUMP_TUPLE_GRE(t);
@@ -99,28 +100,25 @@ int
ip_ct_gre_keymap_add(struct ip_conntrack *ct,
struct ip_conntrack_tuple *t, int reply)
{
- struct ip_ct_gre_keymap **exist_km, *km, *old;
+ struct ip_ct_gre_keymap **exist_km, *km;
if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
DEBUGP("refusing to add GRE keymap to non-pptp session\n");
return -1;
}
- if (!reply)
+ if (!reply)
exist_km = &ct->help.ct_pptp_info.keymap_orig;
else
exist_km = &ct->help.ct_pptp_info.keymap_reply;
if (*exist_km) {
/* check whether it's a retransmission */
- old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
- struct ip_ct_gre_keymap *, t);
- if (old == *exist_km) {
- DEBUGP("retransmission\n");
- return 0;
+ list_for_each_entry(km, &gre_keymap_list, list) {
+ if (gre_key_cmpfn(km, t) && km == *exist_km)
+ return 0;
}
-
- DEBUGP("trying to override keymap_%s for ct %p\n",
+ DEBUGP("trying to override keymap_%s for ct %p\n",
reply? "reply":"orig", ct);
return -EEXIST;
}
@@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
DUMP_TUPLE_GRE(&km->tuple);
write_lock_bh(&ip_ct_gre_lock);
- list_append(&gre_keymap_list, km);
+ list_add_tail(&km->list, &gre_keymap_list);
write_unlock_bh(&ip_ct_gre_lock);
return 0;
@@ -154,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
write_lock_bh(&ip_ct_gre_lock);
if (ct->help.ct_pptp_info.keymap_orig) {
- DEBUGP("removing %p from list\n",
+ DEBUGP("removing %p from list\n",
ct->help.ct_pptp_info.keymap_orig);
list_del(&ct->help.ct_pptp_info.keymap_orig->list);
kfree(ct->help.ct_pptp_info.keymap_orig);
@@ -222,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb,
static int gre_print_tuple(struct seq_file *s,
const struct ip_conntrack_tuple *tuple)
{
- return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
ntohs(tuple->src.u.gre.key),
ntohs(tuple->dst.u.gre.key));
}
@@ -252,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct,
} else
ip_ct_refresh_acct(ct, conntrackinfo, skb,
ct->proto.gre.timeout);
-
+
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int gre_new(struct ip_conntrack *ct,
const struct sk_buff *skb)
-{
+{
DEBUGP(": ");
DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
@@ -285,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct)
}
/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
+static struct ip_conntrack_protocol gre = {
.proto = IPPROTO_GRE,
- .name = "gre",
+ .name = "gre",
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
.print_tuple = gre_print_tuple,
@@ -325,7 +323,7 @@ void ip_ct_proto_gre_fini(void)
}
write_unlock_bh(&ip_ct_gre_lock);
- ip_conntrack_protocol_unregister(&gre);
+ ip_conntrack_protocol_unregister(&gre);
}
EXPORT_SYMBOL(ip_ct_gre_keymap_add);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 23f1c504586d..09c40ebe3345 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -21,7 +21,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned int ip_ct_icmp_timeout = 30*HZ;
+unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
#if 0
#define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 2d3612cd5f18..b908a4842e18 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned int ip_ct_sctp_timeout_closed = 10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_established = 5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS;
+static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
static const unsigned int * sctp_timeouts[]
= { NULL, /* SCTP_CONNTRACK_NONE */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index fb920e76ec10..03ae9a04cb37 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock);
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal = 0;
+int ip_ct_tcp_be_liberal __read_mostly = 0;
/* When connection is picked up from the middle, how many packets are required
to pass in each direction when we assume we are in sync - if any side uses
window scaling, we lost the game.
If it is set to zero, we disable picking up already established
connections. */
-int ip_ct_tcp_loose = 3;
+int ip_ct_tcp_loose __read_mostly = 3;
/* Max number of the retransmitted packets without receiving an (acceptable)
ACK from the destination. If this number is reached, a shorter timer
will be started. */
-int ip_ct_tcp_max_retrans = 3;
+int ip_ct_tcp_max_retrans __read_mostly = 3;
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
@@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS;
-unsigned int ip_ct_tcp_timeout_established = 5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close = 10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
+unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
+unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
/* RFC1122 says the R2 limit should be at least 100 seconds.
Linux uses 15 packets as limit, which corresponds
to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
static const unsigned int * tcp_timeouts[]
= { NULL, /* TCP_CONNTRACK_NONE */
@@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
- && state->last_end == end)
+ && state->last_end == end
+ && state->last_win == win)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
+ state->last_win = win;
state->retrans = 0;
}
}
@@ -865,8 +867,7 @@ static int tcp_error(struct sk_buff *skb,
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
+ * because it is assumed to be correct.
*/
/* FIXME: Source route IP option packets --RR */
if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 9b2c16b4d2ff..d0e8a16970ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -18,8 +18,8 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned int ip_ct_udp_timeout = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream = 180*HZ;
+unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
static int udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
@@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
+ * because the checksum is assumed to be correct.
* FIXME: Source route IP option packets --RR */
if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index 4f222d6be009..2893e9c74850 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -8,7 +8,6 @@
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/skbuff.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7a9fa04a467a..02135756562e 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -35,7 +35,6 @@
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -534,7 +533,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
/* Sysctl support */
-int ip_conntrack_checksum = 1;
+int ip_conntrack_checksum __read_mostly = 1;
#ifdef CONFIG_SYSCTL
@@ -563,7 +562,7 @@ extern unsigned int ip_ct_udp_timeout_stream;
/* From ip_conntrack_proto_icmp.c */
extern unsigned int ip_ct_icmp_timeout;
-/* From ip_conntrack_proto_icmp.c */
+/* From ip_conntrack_proto_generic.c */
extern unsigned int ip_ct_generic_timeout;
/* Log invalid packets of a given protocol */
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1741d555ad0d..71f3e09cbc84 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -22,9 +22,6 @@
#include <linux/udp.h>
#include <linux/jhash.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -33,7 +30,6 @@
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -101,18 +97,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
write_unlock_bh(&ip_nat_lock);
}
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong. Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
- u_int32_t diffs[] = { oldvalinv, newval };
- return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
- oldcheck^0xFFFF));
-}
-EXPORT_SYMBOL(ip_nat_cheat_check);
-
/* Is this tuple already taken? (not by us) */
int
ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -378,12 +362,12 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
- iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
- iph->check);
+ iph->check = nf_csum_update(~iph->saddr, target->src.ip,
+ iph->check);
iph->saddr = target->src.ip;
} else {
- iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
- iph->check);
+ iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
+ iph->check);
iph->daddr = target->dst.ip;
}
return 1;
@@ -423,10 +407,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct,
EXPORT_SYMBOL_GPL(ip_nat_packet);
/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_nat_manip_type manip,
- enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
struct {
struct icmphdr icmp;
@@ -434,7 +418,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
} *inside;
struct ip_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
+ enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
return 0;
@@ -443,12 +429,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
- if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
- hdrlen = (*pskb)->nh.iph->ihl * 4;
- if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen, 0)))
- return 0;
- }
+ if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+ return 0;
/* Must be RELATED */
IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +469,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
!manip))
return 0;
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- inside->icmp.checksum = 0;
- inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen,
- 0));
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt inner. */
+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+ (*pskb)->len - hdrlen,
+ 0));
+ }
/* Change outer to look the reply to an incoming packet
* (proto 0 means don't invert per-proto part). */
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index cbcaa45370ae..7f6a75984f6c 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -27,16 +27,12 @@
#include <net/tcp.h>
#include <net/udp.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -165,7 +161,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
{
struct iphdr *iph;
struct tcphdr *tcph;
- int datalen;
+ int oldlen, datalen;
if (!skb_make_writable(pskb, (*pskb)->len))
return 0;
@@ -180,13 +176,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
iph = (*pskb)->nh.iph;
tcph = (void *)iph + iph->ihl*4;
+ oldlen = (*pskb)->len - iph->ihl*4;
mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
match_offset, match_len, rep_buffer, rep_len);
datalen = (*pskb)->len - iph->ihl*4;
- tcph->check = 0;
- tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
- csum_partial((char *)tcph, datalen, 0));
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(tcph, datalen,
+ iph->saddr, iph->daddr,
+ csum_partial((char *)tcph,
+ datalen, 0));
+ } else
+ tcph->check = nf_proto_csum_update(*pskb,
+ htons(oldlen) ^ 0xFFFF,
+ htons(datalen),
+ tcph->check, 1);
if (rep_len != match_len) {
set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -221,6 +226,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
{
struct iphdr *iph;
struct udphdr *udph;
+ int datalen, oldlen;
/* UDP helpers might accidentally mangle the wrong packet */
iph = (*pskb)->nh.iph;
@@ -238,22 +244,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
iph = (*pskb)->nh.iph;
udph = (void *)iph + iph->ihl*4;
+
+ oldlen = (*pskb)->len - iph->ihl*4;
mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
match_offset, match_len, rep_buffer, rep_len);
/* update the length of the UDP packet */
- udph->len = htons((*pskb)->len - iph->ihl*4);
+ datalen = (*pskb)->len - iph->ihl*4;
+ udph->len = htons(datalen);
/* fix udp checksum if udp checksum was previously calculated */
- if (udph->check) {
- int datalen = (*pskb)->len - iph->ihl * 4;
+ if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+ return 1;
+
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
udph->check = 0;
udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
datalen, IPPROTO_UDP,
csum_partial((char *)udph,
datalen, 0));
- }
-
+ if (!udph->check)
+ udph->check = -1;
+ } else
+ udph->check = nf_proto_csum_update(*pskb,
+ htons(oldlen) ^ 0xFFFF,
+ htons(datalen),
+ udph->check, 1);
return 1;
}
EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -293,11 +309,14 @@ sack_adjust(struct sk_buff *skb,
ntohl(sack->start_seq), new_start_seq,
ntohl(sack->end_seq), new_end_seq);
- tcph->check =
- ip_nat_cheat_check(~sack->start_seq, new_start_seq,
- ip_nat_cheat_check(~sack->end_seq,
- new_end_seq,
- tcph->check));
+ tcph->check = nf_proto_csum_update(skb,
+ ~sack->start_seq,
+ new_start_seq,
+ tcph->check, 0);
+ tcph->check = nf_proto_csum_update(skb,
+ ~sack->end_seq,
+ new_end_seq,
+ tcph->check, 0);
sack->start_seq = new_start_seq;
sack->end_seq = new_end_seq;
sackoff += sizeof(*sack);
@@ -381,10 +400,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
newack = ntohl(tcph->ack_seq) - other_way->offset_before;
newack = htonl(newack);
- tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
- ip_nat_cheat_check(~tcph->ack_seq,
- newack,
- tcph->check));
+ tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
+ tcph->check, 0);
+ tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
+ tcph->check, 0);
DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 1d149964dc38..2ff578807123 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -32,7 +32,7 @@
* 2005-06-10 - Version 3.0
* - kernel >= 2.6.11 version,
* funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- *
+ *
*/
#include <linux/module.h>
@@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
DEBUGP("we are PNS->PAC\n");
/* therefore, build tuple for PAC->PNS */
t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+ t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+ t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
t.dst.protonum = IPPROTO_GRE;
} else {
DEBUGP("we are PAC->PNS\n");
/* build tuple for PNS->PAC */
t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- t.src.u.gre.key =
- htons(master->nat.help.nat_pptp_info.pns_call_id);
+ t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- t.dst.u.gre.key =
- htons(master->nat.help.nat_pptp_info.pac_call_id);
+ t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
t.dst.protonum = IPPROTO_GRE;
}
@@ -149,51 +147,52 @@ pptp_outbound_pkt(struct sk_buff **pskb,
{
struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg, new_callid;
+ u_int16_t msg;
+ __be16 new_callid;
unsigned int cid_off;
- new_callid = htons(ct_pptp_info->pns_call_id);
-
+ new_callid = ct_pptp_info->pns_call_id;
+
switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
- /* FIXME: ideally we would want to reserve a call ID
- * here. current netfilter NAT core is not able to do
- * this :( For now we use TCP source port. This breaks
- * multiple calls within one control session */
-
- /* save original call ID in nat_info */
- nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
- /* don't use tcph->source since we are at a DSTmanip
- * hook (e.g. PREROUTING) and pkt is not mangled yet */
- new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
- /* save new call ID in ct info */
- ct_pptp_info->pns_call_id = ntohs(new_callid);
- break;
- case PPTP_IN_CALL_REPLY:
- cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
- break;
- case PPTP_CALL_CLEAR_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
- break;
- default:
- DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
- (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_SET_LINK_INFO:
- /* only need to NAT in case PAC is behind NAT box */
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
+ case PPTP_OUT_CALL_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
+
+ /* save original call ID in nat_info */
+ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ /* save new call ID in ct info */
+ ct_pptp_info->pns_call_id = new_callid;
+ break;
+ case PPTP_IN_CALL_REPLY:
+ cid_off = offsetof(union pptp_ctrl_union, icack.callID);
+ break;
+ case PPTP_CALL_CLEAR_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+ break;
+ default:
+ DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+ (msg <= PPTP_MSG_MAX)?
+ pptp_msg_name[msg]:pptp_msg_name[0]);
+ /* fall through */
+
+ case PPTP_SET_LINK_INFO:
+ /* only need to NAT in case PAC is behind NAT box */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
}
/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
@@ -212,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb,
return NF_ACCEPT;
}
-static int
+static void
pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
struct ip_conntrack_expect *expect_reply)
{
- struct ip_ct_pptp_master *ct_pptp_info =
- &expect_orig->master->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info =
- &expect_orig->master->nat.help.nat_pptp_info;
-
struct ip_conntrack *ct = expect_orig->master;
-
- struct ip_conntrack_tuple inv_t;
- struct ip_conntrack_tuple *orig_t, *reply_t;
+ struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+ struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
/* save original PAC call ID in nat_info */
nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
- /* alter expectation */
- orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
/* alter expectation for PNS->PAC direction */
- invert_tuplepr(&inv_t, &expect_orig->tuple);
- expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
- expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
- expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+ expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
+ expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
+ expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
expect_orig->dir = IP_CT_DIR_ORIGINAL;
- inv_t.src.ip = reply_t->src.ip;
- inv_t.dst.ip = reply_t->dst.ip;
- inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
- inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
-
- if (!ip_conntrack_expect_related(expect_orig)) {
- DEBUGP("successfully registered expect\n");
- } else {
- DEBUGP("can't expect_related(expect_orig)\n");
- return 1;
- }
/* alter expectation for PAC->PNS direction */
- invert_tuplepr(&inv_t, &expect_reply->tuple);
- expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
- expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
- expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+ expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
+ expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
+ expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
expect_reply->dir = IP_CT_DIR_REPLY;
- inv_t.src.ip = orig_t->src.ip;
- inv_t.dst.ip = orig_t->dst.ip;
- inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
- inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
-
- if (!ip_conntrack_expect_related(expect_reply)) {
- DEBUGP("successfully registered expect\n");
- } else {
- DEBUGP("can't expect_related(expect_reply)\n");
- ip_conntrack_unexpect_related(expect_orig);
- return 1;
- }
-
- if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
- DEBUGP("can't register original keymap\n");
- ip_conntrack_unexpect_related(expect_orig);
- ip_conntrack_unexpect_related(expect_reply);
- return 1;
- }
-
- if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
- DEBUGP("can't register reply keymap\n");
- ip_conntrack_unexpect_related(expect_orig);
- ip_conntrack_unexpect_related(expect_reply);
- ip_ct_gre_keymap_destroy(ct);
- return 1;
- }
-
- return 0;
}
/* inbound packets == from PAC to PNS */
@@ -297,15 +244,15 @@ pptp_inbound_pkt(struct sk_buff **pskb,
union pptp_ctrl_union *pptpReq)
{
struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg, new_cid = 0, new_pcid;
- unsigned int pcid_off, cid_off = 0;
+ u_int16_t msg;
+ __be16 new_pcid;
+ unsigned int pcid_off;
- new_pcid = htons(nat_pptp_info->pns_call_id);
+ new_pcid = nat_pptp_info->pns_call_id;
switch (msg = ntohs(ctlh->messageType)) {
case PPTP_OUT_CALL_REPLY:
pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
- cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
break;
case PPTP_IN_CALL_CONNECT:
pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
@@ -324,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
break;
default:
- DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
+ DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
pptp_msg_name[msg]:pptp_msg_name[0]);
/* fall through */
@@ -351,17 +298,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
sizeof(new_pcid), (char *)&new_pcid,
sizeof(new_pcid)) == 0)
return NF_DROP;
-
- if (new_cid) {
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid));
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- cid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_cid), (char *)&new_cid,
- sizeof(new_cid)) == 0)
- return NF_DROP;
- }
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 38acfdf540eb..bf91f9312b3c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -6,10 +6,10 @@
* GRE is a generic encapsulation protocol, which is generally not very
* suited for NAT, as it has no protocol-specific part as port numbers.
*
- * It has an optional key field, which may help us distinguishing two
+ * It has an optional key field, which may help us distinguishing two
* connections between the same two hosts.
*
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
*
* PPTP is built on top of a modified version of GRE, and has a mandatory
* field called "CallID", which serves us for the same purpose as the key
@@ -60,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple,
}
/* generate unique tuple ... */
-static int
+static int
gre_unique_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range,
enum ip_nat_manip_type maniptype,
const struct ip_conntrack *conntrack)
{
static u_int16_t key;
- u_int16_t *keyptr;
+ __be16 *keyptr;
unsigned int min, i, range_size;
if (maniptype == IP_NAT_MANIP_SRC)
@@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple,
range_size = ntohs(range->max.gre.key) - min + 1;
}
- DEBUGP("min = %u, range_size = %u\n", min, range_size);
+ DEBUGP("min = %u, range_size = %u\n", min, range_size);
for (i = 0; i < range_size; i++, key++) {
*keyptr = htons(min + key % range_size);
@@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb,
greh = (void *)(*pskb)->data + hdroff;
pgreh = (struct gre_hdr_pptp *) greh;
- /* we only have destination manip of a packet, since 'source key'
+ /* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
if (maniptype == IP_NAT_MANIP_DST) {
/* key manipulation is always dest */
@@ -129,15 +129,16 @@ gre_manip_pkt(struct sk_buff **pskb,
}
if (greh->csum) {
/* FIXME: Never tested this code... */
- *(gre_csum(greh)) =
- ip_nat_cheat_check(~*(gre_key(greh)),
+ *(gre_csum(greh)) =
+ nf_proto_csum_update(*pskb,
+ ~*(gre_key(greh)),
tuple->dst.u.gre.key,
- *(gre_csum(greh)));
+ *(gre_csum(greh)), 0);
}
*(gre_key(greh)) = tuple->dst.u.gre.key;
break;
case GRE_VERSION_PPTP:
- DEBUGP("call_id -> 0x%04x\n",
+ DEBUGP("call_id -> 0x%04x\n",
ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
@@ -151,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb,
}
/* nat helper struct */
-static struct ip_nat_protocol gre = {
- .name = "GRE",
+static struct ip_nat_protocol gre = {
+ .name = "GRE",
.protonum = IPPROTO_GRE,
.manip_pkt = gre_manip_pkt,
.in_range = gre_in_range,
@@ -163,7 +164,7 @@ static struct ip_nat_protocol gre = {
.nfattr_to_range = ip_nat_port_nfattr_to_range,
#endif
};
-
+
int __init ip_nat_proto_gre_init(void)
{
return ip_nat_protocol_register(&gre);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 31a3f4ccb99c..ec50cc295317 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb,
return 0;
hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-
- hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
- tuple->src.u.icmp.id,
- hdr->checksum);
+ hdr->checksum = nf_proto_csum_update(*pskb,
+ hdr->un.echo.id ^ 0xFFFF,
+ tuple->src.u.icmp.id,
+ hdr->checksum, 0);
hdr->un.echo.id = tuple->src.u.icmp.id;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index a3d14079eba6..72a6307bd2db 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb,
if (hdrsize < sizeof(*hdr))
return 1;
- hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(oldport ^ 0xFFFF,
- newport,
- hdr->check));
+ hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
+ hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
+ hdr->check, 0);
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index ec6053fdc867..5da196ae758c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
- if (hdr->check) /* 0 is a special case meaning no checksum */
- hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(*portptr ^ 0xFFFF,
- newport,
- hdr->check));
+
+ if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
+ hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
+ hdr->check, 1);
+ hdr->check = nf_proto_csum_update(*pskb,
+ *portptr ^ 0xFFFF, newport,
+ hdr->check, 0);
+ if (!hdr->check)
+ hdr->check = -1;
+ }
*portptr = newport;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 1aba926c1cb0..7b703839aa58 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -19,14 +19,10 @@
#include <net/route.h>
#include <linux/bitops.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -104,8 +100,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct ipt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
@@ -147,8 +142,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct ipt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
@@ -174,7 +168,6 @@ static int ipt_snat_checkentry(const char *tablename,
const void *entry,
const struct ipt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ip_nat_multi_range_compat *mr = targinfo;
@@ -191,7 +184,6 @@ static int ipt_dnat_checkentry(const char *tablename,
const void *entry,
const struct ipt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ip_nat_multi_range_compat *mr = targinfo;
@@ -255,7 +247,7 @@ int ip_nat_rule_find(struct sk_buff **pskb,
{
int ret;
- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+ ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
if (ret == NF_ACCEPT) {
if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 17de077a7901..9c577db62047 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -30,9 +30,6 @@
#include <net/checksum.h>
#include <linux/spinlock.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -40,7 +37,6 @@
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
@@ -110,11 +106,6 @@ ip_nat_fn(unsigned int hooknum,
IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
& htons(IP_MF|IP_OFFSET)));
- /* If we had a hardware checksum before, it's now invalid */
- if ((*pskb)->ip_summed == CHECKSUM_HW)
- if (skb_checksum_help(*pskb, (out == NULL)))
- return NF_DROP;
-
ct = ip_conntrack_get(*pskb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
have dropped it. Hence it's the user's responsibilty to
@@ -145,8 +136,8 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
- CTINFO2DIR(ctinfo)))
+ if (!ip_nat_icmp_reply_translation(ct, ctinfo,
+ hooknum, pskb))
return NF_DROP;
else
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 198ac36db861..7edad790478a 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -52,15 +52,15 @@ struct ipq_queue_entry {
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
static LIST_HEAD(queue_list);
static DEFINE_MUTEX(ipqnl_mutex);
@@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
break;
case IPQ_COPY_PACKET:
- if (entry->skb->ip_summed == CHECKSUM_HW &&
- (*errp = skb_checksum_help(entry->skb,
- entry->info->outdev == NULL))) {
+ if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+ entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+ (*errp = skb_checksum_help(entry->skb))) {
read_unlock_bh(&queue_lock);
return NULL;
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 048514f15f2f..800067d69a9a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
if (net_ratelimit())
printk("ip_tables: error: `%s'\n", (char *)targinfo);
@@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct ipt_table *table,
- void *userdata)
+ struct ipt_table *table)
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
u_int16_t offset;
@@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb,
in, out,
hook,
t->u.kernel.target,
- t->data,
- userdata);
+ t->data);
#ifdef CONFIG_NETFILTER_DEBUG
if (((struct ipt_entry *)table_base)->comefrom
@@ -467,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
return 1;
if (m->u.kernel.match->destroy)
- m->u.kernel.match->destroy(m->u.kernel.match, m->data,
- m->u.match_size - sizeof(*m));
+ m->u.kernel.match->destroy(m->u.kernel.match, m->data);
module_put(m->u.kernel.match->me);
return 0;
}
@@ -521,7 +517,6 @@ check_match(struct ipt_entry_match *m,
if (m->u.kernel.match->checkentry
&& !m->u.kernel.match->checkentry(name, ip, match, m->data,
- m->u.match_size - sizeof(*m),
hookmask)) {
duprintf("ip_tables: check failed for `%s'.\n",
m->u.kernel.match->name);
@@ -578,12 +573,10 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
if (t->u.kernel.target == &ipt_standard_target) {
if (!standard_check(t, size)) {
ret = -EINVAL;
- goto cleanup_matches;
+ goto err;
}
} else if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, e, target, t->data,
- t->u.target_size
- - sizeof(*t),
e->comefrom)) {
duprintf("ip_tables: check failed for `%s'.\n",
t->u.kernel.target->name);
@@ -655,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
IPT_MATCH_ITERATE(e, cleanup_match, NULL);
t = ipt_get_target(e);
if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data,
- t->u.target_size - sizeof(*t));
+ t->u.kernel.target->destroy(t->u.kernel.target, t->data);
module_put(t->u.kernel.target->me);
return 0;
}
@@ -950,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset)
return delta;
}
-struct compat_ipt_standard_target
+static void compat_standard_from_user(void *dst, void *src)
{
- struct compat_xt_entry_target target;
- compat_int_t verdict;
-};
-
-struct compat_ipt_standard
-{
- struct compat_ipt_entry entry;
- struct compat_ipt_standard_target target;
-};
+ int v = *(compat_int_t *)src;
-#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target))
-#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target))
-#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN)
+ if (v > 0)
+ v += compat_calc_jump(v);
+ memcpy(dst, &v, sizeof(v));
+}
-static int compat_ipt_standard_fn(void *target,
- void **dstptr, int *size, int convert)
+static int compat_standard_to_user(void __user *dst, void *src)
{
- struct compat_ipt_standard_target compat_st, *pcompat_st;
- struct ipt_standard_target st, *pst;
- int ret;
+ compat_int_t cv = *(int *)src;
- ret = 0;
- switch (convert) {
- case COMPAT_TO_USER:
- pst = target;
- memcpy(&compat_st.target, &pst->target,
- sizeof(compat_st.target));
- compat_st.verdict = pst->verdict;
- if (compat_st.verdict > 0)
- compat_st.verdict -=
- compat_calc_jump(compat_st.verdict);
- compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
- if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
- ret = -EFAULT;
- *size -= IPT_ST_OFFSET;
- *dstptr += IPT_ST_COMPAT_LEN;
- break;
- case COMPAT_FROM_USER:
- pcompat_st = target;
- memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN);
- st.verdict = pcompat_st->verdict;
- if (st.verdict > 0)
- st.verdict += compat_calc_jump(st.verdict);
- st.target.u.user.target_size = IPT_ST_LEN;
- memcpy(*dstptr, &st, IPT_ST_LEN);
- *size += IPT_ST_OFFSET;
- *dstptr += IPT_ST_LEN;
- break;
- case COMPAT_CALC_SIZE:
- *size += IPT_ST_OFFSET;
- break;
- default:
- ret = -ENOPROTOOPT;
- break;
- }
- return ret;
+ if (cv > 0)
+ cv -= compat_calc_jump(cv);
+ return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
}
static inline int
compat_calc_match(struct ipt_entry_match *m, int * size)
{
- if (m->u.kernel.match->compat)
- m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
- else
- xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+ *size += xt_compat_match_offset(m->u.kernel.match);
return 0;
}
@@ -1031,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
entry_offset = (void *)e - base;
IPT_MATCH_ITERATE(e, compat_calc_match, &off);
t = ipt_get_target(e);
- if (t->u.kernel.target->compat)
- t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
- else
- xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+ off += xt_compat_target_offset(t->u.kernel.target);
newinfo->size -= off;
ret = compat_add_offset(entry_offset, off);
if (ret)
@@ -1420,17 +1364,13 @@ struct compat_ipt_replace {
};
static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
- void __user **dstptr, compat_uint_t *size)
+ void * __user *dstptr, compat_uint_t *size)
{
- if (m->u.kernel.match->compat)
- return m->u.kernel.match->compat(m, dstptr, size,
- COMPAT_TO_USER);
- else
- return xt_compat_match(m, dstptr, size, COMPAT_TO_USER);
+ return xt_compat_match_to_user(m, dstptr, size);
}
static int compat_copy_entry_to_user(struct ipt_entry *e,
- void __user **dstptr, compat_uint_t *size)
+ void * __user *dstptr, compat_uint_t *size)
{
struct ipt_entry_target __user *t;
struct compat_ipt_entry __user *ce;
@@ -1450,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
if (ret)
goto out;
t = ipt_get_target(e);
- if (t->u.kernel.target->compat)
- ret = t->u.kernel.target->compat(t, dstptr, size,
- COMPAT_TO_USER);
- else
- ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER);
+ ret = xt_compat_target_to_user(t, dstptr, size);
if (ret)
goto out;
ret = -EFAULT;
@@ -1486,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m,
return match ? PTR_ERR(match) : -ENOENT;
}
m->u.kernel.match = match;
-
- if (m->u.kernel.match->compat)
- m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
- else
- xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+ *size += xt_compat_match_offset(match);
(*i)++;
return 0;
@@ -1537,7 +1469,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
e->comefrom, &off, &j);
if (ret != 0)
- goto out;
+ goto cleanup_matches;
t = ipt_get_target(e);
target = try_then_request_module(xt_find_target(AF_INET,
@@ -1547,14 +1479,11 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
if (IS_ERR(target) || !target) {
duprintf("check_entry: `%s' not found\n", t->u.user.name);
ret = target ? PTR_ERR(target) : -ENOENT;
- goto out;
+ goto cleanup_matches;
}
t->u.kernel.target = target;
- if (t->u.kernel.target->compat)
- t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
- else
- xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+ off += xt_compat_target_offset(target);
*size += off;
ret = compat_add_offset(entry_offset, off);
if (ret)
@@ -1574,14 +1503,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
(*i)++;
return 0;
+
out:
+ module_put(t->u.kernel.target->me);
+cleanup_matches:
IPT_MATCH_ITERATE(e, cleanup_match, &j);
return ret;
}
static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
void **dstptr, compat_uint_t *size, const char *name,
- const struct ipt_ip *ip, unsigned int hookmask)
+ const struct ipt_ip *ip, unsigned int hookmask, int *i)
{
struct ipt_entry_match *dm;
struct ipt_match *match;
@@ -1589,26 +1521,28 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
dm = (struct ipt_entry_match *)*dstptr;
match = m->u.kernel.match;
- if (match->compat)
- match->compat(m, dstptr, size, COMPAT_FROM_USER);
- else
- xt_compat_match(m, dstptr, size, COMPAT_FROM_USER);
+ xt_compat_match_from_user(m, dstptr, size);
ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
name, hookmask, ip->proto,
ip->invflags & IPT_INV_PROTO);
if (ret)
- return ret;
+ goto err;
if (m->u.kernel.match->checkentry
&& !m->u.kernel.match->checkentry(name, ip, match, dm->data,
- dm->u.match_size - sizeof(*dm),
hookmask)) {
duprintf("ip_tables: check failed for `%s'.\n",
m->u.kernel.match->name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
+ (*i)++;
return 0;
+
+err:
+ module_put(m->u.kernel.match->me);
+ return ret;
}
static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
@@ -1619,25 +1553,23 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
struct ipt_target *target;
struct ipt_entry *de;
unsigned int origsize;
- int ret, h;
+ int ret, h, j;
ret = 0;
origsize = *size;
de = (struct ipt_entry *)*dstptr;
memcpy(de, e, sizeof(struct ipt_entry));
+ j = 0;
*dstptr += sizeof(struct compat_ipt_entry);
ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
- name, &de->ip, de->comefrom);
+ name, &de->ip, de->comefrom, &j);
if (ret)
- goto out;
+ goto cleanup_matches;
de->target_offset = e->target_offset - (origsize - *size);
t = ipt_get_target(e);
target = t->u.kernel.target;
- if (target->compat)
- target->compat(t, dstptr, size, COMPAT_FROM_USER);
- else
- xt_compat_target(t, dstptr, size, COMPAT_FROM_USER);
+ xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
for (h = 0; h < NF_IP_NUMHOOKS; h++) {
@@ -1653,22 +1585,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
name, e->comefrom, e->ip.proto,
e->ip.invflags & IPT_INV_PROTO);
if (ret)
- goto out;
+ goto err;
ret = -EINVAL;
if (t->u.kernel.target == &ipt_standard_target) {
if (!standard_check(t, *size))
- goto out;
+ goto err;
} else if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, de, target,
- t->data, t->u.target_size - sizeof(*t),
- de->comefrom)) {
+ t->data, de->comefrom)) {
duprintf("ip_tables: compat: check failed for `%s'.\n",
t->u.kernel.target->name);
- goto out;
+ goto err;
}
ret = 0;
-out:
+ return ret;
+
+err:
+ module_put(t->u.kernel.target->me);
+cleanup_matches:
+ IPT_MATCH_ITERATE(e, cleanup_match, &j);
return ret;
}
@@ -1989,6 +1925,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
return ret;
}
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
static int
compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
@@ -2002,8 +1940,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = compat_get_entries(user, len);
break;
default:
- duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
- ret = -EINVAL;
+ ret = do_ipt_get_ctl(sk, cmd, user, len);
}
return ret;
}
@@ -2185,7 +2122,6 @@ icmp_checkentry(const char *tablename,
const void *info,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ipt_icmp *icmpinfo = matchinfo;
@@ -2200,7 +2136,9 @@ static struct ipt_target ipt_standard_target = {
.targetsize = sizeof(int),
.family = AF_INET,
#ifdef CONFIG_COMPAT
- .compat = &compat_ipt_standard_fn,
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
#endif
};
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d994c5f5744c..41589665fc5d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -302,8 +302,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
enum ip_conntrack_info ctinfo;
@@ -373,7 +372,6 @@ checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ipt_clusterip_tgt_info *cipinfo = targinfo;
@@ -450,8 +448,7 @@ checkentry(const char *tablename,
}
/* drop reference count of cluster config when rule is deleted */
-static void destroy(const struct xt_target *target, void *targinfo,
- unsigned int targinfosize)
+static void destroy(const struct xt_target *target, void *targinfo)
{
struct ipt_clusterip_tgt_info *cipinfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
deleted file mode 100644
index c8e971288dfe..000000000000
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* iptables module for setting the IPv4 DSCP field, Version 1.8
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
-*/
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_DSCP.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP modification module");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_DSCP_info *dinfo = targinfo;
- u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
-
- if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
- u_int16_t diffs[2];
-
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
- return NF_DROP;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
- | sh_dscp;
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^ 0xFFFF));
- }
- return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
- const void *e_void,
- const struct xt_target *target,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
-
- if ((dscp > IPT_DSCP_MAX)) {
- printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
- return 0;
- }
- return 1;
-}
-
-static struct ipt_target ipt_dscp_reg = {
- .name = "DSCP",
- .target = target,
- .targetsize = sizeof(struct ipt_DSCP_info),
- .table = "mangle",
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
- return ipt_register_target(&ipt_dscp_reg);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
- ipt_unregister_target(&ipt_dscp_reg);
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4adf5c9d34f5..23f9c7ebe7eb 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -27,32 +27,28 @@ MODULE_DESCRIPTION("iptables ECN modification module");
static inline int
set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
{
- if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK)
- != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
- u_int16_t diffs[2];
+ struct iphdr *iph = (*pskb)->nh.iph;
+ u_int16_t oldtos;
+ if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return 0;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK;
- (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^0xFFFF));
+ iph = (*pskb)->nh.iph;
+ oldtos = iph->tos;
+ iph->tos &= ~IPT_ECN_IP_MASK;
+ iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+ iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
+ iph->check);
}
return 1;
}
/* Return 0 if there was an error. */
static inline int
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
{
struct tcphdr _tcph, *tcph;
- u_int16_t diffs[2];
+ u_int16_t oldval;
/* Not enought header? */
tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,22 +66,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
return 0;
tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
- if ((*pskb)->ip_summed == CHECKSUM_HW &&
- skb_checksum_help(*pskb, inward))
- return 0;
-
- diffs[0] = ((u_int16_t *)tcph)[6];
+ oldval = ((u_int16_t *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
tcph->ece = einfo->proto.tcp.ece;
if (einfo->operation & IPT_ECN_OP_SET_CWR)
tcph->cwr = einfo->proto.tcp.cwr;
- diffs[1] = ((u_int16_t *)tcph)[6];
- diffs[0] = diffs[0] ^ 0xFFFF;
- if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
- tcph->check = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- tcph->check^0xFFFF));
+ tcph->check = nf_proto_csum_update((*pskb),
+ oldval ^ 0xFFFF,
+ ((u_int16_t *)tcph)[6],
+ tcph->check, 0);
return 1;
}
@@ -95,8 +85,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_ECN_info *einfo = targinfo;
@@ -106,7 +95,7 @@ target(struct sk_buff **pskb,
if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
&& (*pskb)->nh.iph->protocol == IPPROTO_TCP)
- if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+ if (!set_ect_tcp(pskb, einfo))
return NF_DROP;
return IPT_CONTINUE;
@@ -117,7 +106,6 @@ checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b98f7b08b084..7dc820df8bc5 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_log_info *loginfo = targinfo;
struct nf_loginfo li;
@@ -440,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_log_info *loginfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ebd94f2abf0d..bc65168a3437 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -42,7 +42,6 @@ masquerade_check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -64,8 +63,7 @@ masquerade_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 736c4b5a86a7..beb2914225ff 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -33,7 +33,6 @@ check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -55,8 +54,7 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f290463232de..f03d43671c6d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -36,7 +36,6 @@ redirect_check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -58,8 +57,7 @@ redirect_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 269bc2067cb8..b81821edd893 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
fl.proto = IPPROTO_TCP;
fl.fl_ip_sport = tcph->dest;
fl.fl_ip_dport = tcph->source;
+ security_skb_classify_flow(skb, &fl);
xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
@@ -184,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
tcph->urg_ptr = 0;
/* Adjust TCP checksum */
+ nskb->ip_summed = CHECKSUM_NONE;
tcph->check = 0;
tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
nskb->nh.iph->saddr,
@@ -226,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_reject_info *reject = targinfo;
@@ -275,7 +276,6 @@ static int check(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_reject_info *rejinfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7169b09b5a67..efbcb1198832 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -52,7 +52,6 @@ same_check(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
unsigned int count, countess, rangeip, index = 0;
@@ -116,8 +115,7 @@ same_check(const char *tablename,
}
static void
-same_destroy(const struct xt_target *target, void *targinfo,
- unsigned int targinfosize)
+same_destroy(const struct xt_target *target, void *targinfo)
{
struct ipt_same_info *mr = targinfo;
@@ -133,8 +131,7 @@ same_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index ef2fe5b3f0d8..4246c4321e5b 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -21,26 +21,14 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables TCP MSS modification module");
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static u_int16_t
-cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
- u_int32_t diffs[] = { oldvalinv, newval };
- return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
- oldcheck^0xFFFF));
-}
-
static inline unsigned int
optlen(const u_int8_t *opt, unsigned int offset)
{
/* Beware zero-length options: make finite progress */
- if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1;
- else return opt[offset+1];
+ if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+ return 1;
+ else
+ return opt[offset+1];
}
static unsigned int
@@ -49,8 +37,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
struct tcphdr *tcph;
@@ -62,13 +49,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
if (!skb_make_writable(pskb, (*pskb)->len))
return NF_DROP;
- if ((*pskb)->ip_summed == CHECKSUM_HW &&
- skb_checksum_help(*pskb, out == NULL))
- return NF_DROP;
-
iph = (*pskb)->nh.iph;
tcplen = (*pskb)->len - iph->ihl*4;
-
tcph = (void *)iph + iph->ihl*4;
/* Since it passed flags test in tcp match, we know it is is
@@ -84,54 +66,41 @@ ipt_tcpmss_target(struct sk_buff **pskb,
return NF_DROP;
}
- if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
- if(!(*pskb)->dst) {
+ if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
+ if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
+ sizeof(struct tcphdr)) {
if (net_ratelimit())
- printk(KERN_ERR
- "ipt_tcpmss_target: no dst?! can't determine path-MTU\n");
+ printk(KERN_ERR "ipt_tcpmss_target: "
+ "unknown or invalid path-MTU (%d)\n",
+ dst_mtu((*pskb)->dst));
return NF_DROP; /* or IPT_CONTINUE ?? */
}
- if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
- if (net_ratelimit())
- printk(KERN_ERR
- "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
- return NF_DROP; /* or IPT_CONTINUE ?? */
- }
-
- newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
+ newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) -
+ sizeof(struct tcphdr);
} else
newmss = tcpmssinfo->mss;
opt = (u_int8_t *)tcph;
- for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){
- if ((opt[i] == TCPOPT_MSS) &&
- ((tcph->doff*4 - i) >= TCPOLEN_MSS) &&
- (opt[i+1] == TCPOLEN_MSS)) {
+ for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
+ if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
+ opt[i+1] == TCPOLEN_MSS) {
u_int16_t oldmss;
oldmss = (opt[i+2] << 8) | opt[i+3];
- if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
- (oldmss <= newmss))
- return IPT_CONTINUE;
+ if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+ oldmss <= newmss)
+ return IPT_CONTINUE;
opt[i+2] = (newmss & 0xff00) >> 8;
opt[i+3] = (newmss & 0x00ff);
- tcph->check = cheat_check(htons(oldmss)^0xFFFF,
- htons(newmss),
- tcph->check);
-
- DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
- "->%u.%u.%u.%u:%hu changed TCP MSS option"
- " (from %u to %u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- ntohs(tcph->source),
- NIPQUAD((*pskb)->nh.iph->daddr),
- ntohs(tcph->dest),
- oldmss, newmss);
- goto retmodified;
+ tcph->check = nf_proto_csum_update(*pskb,
+ htons(oldmss)^0xFFFF,
+ htons(newmss),
+ tcph->check, 0);
+ return IPT_CONTINUE;
}
}
@@ -143,13 +112,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
TCPOLEN_MSS, GFP_ATOMIC);
- if (!newskb) {
- if (net_ratelimit())
- printk(KERN_ERR "ipt_tcpmss_target:"
- " unable to allocate larger skb\n");
+ if (!newskb)
return NF_DROP;
- }
-
kfree_skb(*pskb);
*pskb = newskb;
iph = (*pskb)->nh.iph;
@@ -161,36 +125,29 @@ ipt_tcpmss_target(struct sk_buff **pskb,
opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
- tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
- htons(tcplen + TCPOLEN_MSS), tcph->check);
- tcplen += TCPOLEN_MSS;
-
+ tcph->check = nf_proto_csum_update(*pskb,
+ htons(tcplen) ^ 0xFFFF,
+ htons(tcplen + TCPOLEN_MSS),
+ tcph->check, 1);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = (newmss & 0x00ff);
- tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+ tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
+ tcph->check, 0);
oldval = ((u_int16_t *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
- tcph->check = cheat_check(oldval ^ 0xFFFF,
- ((u_int16_t *)tcph)[6], tcph->check);
+ tcph->check = nf_proto_csum_update(*pskb,
+ oldval ^ 0xFFFF,
+ ((u_int16_t *)tcph)[6],
+ tcph->check, 0);
newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
- iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
- newtotlen, iph->check);
+ iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
+ newtotlen, iph->check);
iph->tot_len = newtotlen;
-
- DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
- "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- ntohs(tcph->source),
- NIPQUAD((*pskb)->nh.iph->daddr),
- ntohs(tcph->dest),
- newmss);
-
- retmodified:
return IPT_CONTINUE;
}
@@ -200,9 +157,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
{
const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
- if (strcmp(m->u.kernel.match->name, "tcp") == 0
- && (tcpinfo->flg_cmp & TH_SYN)
- && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
+ if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
+ tcpinfo->flg_cmp & TH_SYN &&
+ !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
return 1;
return 0;
@@ -214,17 +171,17 @@ ipt_tcpmss_checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
const struct ipt_entry *e = e_void;
- if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
- ((hook_mask & ~((1 << NF_IP_FORWARD)
- | (1 << NF_IP_LOCAL_OUT)
- | (1 << NF_IP_POST_ROUTING))) != 0)) {
- printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+ if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+ (hook_mask & ~((1 << NF_IP_FORWARD) |
+ (1 << NF_IP_LOCAL_OUT) |
+ (1 << NF_IP_POST_ROUTING))) != 0) {
+ printk("TCPMSS: path-MTU clamping only supported in "
+ "FORWARD, OUTPUT and POSTROUTING hooks\n");
return 0;
}
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 1c7a5ca399b3..471a4c438b0a 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -26,27 +26,20 @@ target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo,
- void *userinfo)
+ const void *targinfo)
{
const struct ipt_tos_target_info *tosinfo = targinfo;
+ struct iphdr *iph = (*pskb)->nh.iph;
+ u_int16_t oldtos;
- if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
- u_int16_t diffs[2];
-
+ if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return NF_DROP;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos
- = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK)
- | tosinfo->tos;
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^0xFFFF));
+ iph = (*pskb)->nh.iph;
+ oldtos = iph->tos;
+ iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
+ iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
+ iph->check);
}
return IPT_CONTINUE;
}
@@ -56,7 +49,6 @@ checkentry(const char *tablename,
const void *e_void,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index f48892ae0be5..96e79cc6d0f2 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -23,11 +23,10 @@ static unsigned int
ipt_ttl_target(struct sk_buff **pskb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
struct iphdr *iph;
const struct ipt_TTL_info *info = targinfo;
- u_int16_t diffs[2];
int new_ttl;
if (!skb_make_writable(pskb, (*pskb)->len))
@@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb,
}
if (new_ttl != iph->ttl) {
- diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
+ iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF,
+ ntohs(new_ttl << 8),
+ iph->check);
iph->ttl = new_ttl;
- diffs[1] = htons(((unsigned)iph->ttl) << 8);
- iph->check = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- iph->check^0xFFFF));
}
return IPT_CONTINUE;
@@ -70,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hook_mask)
{
struct ipt_TTL_info *info = targinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d46fd677fa11..2b104ea54f48 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
- const void *targinfo, void *userinfo)
+ const void *targinfo)
{
struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
@@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename,
const void *e,
const struct xt_target *target,
void *targinfo,
- unsigned int targinfosize,
unsigned int hookmask)
{
struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2927135873d7..1798f86bc534 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -74,7 +74,6 @@ checkentry(const char *tablename,
const void *ip_void,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ipt_ah *ahinfo = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
deleted file mode 100644
index 47177591aeb6..000000000000
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* IP tables module for matching the value of the IPv4 DSCP field
- *
- * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_dscp.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP matching module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- const struct xt_match *match, const void *matchinfo,
- int offset, unsigned int protoff, int *hotdrop)
-{
- const struct ipt_dscp_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
-
- u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
- return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
-}
-
-static struct ipt_match dscp_match = {
- .name = "dscp",
- .match = match,
- .matchsize = sizeof(struct ipt_dscp_info),
- .me = THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
- return ipt_register_match(&dscp_match);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
- ipt_unregister_match(&dscp_match);
-
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b28250414933..dafbdec0efc0 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb,
static int checkentry(const char *tablename, const void *ip_void,
const struct xt_match *match,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
+ void *matchinfo, unsigned int hook_mask)
{
const struct ipt_ecn_info *info = matchinfo;
const struct ipt_ip *ip = ip_void;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 3bd2368e1fc9..4f73a61aa3dd 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -478,7 +478,6 @@ hashlimit_checkentry(const char *tablename,
const void *inf,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
struct ipt_hashlimit_info *r = matchinfo;
@@ -529,18 +528,46 @@ hashlimit_checkentry(const char *tablename,
}
static void
-hashlimit_destroy(const struct xt_match *match, void *matchinfo,
- unsigned int matchsize)
+hashlimit_destroy(const struct xt_match *match, void *matchinfo)
{
struct ipt_hashlimit_info *r = matchinfo;
htable_put(r->hinfo);
}
+#ifdef CONFIG_COMPAT
+struct compat_ipt_hashlimit_info {
+ char name[IFNAMSIZ];
+ struct hashlimit_cfg cfg;
+ compat_uptr_t hinfo;
+ compat_uptr_t master;
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+ int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+ memcpy(dst, src, off);
+ memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off);
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+ int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+ return copy_to_user(dst, src, off) ? -EFAULT : 0;
+}
+#endif
+
static struct ipt_match ipt_hashlimit = {
.name = "hashlimit",
.match = hashlimit_match,
.matchsize = sizeof(struct ipt_hashlimit_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_hashlimit_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
.checkentry = hashlimit_checkentry,
.destroy = hashlimit_destroy,
.me = THIS_MODULE
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5ac6ac023b5e..78c336f12a9e 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -56,7 +56,6 @@ checkentry(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
- unsigned int matchsize,
unsigned int hook_mask)
{
const struct ipt_owner_info *info = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 61a2139f9cfd..32ae8d7ac506 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -35,14 +35,20 @@ static unsigned int ip_list_tot = 100;
static unsigned int ip_pkt_list_tot = 20;
static unsigned int ip_list_hash_size = 0;
static unsigned int ip_list_perms = 0644;
+static unsigned int ip_list_uid = 0;
+static unsigned int ip_list_gid = 0;
module_param(ip_list_tot, uint, 0400);
module_param(ip_pkt_list_tot, uint, 0400);
module_param(ip_list_hash_size, uint, 0400);
module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0400);
+module_param(ip_list_gid, uint, 0400);
MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
struct recent_entry {
@@ -232,7 +238,7 @@ out:
static int
ipt_recent_checkentry(const char *tablename, const void *ip,
const struct xt_match *match, void *matchinfo,
- unsigned int matchsize, unsigned int hook_mask)
+ unsigned int hook_mask)
{
const struct ipt_recent_info *info = matchinfo;
struct recent_table *t;
@@ -274,6 +280,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
goto out;
}
t->proc->proc_fops = &recent_fops;
+ t->proc->uid = ip_list_uid;
+ t->proc->gid = ip_list_gid;
t->proc->data = t;
#endif
spin_lock_bh(&recent_lock);
@@ -286,8 +294,7 @@ out:
}
static void
-ipt_recent_destroy(const struct xt_match *match, void *matchinfo,
- unsigned int matchsize)
+ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
{
const struct ipt_recent_info *info = matchinfo;
struct recent_table *t;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7f417484bfbf..e2e7dd8d7903 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -90,7 +90,7 @@ ipt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_filter);
}
static unsigned int
@@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook,
return NF_ACCEPT;
}
- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4e7998beda63..79336cb42527 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_mangler);
}
static unsigned int
@@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook,
daddr = (*pskb)->nh.iph->daddr;
tos = (*pskb)->nh.iph->tos;
- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+ ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
&& ((*pskb)->nh.iph->saddr != saddr
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7912cce1e1b8..bcbeb4aeacd9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -95,7 +95,7 @@ ipt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL);
+ return ipt_do_table(pskb, hook, in, out, &packet_raw);
}
/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 663a73ee3f2f..790f00d500c3 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -25,7 +25,7 @@
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_core.h>
-unsigned long nf_ct_icmp_timeout = 30*HZ;
+unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
#if 0
#define DEBUGP printk
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d61e2a9d394d..9c6cbe3d9fb8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = {
SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 62b2762a2420..0e935b4c8741 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -38,8 +38,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
-#include <linux/config.h>
+
#include <linux/types.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
@@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (!inet->hdrincl)
raw_probe_proto_opt(&fl, msg);
+ security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
}
if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b873cbcdd0b8..20ffe8e88c0f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2639,51 +2639,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
{
struct rtable *rt = (struct rtable*)skb->dst;
struct rtmsg *r;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ struct nlmsghdr *nlh;
struct rta_cacheinfo ci;
-#ifdef CONFIG_IP_MROUTE
- struct rtattr *eptr;
-#endif
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
- r = NLMSG_DATA(nlh);
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ r = nlmsg_data(nlh);
r->rtm_family = AF_INET;
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
r->rtm_tos = rt->fl.fl4_tos;
r->rtm_table = RT_TABLE_MAIN;
+ NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
r->rtm_type = rt->rt_type;
r->rtm_scope = RT_SCOPE_UNIVERSE;
r->rtm_protocol = RTPROT_UNSPEC;
r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
if (rt->rt_flags & RTCF_NOTIFY)
r->rtm_flags |= RTM_F_NOTIFY;
- RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+
+ NLA_PUT_U32(skb, RTA_DST, rt->rt_dst);
+
if (rt->fl.fl4_src) {
r->rtm_src_len = 32;
- RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+ NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src);
}
if (rt->u.dst.dev)
- RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+ NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
#ifdef CONFIG_NET_CLS_ROUTE
if (rt->u.dst.tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+ NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
- __u32 alg = rt->rt_multipath_alg;
-
- RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
- }
+ if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+ NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
#endif
if (rt->fl.iif)
- RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+ NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst);
else if (rt->rt_src != rt->fl.fl4_src)
- RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+ NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src);
+
if (rt->rt_dst != rt->rt_gateway)
- RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+ NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway);
+
if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
- goto rtattr_failure;
+ goto nla_put_failure;
+
ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
ci.rta_used = rt->u.dst.__use;
ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
@@ -2700,10 +2703,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
}
}
-#ifdef CONFIG_IP_MROUTE
- eptr = (struct rtattr*)skb->tail;
-#endif
- RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
if (rt->fl.iif) {
#ifdef CONFIG_IP_MROUTE
u32 dst = rt->rt_dst;
@@ -2715,41 +2715,46 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
if (!nowait) {
if (err == 0)
return 0;
- goto nlmsg_failure;
+ goto nla_put_failure;
} else {
if (err == -EMSGSIZE)
- goto nlmsg_failure;
- ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
+ goto nla_put_failure;
+ ci.rta_error = err;
}
}
} else
#endif
- RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+ NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
}
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
+ return nlmsg_end(skb, nlh);
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- struct rtattr **rta = arg;
- struct rtmsg *rtm = NLMSG_DATA(nlh);
+ struct rtmsg *rtm;
+ struct nlattr *tb[RTA_MAX+1];
struct rtable *rt = NULL;
- u32 dst = 0;
- u32 src = 0;
- int iif = 0;
- int err = -ENOBUFS;
+ u32 dst, src, iif;
+ int err;
struct sk_buff *skb;
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+ if (err < 0)
+ goto errout;
+
+ rtm = nlmsg_data(nlh);
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- goto out;
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
/* Reserve room for dummy headers, this skb can pass
through good chunk of routing engine.
@@ -2760,62 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
skb->nh.iph->protocol = IPPROTO_ICMP;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
- if (rta[RTA_SRC - 1])
- memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
- if (rta[RTA_DST - 1])
- memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
- if (rta[RTA_IIF - 1])
- memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
+ src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0;
+ dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0;
+ iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
if (iif) {
- struct net_device *dev = __dev_get_by_index(iif);
- err = -ENODEV;
- if (!dev)
- goto out_free;
+ struct net_device *dev;
+
+ dev = __dev_get_by_index(iif);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto errout_free;
+ }
+
skb->protocol = htons(ETH_P_IP);
skb->dev = dev;
local_bh_disable();
err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
local_bh_enable();
- rt = (struct rtable*)skb->dst;
- if (!err && rt->u.dst.error)
+
+ rt = (struct rtable*) skb->dst;
+ if (err == 0 && rt->u.dst.error)
err = -rt->u.dst.error;
} else {
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
- .saddr = src,
- .tos = rtm->rtm_tos } } };
- int oif = 0;
- if (rta[RTA_OIF - 1])
- memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
- fl.oif = oif;
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = dst,
+ .saddr = src,
+ .tos = rtm->rtm_tos,
+ },
+ },
+ .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+ };
err = ip_route_output_key(&rt, &fl);
}
+
if (err)
- goto out_free;
+ goto errout_free;
skb->dst = &rt->u.dst;
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
- if (!err)
- goto out_free;
- if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
- }
+ if (err <= 0)
+ goto errout_free;
- err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
- if (err > 0)
- err = 0;
-out: return err;
+ err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
+ return err;
-out_free:
+errout_free:
kfree_skb(skb);
- goto out;
+ goto errout;
}
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3143,13 +3147,9 @@ int __init ip_rt_init(void)
}
#endif
- ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
- sizeof(struct rtable),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
-
- if (!ipv4_dst_ops.kmem_cachep)
- panic("IP: failed to allocate ip_dst_cache\n");
+ ipv4_dst_ops.kmem_cachep =
+ kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
rt_hash_table = (struct rt_hash_bucket *)
alloc_large_system_hash("IP route cache",
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e20be3331f67..661e0a4bca72 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
if (!req)
goto out;
+ if (security_inet_conn_request(sk, skb, req)) {
+ reqsk_free(req);
+ goto out;
+ }
ireq = inet_rsk(req);
treq = tcp_rsk(req);
treq->rcv_isn = htonl(skb->h.th->seq) - 1;
@@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
.uli_u = { .ports =
{ .sport = skb->h.th->dest,
.dport = skb->h.th->source } } };
+ security_req_classify_flow(req, &fl);
if (ip_route_output_key(&rt, &fl)) {
reqsk_free(req);
goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 70cea9d08a38..19b2071ff319 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
+#include <net/cipso_ipv4.h>
/* From af_inet.c */
extern int sysctl_ip_nonlocal_bind;
@@ -697,6 +698,40 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+#ifdef CONFIG_NETLABEL
+ {
+ .ctl_name = NET_CIPSOV4_CACHE_ENABLE,
+ .procname = "cipso_cache_enable",
+ .data = &cipso_v4_cache_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE,
+ .procname = "cipso_cache_bucket_size",
+ .data = &cipso_v4_cache_bucketsize,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_CIPSOV4_RBM_OPTFMT,
+ .procname = "cipso_rbm_optfmt",
+ .data = &cipso_v4_rbm_optfmt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_CIPSOV4_RBM_STRICTVALID,
+ .procname = "cipso_rbm_strictvalid",
+ .data = &cipso_v4_rbm_strictvalid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif /* CONFIG_NETLABEL */
{ .ctl_name = 0 }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 934396bb1376..66e9a729f6df 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -268,7 +268,7 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
-int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
@@ -568,7 +568,7 @@ new_segment:
skb->truesize += copy;
sk->sk_wmem_queued += copy;
sk->sk_forward_alloc -= copy;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
skb_shinfo(skb)->gso_segs = 0;
@@ -723,7 +723,7 @@ new_segment:
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, tp, skb);
copy = size_goal;
@@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
* receive buffer and there was a small segment
* in queue.
*/
- (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
- !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+ (copied > 0 &&
+ ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
+ ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+ !icsk->icsk_ack.pingpong)) &&
+ !atomic_read(&sk->sk_rmem_alloc)))
time_to_ack = 1;
}
@@ -2205,7 +2208,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
th->fin = th->psh = 0;
th->check = ~csum_fold(th->check + delta);
- if (skb->ip_summed != CHECKSUM_HW)
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
th->check = csum_fold(csum_partial(skb->h.raw, thlen,
skb->csum));
@@ -2219,7 +2222,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
th->check = ~csum_fold(th->check + delta);
- if (skb->ip_summed != CHECKSUM_HW)
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
th->check = csum_fold(csum_partial(skb->h.raw, thlen,
skb->csum));
@@ -2254,9 +2257,7 @@ void __init tcp_init(void)
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
- if (!tcp_hashinfo.bind_bucket_cachep)
- panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b0134ab08379..5730333cd0ac 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = {
static int __init bictcp_register(void)
{
- BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&bictcp);
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 2be27980ca78..a60ef38d75c6 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = {
static int __init cubictcp_register(void)
{
- BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
/* Precompute a bunch of the scaling factors that are used per-packet
* based on SRTT of 100ms
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index fa3e1aad660c..c4fc811bf377 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
static int __init hstcp_register(void)
{
- BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_highspeed);
}
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 6edfe5e4510e..682e7d5b6f2f 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = {
static int __init htcp_register(void)
{
- BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
return tcp_register_congestion_control(&htcp);
}
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 7406e0c5fb8e..59e691d26f64 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = {
static int __init hybla_register(void)
{
- BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_hybla);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 159fa3f1ba67..b3def0df14fb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,24 +72,24 @@
#include <asm/unaligned.h>
#include <net/netdma.h>
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-int sysctl_tcp_fack = 1;
-int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn;
-int sysctl_tcp_dsack = 1;
-int sysctl_tcp_app_win = 31;
-int sysctl_tcp_adv_win_scale = 2;
-
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_frto;
-int sysctl_tcp_nometrics_save;
-
-int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc;
+int sysctl_tcp_timestamps __read_mostly = 1;
+int sysctl_tcp_window_scaling __read_mostly = 1;
+int sysctl_tcp_sack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_dsack __read_mostly = 1;
+int sysctl_tcp_app_win __read_mostly = 31;
+int sysctl_tcp_adv_win_scale __read_mostly = 2;
+
+int sysctl_tcp_stdurg __read_mostly;
+int sysctl_tcp_rfc1337 __read_mostly;
+int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_nometrics_save __read_mostly;
+
+int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
+int sysctl_tcp_abc __read_mostly;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
/* skb->len may jitter because of SACKs, even if peer
* sends good full-sized frames.
*/
- len = skb->len;
+ len = skb_shinfo(skb)->gso_size ?: skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
icsk->icsk_ack.rcv_mss = len;
} else {
@@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk,
return;
}
}
+ if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
+ icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4b04c3edd4a9..39b179856082 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -78,8 +78,8 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-int sysctl_tcp_tw_reuse;
-int sysctl_tcp_low_latency;
+int sysctl_tcp_tw_reuse __read_mostly;
+int sysctl_tcp_low_latency __read_mostly;
/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8
@@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
struct inet_sock *inet = inet_sk(sk);
struct tcphdr *th = skb->h.th;
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
skb->csum = offsetof(struct tcphdr, check);
} else {
@@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
th->check = 0;
th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
skb->csum = offsetof(struct tcphdr, check);
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
}
@@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_openreq_init(req, &tmp_opt, skb);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
ireq = inet_rsk(req);
ireq->loc_addr = daddr;
ireq->rmt_addr = saddr;
@@ -948,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
- th->source, skb->nh.iph->daddr,
- ntohs(th->dest), inet_iif(skb));
+ nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+ th->source, skb->nh.iph->daddr,
+ th->dest, inet_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -970,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
static int tcp_v4_checksum_init(struct sk_buff *skb)
{
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
skb->nh.iph->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1087,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->sacked = 0;
sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
- skb->nh.iph->daddr, ntohs(th->dest),
+ skb->nh.iph->daddr, th->dest,
inet_iif(skb));
if (!sk)
@@ -1101,7 +1104,7 @@ process:
goto discard_and_relse;
nf_reset(skb);
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
@@ -1165,7 +1168,7 @@ do_time_wait:
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
skb->nh.iph->daddr,
- ntohs(th->dest),
+ th->dest,
inet_iif(skb));
if (sk2) {
inet_twsk_deschedule((struct inet_timewait_sock *)sk,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 48f28d617ce6..308fb7e071c5 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -35,7 +35,6 @@
* Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <net/tcp.h>
@@ -328,7 +327,7 @@ static struct tcp_congestion_ops tcp_lp = {
static int __init tcp_lp_register(void)
{
- BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_lp);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624e2b2c7f53..0163d9826907 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -34,8 +34,8 @@
#define SYNC_INIT 1
#endif
-int sysctl_tcp_syncookies = SYNC_INIT;
-int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+int sysctl_tcp_abort_on_overflow __read_mostly;
struct inet_timewait_death_row tcp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4f3ffe1b3b4..061edfae0c29 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -43,24 +43,24 @@
#include <linux/smp_lock.h>
/* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse = 1;
+int sysctl_tcp_retrans_collapse __read_mostly = 1;
/* People can turn this on to work with those rare, broken TCPs that
* interpret the window field as a signed quantity.
*/
-int sysctl_tcp_workaround_signed_windows = 0;
+int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
*/
-int sysctl_tcp_tso_win_divisor = 3;
+int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-int sysctl_tcp_mtu_probing = 0;
-int sysctl_tcp_base_mss = 512;
+int sysctl_tcp_mtu_probing __read_mostly = 0;
+int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
-int sysctl_tcp_slow_start_after_idle = 1;
+int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
static void update_send_head(struct sock *sk, struct tcp_sock *tp,
struct sk_buff *skb)
@@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+ if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
/* Copy and checksum data tail into the new buffer. */
buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
nsize, 0);
@@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
skb->csum = csum_block_sub(skb->csum, buff->csum, len);
} else {
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
}
@@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
__pskb_trim_head(skb, len - skb_headlen(skb));
TCP_SKB_CB(skb)->seq += len;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb->truesize -= len;
sk->sk_wmem_queued -= len;
@@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* This packet was never sent out yet, so no SACK bits. */
TCP_SKB_CB(buff)->sacked = 0;
- buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+ buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
/* Fix up tso_factor for both original and new SKB. */
@@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- if (skb->ip_summed == CHECKSUM_HW)
- nskb->ip_summed = CHECKSUM_HW;
+ nskb->ip_summed = skb->ip_summed;
len = 0;
while (len < probe_size) {
@@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk)
~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_HW)
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
skb->csum = csum_partial(skb->data, skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
@@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
- if (next_skb->ip_summed == CHECKSUM_HW)
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = next_skb->ip_summed;
- if (skb->ip_summed != CHECKSUM_HW)
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
/* Update sequence range on original skb. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7c1bde3cd6cb..fb09ade5897b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -23,14 +23,14 @@
#include <linux/module.h>
#include <net/tcp.h>
-int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
-int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
-int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
-int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
-int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
-int sysctl_tcp_retries1 = TCP_RETR1;
-int sysctl_tcp_retries2 = TCP_RETR2;
-int sysctl_tcp_orphan_retries;
+int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
+int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
+int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
+int sysctl_tcp_orphan_retries __read_mostly;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 490360b5b4bf..a3b7aa015a2f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = {
static int __init tcp_vegas_register(void)
{
- BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&tcp_vegas);
return 0;
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 11b42a7135c1..ce57bf302f6c 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -9,7 +9,6 @@
* See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -213,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = {
static int __init tcp_veno_register(void)
{
- BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&tcp_veno);
return 0;
}
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 5446312ffd2a..4f42a86c77f3 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = {
static int __init tcp_westwood_register(void)
{
- BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_westwood);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f136cec96d95..77e265d7bb8f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -118,14 +118,33 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
struct hlist_head udp_hash[UDP_HTABLE_SIZE];
DEFINE_RWLOCK(udp_hash_lock);
-/* Shared by v4/v6 udp. */
-int udp_port_rover;
+static int udp_port_rover;
-static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_lport_inuse(u16 num)
+{
+ struct sock *sk;
+ struct hlist_node *node;
+
+ sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+ if (inet_sk(sk)->num == num)
+ return 1;
+ return 0;
+}
+
+/**
+ * udp_get_port - common port lookup for IPv4 and IPv6
+ *
+ * @sk: socket struct in question
+ * @snum: port number to look up
+ * @saddr_comp: AF-dependent comparison of bound local IP addresses
+ */
+int udp_get_port(struct sock *sk, unsigned short snum,
+ int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2))
{
struct hlist_node *node;
+ struct hlist_head *head;
struct sock *sk2;
- struct inet_sock *inet = inet_sk(sk);
+ int error = 1;
write_lock_bh(&udp_hash_lock);
if (snum == 0) {
@@ -137,11 +156,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
best_size_so_far = 32767;
best = result = udp_port_rover;
for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
- struct hlist_head *list;
int size;
- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
- if (hlist_empty(list)) {
+ head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+ if (hlist_empty(head)) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0] +
((result - sysctl_local_port_range[0]) &
@@ -149,12 +167,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
goto gotit;
}
size = 0;
- sk_for_each(sk2, node, list)
- if (++size >= best_size_so_far)
- goto next;
- best_size_so_far = size;
- best = result;
- next:;
+ sk_for_each(sk2, node, head)
+ if (++size < best_size_so_far) {
+ best_size_so_far = size;
+ best = result;
+ }
}
result = best;
for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
@@ -170,38 +187,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
gotit:
udp_port_rover = snum = result;
} else {
- sk_for_each(sk2, node,
- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
- struct inet_sock *inet2 = inet_sk(sk2);
-
- if (inet2->num == snum &&
- sk2 != sk &&
- !ipv6_only_sock(sk2) &&
- (!sk2->sk_bound_dev_if ||
- !sk->sk_bound_dev_if ||
- sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (!inet2->rcv_saddr ||
- !inet->rcv_saddr ||
- inet2->rcv_saddr == inet->rcv_saddr) &&
- (!sk2->sk_reuse || !sk->sk_reuse))
+ head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+
+ sk_for_each(sk2, node, head)
+ if (inet_sk(sk2)->num == snum &&
+ sk2 != sk &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+ || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ (*saddr_cmp)(sk, sk2) )
goto fail;
- }
}
- inet->num = snum;
+ inet_sk(sk)->num = snum;
if (sk_unhashed(sk)) {
- struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
-
- sk_add_node(sk, h);
+ head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+ sk_add_node(sk, head);
sock_prot_inc_use(sk->sk_prot);
}
- write_unlock_bh(&udp_hash_lock);
- return 0;
-
+ error = 0;
fail:
write_unlock_bh(&udp_hash_lock);
- return 1;
+ return error;
+}
+
+static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+{
+ struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
+
+ return ( !ipv6_only_sock(sk2) &&
+ (!inet1->rcv_saddr || !inet2->rcv_saddr ||
+ inet1->rcv_saddr == inet2->rcv_saddr ));
}
+static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+ return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
+}
+
+
static void udp_v4_hash(struct sock *sk)
{
BUG();
@@ -429,7 +452,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
/*
* Only one fragment on the socket.
*/
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
skb->csum = offsetof(struct udphdr, check);
uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
up->len, IPPROTO_UDP, 0);
@@ -448,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
* fragments on the socket so that all csums of sk_buffs
* should be together.
*/
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
int offset = (unsigned char *)uh - skb->data;
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
@@ -603,6 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
+ security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
if (err)
goto out;
@@ -661,6 +685,16 @@ out:
UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
return len;
}
+ /*
+ * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
+ * ENOBUFS might not be good (it's not tunable per se), but otherwise
+ * we don't have a good statistic (IpOutDiscards but it can be too many
+ * things). We could add another new stat but at least for now that
+ * seems like overkill.
+ */
+ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+ UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+ }
return err;
do_confirm:
@@ -980,6 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
+ int rc;
/*
* Charge it to the socket, dropping if the queue is full.
@@ -1026,7 +1061,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (sock_queue_rcv_skb(sk,skb)<0) {
+ if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+ /* Note that an ENOMEM error is charged twice */
+ if (rc == -ENOMEM)
+ UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
kfree_skb(skb);
return -1;
@@ -1087,7 +1125,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
{
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
- } else if (skb->ip_summed == CHECKSUM_HW) {
+ } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -1581,7 +1619,7 @@ EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);
EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_port_rover);
+EXPORT_SYMBOL(udp_get_port);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
EXPORT_SYMBOL(udp_poll);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 817ed84511a6..040e8475f295 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
if (x->mode->input(x, skb))
goto drop;
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
decaps = 1;
break;
}
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a9e6b3dd19c9..92676b7e4034 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -21,9 +21,8 @@
* On exit, skb->h will be set to the start of the payload to be processed
* by x->type->output and skb->nh will be set to the top IP header.
*/
-static int xfrm4_transport_output(struct sk_buff *skb)
+static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_state *x;
struct iphdr *iph;
int ihl;
@@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb)
ihl = iph->ihl * 4;
skb->h.raw += ihl;
- x = skb->dst->xfrm;
skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
return 0;
}
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 13cafbe56ce3..e23c21d31a53 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
* On exit, skb->h will be set to the start of the payload to be processed
* by x->type->output and skb->nh will be set to the top IP header.
*/
-static int xfrm4_tunnel_output(struct sk_buff *skb)
+static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
- struct xfrm_state *x = dst->xfrm;
struct iphdr *iph, *top_iph;
int flags;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d16f863cf687..04403fb01a58 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb)
struct xfrm_state *x = dst->xfrm;
int err;
- if (skb->ip_summed == CHECKSUM_HW) {
- err = skb_checksum_help(skb, 0);
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ err = skb_checksum_help(skb);
if (err)
goto error_nolock;
}
- if (x->props.mode) {
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
err = xfrm4_tunnel_check_size(skb);
if (err)
goto error_nolock;
@@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
if (err)
goto error;
- err = x->mode->output(skb);
+ err = x->mode->output(x, skb);
if (err)
goto error;
@@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
}
dst = skb->dst;
x = dst->xfrm;
- } while (x && !x->props.mode);
+ } while (x && (x->props.mode != XFRM_MODE_TUNNEL));
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
err = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8f50eae47d03..eabcd27b1767 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
return __ip_route_output_key((struct rtable**)dst, fl);
}
+static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+ struct rtable *rt;
+ struct flowi fl_tunnel = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = daddr->a4,
+ },
+ },
+ };
+
+ if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
+ saddr->a4 = rt->rt_src;
+ dst_release(&rt->u.dst);
+ return 0;
+ }
+ return -EHOSTUNREACH;
+}
+
static struct dst_entry *
__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
{
@@ -33,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
xdst->u.rt.fl.fl4_src == fl->fl4_src &&
xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
- xfrm_bundle_ok(xdst, fl, AF_INET)) {
+ xfrm_bundle_ok(xdst, fl, AF_INET, 0)) {
dst_clone(dst);
break;
}
@@ -93,10 +112,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
xdst = (struct xfrm_dst *)dst1;
xdst->route = &rt->u.dst;
+ xdst->genid = xfrm[i]->genid;
dst1->next = dst_prev;
dst_prev = dst1;
- if (xfrm[i]->props.mode) {
+ if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
remote = xfrm[i]->id.daddr.a4;
local = xfrm[i]->props.saddr.a4;
tunnel = 1;
@@ -135,6 +155,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
dst_prev->flags |= DST_HOST;
dst_prev->lastuse = jiffies;
dst_prev->header_len = header_len;
+ dst_prev->nfheader_len = 0;
dst_prev->trailer_len = trailer_len;
memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
@@ -296,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.family = AF_INET,
.dst_ops = &xfrm4_dst_ops,
.dst_lookup = xfrm4_dst_lookup,
+ .get_saddr = xfrm4_get_saddr,
.find_bundle = __xfrm4_find_bundle,
.bundle_create = __xfrm4_bundle_create,
.decode_session = _decode_session4,
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 81e1751c966e..fe2034494d08 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,99 +42,15 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->props.saddr = tmpl->saddr;
if (x->props.saddr.a4 == 0)
x->props.saddr.a4 = saddr->a4;
- if (tmpl->mode && x->props.saddr.a4 == 0) {
- struct rtable *rt;
- struct flowi fl_tunnel = {
- .nl_u = {
- .ip4_u = {
- .daddr = x->id.daddr.a4,
- }
- }
- };
- if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
- &fl_tunnel, AF_INET)) {
- x->props.saddr.a4 = rt->rt_src;
- dst_release(&rt->u.dst);
- }
- }
x->props.mode = tmpl->mode;
x->props.reqid = tmpl->reqid;
x->props.family = AF_INET;
}
-static struct xfrm_state *
-__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
-{
- unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
- struct xfrm_state *x;
-
- list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
- if (x->props.family == AF_INET &&
- spi == x->id.spi &&
- daddr->a4 == x->id.daddr.a4 &&
- proto == x->id.proto) {
- xfrm_state_hold(x);
- return x;
- }
- }
- return NULL;
-}
-
-static struct xfrm_state *
-__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto,
- xfrm_address_t *daddr, xfrm_address_t *saddr,
- int create)
-{
- struct xfrm_state *x, *x0;
- unsigned h = __xfrm4_dst_hash(daddr);
-
- x0 = NULL;
-
- list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
- if (x->props.family == AF_INET &&
- daddr->a4 == x->id.daddr.a4 &&
- mode == x->props.mode &&
- proto == x->id.proto &&
- saddr->a4 == x->props.saddr.a4 &&
- reqid == x->props.reqid &&
- x->km.state == XFRM_STATE_ACQ &&
- !x->id.spi) {
- x0 = x;
- break;
- }
- }
- if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
- x0->sel.daddr.a4 = daddr->a4;
- x0->sel.saddr.a4 = saddr->a4;
- x0->sel.prefixlen_d = 32;
- x0->sel.prefixlen_s = 32;
- x0->props.saddr.a4 = saddr->a4;
- x0->km.state = XFRM_STATE_ACQ;
- x0->id.daddr.a4 = daddr->a4;
- x0->id.proto = proto;
- x0->props.family = AF_INET;
- x0->props.mode = mode;
- x0->props.reqid = reqid;
- x0->props.family = AF_INET;
- x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
- xfrm_state_hold(x0);
- x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
- add_timer(&x0->timer);
- xfrm_state_hold(x0);
- list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
- wake_up(&km_waitq);
- }
- if (x0)
- xfrm_state_hold(x0);
- return x0;
-}
-
static struct xfrm_state_afinfo xfrm4_state_afinfo = {
.family = AF_INET,
.init_flags = xfrm4_init_flags,
.init_tempsel = __xfrm4_init_tempsel,
- .state_lookup = __xfrm4_state_lookup,
- .find_acq = __xfrm4_find_acq,
};
void __init xfrm4_state_init(void)
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index f8ceaa127c83..f110af5b1319 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
static int ipip_init_state(struct xfrm_state *x)
{
- if (!x->props.mode)
+ if (x->props.mode != XFRM_MODE_TUNNEL)
return -EINVAL;
if (x->encap)