diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/netfilter/nf_nat_core.c | 120 |
1 files changed, 118 insertions, 2 deletions
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6d8da6dddf99..d9ea2c26f309 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -183,7 +183,35 @@ hash_by_src(const struct net *net, return reciprocal_scale(hash, nf_nat_htable_size); } -/* Is this tuple already taken? (not by us) */ +/** + * nf_nat_used_tuple - check if proposed nat tuple clashes with existing entry + * @tuple: proposed NAT binding + * @ignored_conntrack: our (unconfirmed) conntrack entry + * + * A conntrack entry can be inserted to the connection tracking table + * if there is no existing entry with an identical tuple in either direction. + * + * Example: + * INITIATOR -> NAT/PAT -> RESPONDER + * + * INITIATOR passes through NAT/PAT ("us") and SNAT is done (saddr rewrite). + * Then, later, NAT/PAT itself also connects to RESPONDER. + * + * This will not work if the SNAT done earlier has same IP:PORT source pair. + * + * Conntrack table has: + * ORIGINAL: $IP_INITIATOR:$SPORT -> $IP_RESPONDER:$DPORT + * REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT + * + * and new locally originating connection wants: + * ORIGINAL: $IP_NAT:$SPORT -> $IP_RESPONDER:$DPORT + * REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT + * + * ... which would mean incoming packets cannot be distinguished between + * the existing and the newly added entry (identical IP_CT_DIR_REPLY tuple). + * + * @return: true if the proposed NAT mapping collides with an existing entry. + */ static int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) @@ -200,6 +228,94 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, return nf_conntrack_tuple_taken(&reply, ignored_conntrack); } +static bool nf_nat_allow_clash(const struct nf_conn *ct) +{ + return nf_ct_l4proto_find(nf_ct_protonum(ct))->allow_clash; +} + +/** + * nf_nat_used_tuple_new - check if to-be-inserted conntrack collides with existing entry + * @tuple: proposed NAT binding + * @ignored_ct: our (unconfirmed) conntrack entry + * + * Same as nf_nat_used_tuple, but also check for rare clash in reverse + * direction. Should be called only when @tuple has not been altered, i.e. + * @ignored_conntrack will not be subject to NAT. + * + * @return: true if the proposed NAT mapping collides with existing entry. + */ +static noinline bool +nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_ct) +{ + static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT; + const struct nf_conntrack_tuple_hash *thash; + const struct nf_conntrack_zone *zone; + struct nf_conn *ct; + bool taken = true; + struct net *net; + + if (!nf_nat_used_tuple(tuple, ignored_ct)) + return false; + + if (!nf_nat_allow_clash(ignored_ct)) + return true; + + /* Initial choice clashes with existing conntrack. + * Check for (rare) reverse collision. + * + * This can happen when new packets are received in both directions + * at the exact same time on different CPUs. + * + * Without SMP, first packet creates new conntrack entry and second + * packet is resolved as established reply packet. + * + * With parallel processing, both packets could be picked up as + * new and both get their own ct entry allocated. + * + * If ignored_conntrack and colliding ct are not subject to NAT then + * pretend the tuple is available and let later clash resolution + * handle this at insertion time. + * + * Without it, the 'reply' packet has its source port rewritten + * by nat engine. + */ + if (READ_ONCE(ignored_ct->status) & uses_nat) + return true; + + net = nf_ct_net(ignored_ct); + zone = nf_ct_zone(ignored_ct); + + thash = nf_conntrack_find_get(net, zone, tuple); + if (unlikely(!thash)) /* clashing entry went away */ + return false; + + ct = nf_ct_tuplehash_to_ctrack(thash); + + /* NB: IP_CT_DIR_ORIGINAL should be impossible because + * nf_nat_used_tuple() handles origin collisions. + * + * Handle remote chance other CPU confirmed its ct right after. + */ + if (thash->tuple.dst.dir != IP_CT_DIR_REPLY) + goto out; + + /* clashing connection subject to NAT? Retry with new tuple. */ + if (READ_ONCE(ct->status) & uses_nat) + goto out; + + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) && + nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) { + taken = false; + goto out; + } +out: + nf_ct_put(ct); + return taken; +} + static bool nf_nat_may_kill(struct nf_conn *ct, unsigned long flags) { static const unsigned long flags_refuse = IPS_FIXED_TIMEOUT | @@ -611,7 +727,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (nf_in_range(orig_tuple, range)) { - if (!nf_nat_used_tuple(orig_tuple, ct)) { + if (!nf_nat_used_tuple_new(orig_tuple, ct)) { *tuple = *orig_tuple; return; } |