diff options
-rw-r--r-- | include/net/netfilter/nf_conntrack.h | 2 | ||||
-rw-r--r-- | include/net/netfilter/nf_nat.h | 15 | ||||
-rw-r--r-- | include/net/netfilter/nf_queue.h | 8 | ||||
-rw-r--r-- | include/uapi/linux/netfilter/nfnetlink_conntrack.h | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/iptable_nat.c | 4 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_nat.c | 4 | ||||
-rw-r--r-- | net/netfilter/core.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipset/ip_set_core.c | 243 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 25 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 118 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_tcp.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_queue.c | 152 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_queue_core.c | 14 |
13 files changed, 332 insertions, 259 deletions
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f1494feba79f..caca0c4d6b4b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -182,7 +182,7 @@ __nf_conntrack_find(struct net *net, u16 zone, extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); -extern void nf_ct_insert_dying_list(struct nf_conn *ct); +extern void nf_ct_dying_timeout(struct nf_conn *ct); extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report); diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index bd8eea720f2e..ad14a799fd2e 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -68,4 +68,19 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) #endif } +static inline bool nf_nat_oif_changed(unsigned int hooknum, + enum ip_conntrack_info ctinfo, + struct nf_conn_nat *nat, + const struct net_device *out) +{ +#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE) + return nat->masq_index && hooknum == NF_INET_POST_ROUTING && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && + nat->masq_index != out->ifindex; +#else + return false; +#endif +} + #endif diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 252fd1010b77..fb1c0be38b6d 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -21,14 +21,10 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); - char *name; }; -extern int nf_register_queue_handler(u_int8_t pf, - const struct nf_queue_handler *qh); -extern int nf_unregister_queue_handler(u_int8_t pf, - const struct nf_queue_handler *qh); -extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh); +void nf_register_queue_handler(const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(void); extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); #endif /* _NF_QUEUE_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 43bfe3e1685b..86e930cf3dfb 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -9,6 +9,8 @@ enum cntl_msg_types { IPCTNL_MSG_CT_GET_CTRZERO, IPCTNL_MSG_CT_GET_STATS_CPU, IPCTNL_MSG_CT_GET_STATS, + IPCTNL_MSG_CT_GET_DYING, + IPCTNL_MSG_CT_GET_UNCONFIRMED, IPCTNL_MSG_MAX }; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ac635a7b4416..da2c8a368f68 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -134,6 +134,10 @@ nf_nat_ipv4_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; + } } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index fa84cf8ec6bc..6c8ae24b85eb 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -137,6 +137,10 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; + } } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 68912dadf13d..a9c488b6c50d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -295,8 +295,6 @@ void __init netfilter_init(void) panic("cannot create netfilter proc entry"); #endif - if (netfilter_queue_init() < 0) - panic("cannot initialize nf_queue"); if (netfilter_log_init() < 0) panic("cannot initialize nf_log"); } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index fed899f600b2..6d6d8f2b033e 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -28,9 +28,10 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ -static struct ip_set **ip_set_list; /* all individual sets */ +static struct ip_set * __rcu *ip_set_list; /* all individual sets */ static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ +#define IP_SET_INC 64 #define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) static unsigned int max_sets; @@ -42,6 +43,12 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); +/* When the nfnl mutex is held: */ +#define nfnl_dereference(p) \ + rcu_dereference_protected(p, 1) +#define nfnl_set(id) \ + nfnl_dereference(ip_set_list)[id] + /* * The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -321,19 +328,19 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); */ static inline void -__ip_set_get(ip_set_id_t index) +__ip_set_get(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); - ip_set_list[index]->ref++; + set->ref++; write_unlock_bh(&ip_set_ref_lock); } static inline void -__ip_set_put(ip_set_id_t index) +__ip_set_put(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); - BUG_ON(ip_set_list[index]->ref == 0); - ip_set_list[index]->ref--; + BUG_ON(set->ref == 0); + set->ref--; write_unlock_bh(&ip_set_ref_lock); } @@ -344,12 +351,25 @@ __ip_set_put(ip_set_id_t index) * so it can't be destroyed (or changed) under our foot. */ +static inline struct ip_set * +ip_set_rcu_get(ip_set_id_t index) +{ + struct ip_set *set; + + rcu_read_lock(); + /* ip_set_list itself needs to be protected */ + set = rcu_dereference(ip_set_list)[index]; + rcu_read_unlock(); + + return set; +} + int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, const struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_list[index]; + struct ip_set *set = ip_set_rcu_get(index); int ret = 0; BUG_ON(set == NULL); @@ -388,7 +408,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, const struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_list[index]; + struct ip_set *set = ip_set_rcu_get(index); int ret; BUG_ON(set == NULL); @@ -411,7 +431,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, const struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_list[index]; + struct ip_set *set = ip_set_rcu_get(index); int ret = 0; BUG_ON(set == NULL); @@ -440,14 +460,17 @@ ip_set_get_byname(const char *name, struct ip_set **set) ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + rcu_read_lock(); for (i = 0; i < ip_set_max; i++) { - s = ip_set_list[i]; + s = rcu_dereference(ip_set_list)[i]; if (s != NULL && STREQ(s->name, name)) { - __ip_set_get(i); + __ip_set_get(s); index = i; *set = s; + break; } } + rcu_read_unlock(); return index; } @@ -462,8 +485,13 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); void ip_set_put_byindex(ip_set_id_t index) { - if (ip_set_list[index] != NULL) - __ip_set_put(index); + struct ip_set *set; + + rcu_read_lock(); + set = rcu_dereference(ip_set_list)[index]; + if (set != NULL) + __ip_set_put(set); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ip_set_put_byindex); @@ -477,7 +505,7 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); const char * ip_set_name_byindex(ip_set_id_t index) { - const struct ip_set *set = ip_set_list[index]; + const struct ip_set *set = ip_set_rcu_get(index); BUG_ON(set == NULL); BUG_ON(set->ref == 0); @@ -501,11 +529,18 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); ip_set_id_t ip_set_nfnl_get(const char *name) { + ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; - ip_set_id_t index; nfnl_lock(); - index = ip_set_get_byname(name, &s); + for (i = 0; i < ip_set_max; i++) { + s = nfnl_set(i); + if (s != NULL && STREQ(s->name, name)) { + __ip_set_get(s); + index = i; + break; + } + } nfnl_unlock(); return index; @@ -521,12 +556,15 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get); ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index) { + struct ip_set *set; + if (index > ip_set_max) return IPSET_INVALID_ID; nfnl_lock(); - if (ip_set_list[index]) - __ip_set_get(index); + set = nfnl_set(index); + if (set) + __ip_set_get(set); else index = IPSET_INVALID_ID; nfnl_unlock(); @@ -545,8 +583,11 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); void ip_set_nfnl_put(ip_set_id_t index) { + struct ip_set *set; nfnl_lock(); - ip_set_put_byindex(index); + set = nfnl_set(index); + if (set != NULL) + __ip_set_put(set); nfnl_unlock(); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -603,41 +644,46 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, }; -static ip_set_id_t -find_set_id(const char *name) +static struct ip_set * +find_set_and_id(const char *name, ip_set_id_t *id) { - ip_set_id_t i, index = IPSET_INVALID_ID; - const struct ip_set *set; + struct ip_set *set = NULL; + ip_set_id_t i; - for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { - set = ip_set_list[i]; - if (set != NULL && STREQ(set->name, name)) - index = i; + *id = IPSET_INVALID_ID; + for (i = 0; i < ip_set_max; i++) { + set = nfnl_set(i); + if (set != NULL && STREQ(set->name, name)) { + *id = i; + break; + } } - return index; + return (*id == IPSET_INVALID_ID ? NULL : set); } static inline struct ip_set * find_set(const char *name) { - ip_set_id_t index = find_set_id(name); + ip_set_id_t id; - return index == IPSET_INVALID_ID ? NULL : ip_set_list[index]; + return find_set_and_id(name, &id); } static int find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) { + struct ip_set *s; ip_set_id_t i; *index = IPSET_INVALID_ID; for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] == NULL) { + s = nfnl_set(i); + if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; - } else if (STREQ(name, ip_set_list[i]->name)) { + } else if (STREQ(name, s->name)) { /* Name clash */ - *set = ip_set_list[i]; + *set = s; return -EEXIST; } } @@ -730,10 +776,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * and check clashing. */ ret = find_free_id(set->name, &index, &clash); - if (ret != 0) { + if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ - if (ret == -EEXIST && - (flags & IPSET_FLAG_EXIST) && + if ((flags & IPSET_FLAG_EXIST) && STREQ(set->type->name, clash->type->name) && set->type->family == clash->type->family && set->type->revision_min == clash->type->revision_min && @@ -741,13 +786,36 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set->variant->same_set(set, clash)) ret = 0; goto cleanup; - } + } else if (ret == -IPSET_ERR_MAX_SETS) { + struct ip_set **list, **tmp; + ip_set_id_t i = ip_set_max + IP_SET_INC; + + if (i < ip_set_max || i == IPSET_INVALID_ID) + /* Wraparound */ + goto cleanup; + + list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL); + if (!list) + goto cleanup; + /* nfnl mutex is held, both lists are valid */ + tmp = nfnl_dereference(ip_set_list); + memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max); + rcu_assign_pointer(ip_set_list, list); + /* Make sure all current packets have passed through */ + synchronize_net(); + /* Use new list */ + index = ip_set_max; + ip_set_max = i; + kfree(tmp); + ret = 0; + } else if (ret) + goto cleanup; /* * Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); - ip_set_list[index] = set; + nfnl_set(index) = set; return ret; @@ -772,10 +840,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { static void ip_set_destroy_set(ip_set_id_t index) { - struct ip_set *set = ip_set_list[index]; + struct ip_set *set = nfnl_set(index); pr_debug("set: %s\n", set->name); - ip_set_list[index] = NULL; + nfnl_set(index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -788,6 +856,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -807,22 +876,24 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { + s = nfnl_set(i); + if (s != NULL && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } } read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL) + s = nfnl_set(i); + if (s != NULL) ip_set_destroy_set(i); } } else { - i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (i == IPSET_INVALID_ID) { + s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i); + if (s == NULL) { ret = -ENOENT; goto out; - } else if (ip_set_list[i]->ref) { + } else if (s->ref) { ret = -IPSET_ERR_BUSY; goto out; } @@ -853,21 +924,24 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set *s; ip_set_id_t i; if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) - if (ip_set_list[i] != NULL) - ip_set_flush_set(ip_set_list[i]); + for (i = 0; i < ip_set_max; i++) { + s = nfnl_set(i); + if (s != NULL) + ip_set_flush_set(s); + } } else { - i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (i == IPSET_INVALID_ID) + s = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (s == NULL) return -ENOENT; - ip_set_flush_set(ip_set_list[i]); + ip_set_flush_set(s); } return 0; @@ -889,7 +963,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set *set; + struct ip_set *set, *s; const char *name2; ip_set_id_t i; int ret = 0; @@ -911,8 +985,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL && - STREQ(ip_set_list[i]->name, name2)) { + s = nfnl_set(i); + if (s != NULL && STREQ(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; } @@ -947,17 +1021,14 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (from_id == IPSET_INVALID_ID) + from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); + if (from == NULL) return -ENOENT; - to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2])); - if (to_id == IPSET_INVALID_ID) + to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); + if (to == NULL) return -IPSET_ERR_EXIST_SETNAME2; - from = ip_set_list[from_id]; - to = ip_set_list[to_id]; - /* Features must not change. * Not an artificial restriction anymore, as we must prevent * possible loops created by swapping in setlist type of sets. */ @@ -971,8 +1042,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); - ip_set_list[from_id] = to; - ip_set_list[to_id] = from; + nfnl_set(from_id) = to; + nfnl_set(to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; @@ -992,7 +1063,7 @@ static int ip_set_dump_done(struct netlink_callback *cb) { if (cb->args[2]) { - pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); + pr_debug("release set %s\n", nfnl_set(cb->args[1])->name); ip_set_put_byindex((ip_set_id_t) cb->args[1]); } return 0; @@ -1030,8 +1101,11 @@ dump_init(struct netlink_callback *cb) */ if (cda[IPSET_ATTR_SETNAME]) { - index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); - if (index == IPSET_INVALID_ID) + struct ip_set *set; + + set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]), + &index); + if (set == NULL) return -ENOENT; dump_type = DUMP_ONE; @@ -1081,7 +1155,7 @@ dump_last: dump_type, dump_flags, cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; - set = ip_set_list[index]; + set = nfnl_set(index); if (set == NULL) { if (dump_type == DUMP_ONE) { ret = -ENOENT; @@ -1100,7 +1174,7 @@ dump_last: if (!cb->args[2]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - __ip_set_get(index); + __ip_set_get(set); } nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, @@ -1159,7 +1233,7 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { - pr_debug("release set %s\n", ip_set_list[index]->name); + pr_debug("release set %s\n", nfnl_set(index)->name); ip_set_put_byindex(index); cb->args[2] = 0; } @@ -1409,17 +1483,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; - ip_set_id_t index; int ret = 0; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL)) return -IPSET_ERR_PROTOCOL; - index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (index == IPSET_INVALID_ID) + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) return -ENOENT; - set = ip_set_list[index]; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) @@ -1684,6 +1756,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } case IP_SET_OP_GET_BYNAME: { struct ip_set_req_get_set *req_get = data; + ip_set_id_t id; if (*len != sizeof(struct ip_set_req_get_set)) { ret = -EINVAL; @@ -1691,12 +1764,14 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(); - req_get->set.index = find_set_id(req_get->set.name); + find_set_and_id(req_get->set.name, &id); + req_get->set.index = id; nfnl_unlock(); goto copy; } case IP_SET_OP_GET_BYINDEX: { struct ip_set_req_get_set *req_get = data; + struct ip_set *set; if (*len != sizeof(struct ip_set_req_get_set) || req_get->set.index >= ip_set_max) { @@ -1704,9 +1779,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } nfnl_lock(); - strncpy(req_get->set.name, - ip_set_list[req_get->set.index] - ? ip_set_list[req_get->set.index]->name : "", + set = nfnl_set(req_get->set.index); + strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(); goto copy; @@ -1737,6 +1811,7 @@ static struct nf_sockopt_ops so_set __read_mostly = { static int __init ip_set_init(void) { + struct ip_set **list; int ret; if (max_sets) @@ -1744,22 +1819,22 @@ ip_set_init(void) if (ip_set_max >= IPSET_INVALID_ID) ip_set_max = IPSET_INVALID_ID - 1; - ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, - GFP_KERNEL); - if (!ip_set_list) + list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); + if (!list) return -ENOMEM; + rcu_assign_pointer(ip_set_list, list); ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); - kfree(ip_set_list); + kfree(list); return ret; } ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(ip_set_list); + kfree(list); return ret; } @@ -1770,10 +1845,12 @@ ip_set_init(void) static void __exit ip_set_fini(void) { + struct ip_set **list = rcu_dereference_protected(ip_set_list, 1); + /* There can't be any existing set */ nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(ip_set_list); + kfree(list); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0f241be28f9e..af175166fffa 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -221,11 +221,9 @@ destroy_conntrack(struct nf_conntrack *nfct) * too. */ nf_ct_remove_expectations(ct); - /* We overload first tuple to link into unconfirmed list. */ - if (!nf_ct_is_confirmed(ct)) { - BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); - hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); - } + /* We overload first tuple to link into unconfirmed or dying list.*/ + BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); NF_CT_STAT_INC(net, delete); spin_unlock_bh(&nf_conntrack_lock); @@ -247,6 +245,9 @@ void nf_ct_delete_from_lists(struct nf_conn *ct) * Otherwise we can get spurious warnings. */ NF_CT_STAT_INC(net, delete_list); clean_from_lists(ct); + /* add this conntrack to the dying list */ + hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &net->ct.dying); spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists); @@ -268,31 +269,23 @@ static void death_by_event(unsigned long ul_conntrack) } /* we've got the event delivered, now it's dying */ set_bit(IPS_DYING_BIT, &ct->status); - spin_lock(&nf_conntrack_lock); - hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); - spin_unlock(&nf_conntrack_lock); nf_ct_put(ct); } -void nf_ct_insert_dying_list(struct nf_conn *ct) +void nf_ct_dying_timeout(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); BUG_ON(ecache == NULL); - /* add this conntrack to the dying list */ - spin_lock_bh(&nf_conntrack_lock); - hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &net->ct.dying); - spin_unlock_bh(&nf_conntrack_lock); /* set a new timer to retry event delivery */ setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); add_timer(&ecache->timeout); } -EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); +EXPORT_SYMBOL_GPL(nf_ct_dying_timeout); static void death_by_timeout(unsigned long ul_conntrack) { @@ -307,7 +300,7 @@ static void death_by_timeout(unsigned long ul_conntrack) unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { /* destroy event was not delivered */ nf_ct_delete_from_lists(ct); - nf_ct_insert_dying_list(ct); + nf_ct_dying_timeout(ct); return; } set_bit(IPS_DYING_BIT, &ct->status); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7bbfb3deea30..4e078cd84d83 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -898,7 +898,8 @@ ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone) } static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { - [CTA_HELP_NAME] = { .type = NLA_NUL_STRING }, + [CTA_HELP_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN - 1 }, }; static inline int @@ -932,6 +933,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_ID] = { .type = NLA_U32 }, [CTA_NAT_DST] = { .type = NLA_NESTED }, [CTA_TUPLE_MASTER] = { .type = NLA_NESTED }, + [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NLA_NESTED }, + [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, }; @@ -989,7 +992,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, nlmsg_report(nlh)) < 0) { nf_ct_delete_from_lists(ct); /* we failed to report the event, try later */ - nf_ct_insert_dying_list(ct); + nf_ct_dying_timeout(ct); nf_ct_put(ct); return 0; } @@ -1089,6 +1092,112 @@ out: return err == -EAGAIN ? -ENOBUFS : err; } +static int ctnetlink_done_list(struct netlink_callback *cb) +{ + if (cb->args[1]) + nf_ct_put((struct nf_conn *)cb->args[1]); + return 0; +} + +static int +ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, + struct hlist_nulls_head *list) +{ + struct nf_conn *ct, *last; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; + int res; + + if (cb->args[2]) + return 0; + + spin_lock_bh(&nf_conntrack_lock); + last = (struct nf_conn *)cb->args[1]; +restart: + hlist_nulls_for_each_entry(h, n, list, hnnode) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (l3proto && nf_ct_l3num(ct) != l3proto) + continue; + if (cb->args[1]) { + if (ct != last) + continue; + cb->args[1] = 0; + } + rcu_read_lock(); + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct); + rcu_read_unlock(); + if (res < 0) { + nf_conntrack_get(&ct->ct_general); + cb->args[1] = (unsigned long)ct; + goto out; + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } else + cb->args[2] = 1; +out: + spin_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_put(last); + + return skb->len; +} + +static int +ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + + return ctnetlink_dump_list(skb, cb, &net->ct.dying); +} + +static int +ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_dying, + .done = ctnetlink_done_list, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return -EOPNOTSUPP; +} + +static int +ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + + return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed); +} + +static int +ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_unconfirmed, + .done = ctnetlink_done_list, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return -EOPNOTSUPP; +} + #ifdef CONFIG_NF_NAT_NEEDED static int ctnetlink_parse_nat_setup(struct nf_conn *ct, @@ -2216,7 +2325,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_MASK] = { .type = NLA_NESTED }, [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, [CTA_EXPECT_ID] = { .type = NLA_U32 }, - [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, + [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, @@ -2712,6 +2822,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu }, [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct }, + [IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying }, + [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed }, }; static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 61f9285111d1..83876e9877f1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1353,6 +1353,8 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 }, + [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 }, + [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 8d2cf9ec37a8..d812c1235b30 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -14,84 +14,32 @@ #include "nf_internals.h" /* - * A queue handler may be registered for each protocol. Each is protected by - * long term mutex. The handler must provide an an outfn() to accept packets - * for queueing and must reinject all packets it receives, no matter what. + * Hook for nfnetlink_queue to register its queue handler. + * We do this so that most of the NFQUEUE code can be modular. + * + * Once the queue is registered it must reinject all packets it + * receives, no matter what. */ -static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; - -static DEFINE_MUTEX(queue_handler_mutex); +static const struct nf_queue_handler __rcu *queue_handler __read_mostly; /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) +void nf_register_queue_handler(const struct nf_queue_handler *qh) { - int ret; - const struct nf_queue_handler *old; - - if (pf >= ARRAY_SIZE(queue_handler)) - return -EINVAL; - - mutex_lock(&queue_handler_mutex); - old = rcu_dereference_protected(queue_handler[pf], - lockdep_is_held(&queue_handler_mutex)); - if (old == qh) - ret = -EEXIST; - else if (old) - ret = -EBUSY; - else { - rcu_assign_pointer(queue_handler[pf], qh); - ret = 0; - } - mutex_unlock(&queue_handler_mutex); - - return ret; + /* should never happen, we only have one queueing backend in kernel */ + WARN_ON(rcu_access_pointer(queue_handler)); + rcu_assign_pointer(queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) +void nf_unregister_queue_handler(void) { - const struct nf_queue_handler *old; - - if (pf >= ARRAY_SIZE(queue_handler)) - return -EINVAL; - - mutex_lock(&queue_handler_mutex); - old = rcu_dereference_protected(queue_handler[pf], - lockdep_is_held(&queue_handler_mutex)); - if (old && old != qh) { - mutex_unlock(&queue_handler_mutex); - return -EINVAL; - } - - RCU_INIT_POINTER(queue_handler[pf], NULL); - mutex_unlock(&queue_handler_mutex); - + RCU_INIT_POINTER(queue_handler, NULL); synchronize_rcu(); - - return 0; } EXPORT_SYMBOL(nf_unregister_queue_handler); -void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) -{ - u_int8_t pf; - - mutex_lock(&queue_handler_mutex); - for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { - if (rcu_dereference_protected( - queue_handler[pf], - lockdep_is_held(&queue_handler_mutex) - ) == qh) - RCU_INIT_POINTER(queue_handler[pf], NULL); - } - mutex_unlock(&queue_handler_mutex); - - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); - static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { /* Release those devices we held, or Alexey will kill me. */ @@ -137,7 +85,7 @@ static int __nf_queue(struct sk_buff *skb, /* QUEUE == DROP if no one is waiting, to be safe. */ rcu_read_lock(); - qh = rcu_dereference(queue_handler[pf]); + qh = rcu_dereference(queue_handler); if (!qh) { status = -ESRCH; goto err_unlock; @@ -344,77 +292,3 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) kfree(entry); } EXPORT_SYMBOL(nf_reinject); - -#ifdef CONFIG_PROC_FS -static void *seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos >= ARRAY_SIZE(queue_handler)) - return NULL; - - return pos; -} - -static void *seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - - if (*pos >= ARRAY_SIZE(queue_handler)) - return NULL; - - return pos; -} - -static void seq_stop(struct seq_file *s, void *v) -{ - -} - -static int seq_show(struct seq_file *s, void *v) -{ - int ret; - loff_t *pos = v; - const struct nf_queue_handler *qh; - - rcu_read_lock(); - qh = rcu_dereference(queue_handler[*pos]); - if (!qh) - ret = seq_printf(s, "%2lld NONE\n", *pos); - else - ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); - rcu_read_unlock(); - - return ret; -} - -static const struct seq_operations nfqueue_seq_ops = { - .start = seq_start, - .next = seq_next, - .stop = seq_stop, - .show = seq_show, -}; - -static int nfqueue_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &nfqueue_seq_ops); -} - -static const struct file_operations nfqueue_file_ops = { - .owner = THIS_MODULE, - .open = nfqueue_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif /* PROC_FS */ - - -int __init netfilter_queue_init(void) -{ -#ifdef CONFIG_PROC_FS - if (!proc_create("nf_queue", S_IRUGO, - proc_net_netfilter, &nfqueue_file_ops)) - return -1; -#endif - return 0; -} - diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index e12d44e75b21..3158d87b56a8 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -809,7 +809,6 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { }; static const struct nf_queue_handler nfqh = { - .name = "nf_queue", .outfn = &nfqnl_enqueue_packet, }; @@ -827,14 +826,10 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, if (nfqa[NFQA_CFG_CMD]) { cmd = nla_data(nfqa[NFQA_CFG_CMD]); - /* Commands without queue context - might sleep */ + /* Obsolete commands without queue context */ switch (cmd->command) { - case NFQNL_CFG_CMD_PF_BIND: - return nf_register_queue_handler(ntohs(cmd->pf), - &nfqh); - case NFQNL_CFG_CMD_PF_UNBIND: - return nf_unregister_queue_handler(ntohs(cmd->pf), - &nfqh); + case NFQNL_CFG_CMD_PF_BIND: return 0; + case NFQNL_CFG_CMD_PF_UNBIND: return 0; } } @@ -1074,6 +1069,7 @@ static int __init nfnetlink_queue_init(void) #endif register_netdevice_notifier(&nfqnl_dev_notifier); + nf_register_queue_handler(&nfqh); return status; #ifdef CONFIG_PROC_FS @@ -1087,7 +1083,7 @@ cleanup_netlink_notifier: static void __exit nfnetlink_queue_fini(void) { - nf_unregister_queue_handlers(&nfqh); + nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", proc_net_netfilter); |