summaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_fragment.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r--net/ipv4/ip_fragment.c108
1 files changed, 43 insertions, 65 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index eb9d63a570cd..b66910aaef4d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -79,40 +79,11 @@ struct ipq {
struct inet_peer *peer;
};
-/* RFC 3168 support :
- * We want to check ECN values of all fragments, do detect invalid combinations.
- * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
- */
-#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
-#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
-#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
-#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
-
static inline u8 ip4_frag_ecn(u8 tos)
{
return 1 << (tos & INET_ECN_MASK);
}
-/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
- * Value : 0xff if frame should be dropped.
- * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
- */
-static const u8 ip4_frag_ecn_table[16] = {
- /* at least one fragment had CE, and others ECT_0 or ECT_1 */
- [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
- [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
- [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
-
- /* invalid combinations : drop frame */
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
-};
-
static struct inet_frags ip4_frags;
int ip_frag_nqueues(struct net *net)
@@ -122,7 +93,7 @@ int ip_frag_nqueues(struct net *net)
int ip_frag_mem(struct net *net)
{
- return atomic_read(&net->ipv4.frags.mem);
+ return sum_frag_mem_limit(&net->ipv4.frags);
}
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -161,13 +132,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
qp->user == arg->user;
}
-/* Memory Tracking Functions. */
-static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
-{
- atomic_sub(skb->truesize, &nf->mem);
- kfree_skb(skb);
-}
-
static void ip4_frag_init(struct inet_frag_queue *q, void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
@@ -255,8 +219,7 @@ static void ip_expire(unsigned long arg)
if (!head->dev)
goto out_rcu_unlock;
- /* skb dst is stale, drop it, and perform route lookup again */
- skb_dst_drop(head);
+ /* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
@@ -299,14 +262,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
- if (q == NULL)
- goto out_nomem;
-
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+ }
return container_of(q, struct ipq, q);
-
-out_nomem:
- LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n"));
- return NULL;
}
/* Is the fragment too far ahead to be part of ipq? */
@@ -340,6 +300,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
static int ip_frag_reinit(struct ipq *qp)
{
struct sk_buff *fp;
+ unsigned int sum_truesize = 0;
if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
atomic_inc(&qp->q.refcnt);
@@ -349,9 +310,12 @@ static int ip_frag_reinit(struct ipq *qp)
fp = qp->q.fragments;
do {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(qp->q.net, fp);
+
+ sum_truesize += fp->truesize;
+ kfree_skb(fp);
fp = xp;
} while (fp);
+ sub_frag_mem_limit(&qp->q, sum_truesize);
qp->q.last_in = 0;
qp->q.len = 0;
@@ -496,7 +460,8 @@ found:
qp->q.fragments = next;
qp->q.meat -= free_it->len;
- frag_kfree_skb(qp->q.net, free_it);
+ sub_frag_mem_limit(&qp->q, free_it->truesize);
+ kfree_skb(free_it);
}
}
@@ -519,7 +484,7 @@ found:
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
- atomic_add(skb->truesize, &qp->q.net->mem);
+ add_frag_mem_limit(&qp->q, skb->truesize);
if (offset == 0)
qp->q.last_in |= INET_FRAG_FIRST_IN;
@@ -528,12 +493,17 @@ found:
qp->q.max_size = skb->len + ihl;
if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- qp->q.meat == qp->q.len)
- return ip_frag_reasm(qp, prev, dev);
+ qp->q.meat == qp->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ err = ip_frag_reasm(qp, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return err;
+ }
- write_lock(&ip4_frags.lock);
- list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
- write_unlock(&ip4_frags.lock);
+ skb_dst_drop(skb);
+ inet_frag_lru_move(&qp->q);
return -EINPROGRESS;
err:
@@ -558,7 +528,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
ipq_kill(qp);
- ecn = ip4_frag_ecn_table[qp->ecn];
+ ecn = ip_frag_ecn_table[qp->ecn];
if (unlikely(ecn == 0xff)) {
err = -EINVAL;
goto out_fail;
@@ -594,7 +564,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
goto out_oversize;
/* Head of list must not be cloned. */
- if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(head, GFP_ATOMIC))
goto out_nomem;
/* If the first fragment is fragmented itself, we split
@@ -617,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &qp->q.net->mem);
+ add_frag_mem_limit(&qp->q, clone->truesize);
}
skb_push(head, head->data - skb_network_header(head));
@@ -645,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
fp = next;
}
- atomic_sub(sum_truesize, &qp->q.net->mem);
+ sub_frag_mem_limit(&qp->q, sum_truesize);
head->next = NULL;
head->dev = dev;
@@ -851,14 +821,22 @@ static inline void ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
- /*
- * Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to
- * measurably harm machine performance.
+ /* Fragment cache limits.
+ *
+ * The fragment memory accounting code, (tries to) account for
+ * the real memory usage, by measuring both the size of frag
+ * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue))
+ * and the SKB's truesize.
+ *
+ * A 64K fragment consumes 129736 bytes (44*2944)+200
+ * (1500 truesize == 2944, sizeof(struct ipq) == 200)
+ *
+ * We will commit 4MB at one time. Should we cross that limit
+ * we will prune down to 3MB, making room for approx 8 big 64K
+ * fragments 8x128k.
*/
- net->ipv4.frags.high_thresh = 256 * 1024;
- net->ipv4.frags.low_thresh = 192 * 1024;
+ net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
+ net->ipv4.frags.low_thresh = 3 * 1024 * 1024;
/*
* Important NOTE! Fragment queue must be destroyed before MSL expires.
* RFC791 is wrong proposing to prolongate timer each fragment arrival