There are some objects that are common to all the places
that keep track of frag queues. They are:
* hash table
* LRU list
* rw lock
* rnd number for hash function
* the number of queues
* the amount of memory occupied by queues
* secret timer
Move all this stuff into one structure (struct inet_frags)
to make it possible to use them uniformly in the future. As
with the previous patch, this mostly consists of hunks like
- write_lock(&ipfrag_lock);
+ write_lock(&ip4_frags.lock);
To address the issue with exporting the number of queues and
the amount of memory occupied by queues outside the .c file
they are declared in, I introduce a couple of helpers.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 74e9cb9..d51f238 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -18,4 +18,19 @@ struct inet_frag_queue {
#define LAST_IN 1
};
+#define INETFRAGS_HASHSZ 64 /* bucket count; ipqhashfn() masks with (INETFRAGS_HASHSZ - 1), so keep it a power of two */
+
+struct inet_frags { /* shared bookkeeping for one family's fragment queues */
+ struct list_head lru_list; /* LRU list of queues; the evictor takes entries from its head */
+ struct hlist_head hash[INETFRAGS_HASHSZ]; /* hash table of queues */
+ rwlock_t lock; /* rw lock held around hash table / LRU list access */
+ u32 rnd; /* random number fed to the hash function */
+ int nqueues; /* the number of queues */
+ atomic_t mem; /* the amount of memory occupied by queues */
+ struct timer_list secret_timer; /* secret timer; re-armed by the user's rehash handler */
+};
+
+void inet_frags_init(struct inet_frags *);
+void inet_frags_fini(struct inet_frags *);
+
#endif
diff --git a/include/net/ip.h b/include/net/ip.h
index 3af3ed9..a18dcec 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -333,8 +333,8 @@ enum ip_defrag_users
};
struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
-extern int ip_frag_nqueues;
-extern atomic_t ip_frag_mem;
+int ip_frag_mem(void);
+int ip_frag_nqueues(void);
/*
* Functions provided by ip_forward.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 31b3f1b..77cdab3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -252,8 +252,8 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb);
-extern int ip6_frag_nqueues;
-extern atomic_t ip6_frag_mem;
+int ip6_frag_nqueues(void);
+int ip6_frag_mem(void);
#define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a02c36d..93fe396 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,8 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_minisocks.o tcp_cong.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
- sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+ inet_fragment.o
obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
new file mode 100644
index 0000000..69623ff
--- /dev/null
+++ b/net/ipv4/inet_fragment.c
@@ -0,0 +1,44 @@
+/*
+ * inet fragments management
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Pavel Emelyanov <xemul@openvz.org>
+ * Started as consolidation of ipv4/ip_fragment.c,
+ * ipv6/reassembly.c and ipv6 nf conntrack reassembly
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+
+#include <net/inet_frag.h>
+
+void inet_frags_init(struct inet_frags *f) /* put *f into its empty initial state */
+{
+ int i;
+
+ for (i = 0; i < INETFRAGS_HASHSZ; i++)
+ INIT_HLIST_HEAD(&f->hash[i]); /* every hash bucket starts out empty */
+
+ INIT_LIST_HEAD(&f->lru_list); /* empty LRU list */
+ rwlock_init(&f->lock);
+
+ f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+ (jiffies ^ (jiffies >> 6))); /* initial hash seed mixed from num_physpages and jiffies */
+
+ f->nqueues = 0; /* no queues and no memory accounted at start */
+ atomic_set(&f->mem, 0);
+ /* NOTE(review): f->secret_timer is not touched here; presumably each user sets it up - confirm. */
+}
+EXPORT_SYMBOL(inet_frags_init);
+
+void inet_frags_fini(struct inet_frags *f)
+{ /* empty body: nothing is done with f here */
+}
+EXPORT_SYMBOL(inet_frags_fini);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3eb1b6d..5e1667e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -87,39 +87,39 @@ struct ipq {
struct inet_peer *peer;
};
-/* Hash table. */
+static struct inet_frags ip4_frags;
-#define IPQ_HASHSZ 64
+int ip_frag_nqueues(void)
+{
+ return ip4_frags.nqueues; /* number of IPv4 frag queues; read without taking ip4_frags.lock */
+}
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+int ip_frag_mem(void)
+{
+ return atomic_read(&ip4_frags.mem); /* memory currently accounted to IPv4 frag queues */
+}
static __inline__ void __ipq_unlink(struct ipq *qp)
{
hlist_del(&qp->q.list);
list_del(&qp->q.lru_list);
- ip_frag_nqueues--;
+ ip4_frags.nqueues--;
}
static __inline__ void ipq_unlink(struct ipq *ipq)
{
- write_lock(&ipfrag_lock);
+ write_lock(&ip4_frags.lock);
__ipq_unlink(ipq);
- write_unlock(&ipfrag_lock);
+ write_unlock(&ip4_frags.lock);
}
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
return jhash_3words((__force u32)id << 16 | prot,
(__force u32)saddr, (__force u32)daddr,
- ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+ ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
}
-static struct timer_list ipfrag_secret_timer;
int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
static void ipfrag_secret_rebuild(unsigned long dummy)
@@ -127,13 +127,13 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
unsigned long now = jiffies;
int i;
- write_lock(&ipfrag_lock);
- get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
- for (i = 0; i < IPQ_HASHSZ; i++) {
+ write_lock(&ip4_frags.lock);
+ get_random_bytes(&ip4_frags.rnd, sizeof(u32));
+ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
struct ipq *q;
struct hlist_node *p, *n;
- hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], q.list) {
+ hlist_for_each_entry_safe(q, p, n, &ip4_frags.hash[i], q.list) {
unsigned int hval = ipqhashfn(q->id, q->saddr,
q->daddr, q->protocol);
@@ -141,23 +141,21 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
hlist_del(&q->q.list);
/* Relink to new hash chain. */
- hlist_add_head(&q->q.list, &ipq_hash[hval]);
+ hlist_add_head(&q->q.list, &ip4_frags.hash[hval]);
}
}
}
- write_unlock(&ipfrag_lock);
+ write_unlock(&ip4_frags.lock);
- mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+ mod_timer(&ip4_frags.secret_timer, now + sysctl_ipfrag_secret_interval);
}
-atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
-
/* Memory Tracking Functions. */
static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &ip_frag_mem);
+ atomic_sub(skb->truesize, &ip4_frags.mem);
kfree_skb(skb);
}
@@ -165,7 +163,7 @@ static __inline__ void frag_free_queue(struct ipq *qp, int *work)
{
if (work)
*work -= sizeof(struct ipq);
- atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+ atomic_sub(sizeof(struct ipq), &ip4_frags.mem);
kfree(qp);
}
@@ -175,7 +173,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
if (!qp)
return NULL;
- atomic_add(sizeof(struct ipq), &ip_frag_mem);
+ atomic_add(sizeof(struct ipq), &ip4_frags.mem);
return qp;
}
@@ -236,20 +234,20 @@ static void ip_evictor(void)
struct list_head *tmp;
int work;
- work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
+ work = atomic_read(&ip4_frags.mem) - sysctl_ipfrag_low_thresh;
if (work <= 0)
return;
while (work > 0) {
- read_lock(&ipfrag_lock);
- if (list_empty(&ipq_lru_list)) {
- read_unlock(&ipfrag_lock);
+ read_lock(&ip4_frags.lock);
+ if (list_empty(&ip4_frags.lru_list)) {
+ read_unlock(&ip4_frags.lock);
return;
}
- tmp = ipq_lru_list.next;
+ tmp = ip4_frags.lru_list.next;
qp = list_entry(tmp, struct ipq, q.lru_list);
atomic_inc(&qp->q.refcnt);
- read_unlock(&ipfrag_lock);
+ read_unlock(&ip4_frags.lock);
spin_lock(&qp->q.lock);
if (!(qp->q.last_in&COMPLETE))
@@ -301,7 +299,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
#endif
unsigned int hash;
- write_lock(&ipfrag_lock);
+ write_lock(&ip4_frags.lock);
hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
qp_in->protocol);
#ifdef CONFIG_SMP
@@ -309,14 +307,14 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
* such entry could be created on other cpu, while we
* promoted read lock to write lock.
*/
- hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) {
+ hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
if (qp->id == qp_in->id &&
qp->saddr == qp_in->saddr &&
qp->daddr == qp_in->daddr &&
qp->protocol == qp_in->protocol &&
qp->user == qp_in->user) {
atomic_inc(&qp->q.refcnt);
- write_unlock(&ipfrag_lock);
+ write_unlock(&ip4_frags.lock);
qp_in->q.last_in |= COMPLETE;
ipq_put(qp_in, NULL);
return qp;
@@ -329,11 +327,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
atomic_inc(&qp->q.refcnt);
atomic_inc(&qp->q.refcnt);
- hlist_add_head(&qp->q.list, &ipq_hash[hash]);
+ hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
INIT_LIST_HEAD(&qp->q.lru_list);
- list_add_tail(&qp->q.lru_list, &ipq_lru_list);
- ip_frag_nqueues++;
- write_unlock(&ipfrag_lock);
+ list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+ ip4_frags.nqueues++;
+ write_unlock(&ip4_frags.lock);
retu
...