| Home » Mailing lists » Devel » [PATCH 0/9] Consolidate IP fragment management Goto Forum:
	| 
		
			| [PATCH 0/9] Consolidate IP fragment management [message #21633] | Fri, 12 October 2007 12:55  |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| Patrick recently pointed out that there are three places that 
perform IP fragment management: ipv4, ipv6 and the ip6 
conntrack. It looks like these places can be consolidated a bit.
The proposal is to create a common structure inet_frag_queue to 
put common fields like list heads, refcounts etc in, and include
it into the specific fragment queues. Then objects such as 
hash tables, lists, locks etc. are moved to a common place (struct 
inet_frags). Finally, the common code is moved to 
net/ipv4/inet_fragment.c.
The inet_ prefix in file names, data structures and functions, and
the code location (net/ipv4) were proposed by Alexey, but the exact
names were selected by me, so there may be better ones.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org> |  
	|  |  |  
	| 
		
			| [PATCH 1/9] Move common fields from frag_queues in one place [message #21634 is a reply to message #21633] | Fri, 12 October 2007 13:00   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| Introduce the struct inet_frag_queue in include/net/inet_frag.h
file and place there all the common fields from three structs:
 * struct ipq in ipv4/ip_fragment.c
 * struct nf_ct_frag6_queue in nf_conntrack_reasm.c
 * struct frag_queue in ipv6/reassembly.c
After this, replace these fields in the appropriate structures with
an instance of this structure and fix the users to use the correct
names, i.e. hunks like
-    atomic_dec(&fq->refcnt);
+    atomic_dec(&fq->q.refcnt);
(these occupy most of the patch)
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
new file mode 100644
index 0000000..74e9cb9
--- /dev/null
+++ b/include/net/inet_frag.h
@@ -0,0 +1,21 @@
+#ifndef __NET_FRAG_H__
+#define __NET_FRAG_H__
+
+struct inet_frag_queue {
+	struct hlist_node	list;
+	struct list_head	lru_list;   /* lru list member */
+	spinlock_t		lock;
+	atomic_t		refcnt;
+	struct timer_list	timer;      /* when will this queue expire? */
+	struct sk_buff		*fragments; /* list of received fragments */
+	ktime_t			stamp;
+	int			len;        /* total length of orig datagram */
+	int			meat;
+	__u8			last_in;    /* first/last segment arrived? */
+
+#define COMPLETE		4
+#define FIRST_IN		2
+#define LAST_IN			1
+};
+
+#endif
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fabb86d..3eb1b6d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -39,6 +39,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/inet_frag.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -74,25 +75,13 @@ struct ipfrag_skb_cb
 
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
-	struct hlist_node list;
-	struct list_head lru_list;	/* lru list member 			*/
+	struct inet_frag_queue q;
+
 	u32		user;
 	__be32		saddr;
 	__be32		daddr;
 	__be16		id;
 	u8		protocol;
-	u8		last_in;
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
-
-	struct sk_buff	*fragments;	/* linked list of received fragments	*/
-	int		len;		/* total length of original datagram	*/
-	int		meat;
-	spinlock_t	lock;
-	atomic_t	refcnt;
-	struct timer_list timer;	/* when will this queue expire?		*/
-	ktime_t		stamp;
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
@@ -111,8 +100,8 @@ int ip_frag_nqueues = 0;
 
 static __inline__ void __ipq_unlink(struct ipq *qp)
 {
-	hlist_del(&qp->list);
-	list_del(&qp->lru_list);
+	hlist_del(&qp->q.list);
+	list_del(&qp->q.lru_list);
 	ip_frag_nqueues--;
 }
 
@@ -144,15 +133,15 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
 		struct ipq *q;
 		struct hlist_node *p, *n;
 
-		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
+		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], q.list) {
 			unsigned int hval = ipqhashfn(q->id, q->saddr,
 						      q->daddr, q->protocol);
 
 			if (hval != i) {
-				hlist_del(&q->list);
+				hlist_del(&q->q.list);
 
 				/* Relink to new hash chain. */
-				hlist_add_head(&q->list, &ipq_hash[hval]);
+				hlist_add_head(&q->q.list, &ipq_hash[hval]);
 			}
 		}
 	}
@@ -198,14 +187,14 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
 {
 	struct sk_buff *fp;
 
-	BUG_TRAP(qp->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&qp->timer) == 0);
+	BUG_TRAP(qp->q.last_in&COMPLETE);
+	BUG_TRAP(del_timer(&qp->q.timer) == 0);
 
 	if (qp->peer)
 		inet_putpeer(qp->peer);
 
 	/* Release all fragment data. */
-	fp = qp->fragments;
+	fp = qp->q.fragments;
 	while (fp) {
 		struct sk_buff *xp = fp->next;
 
@@ -219,7 +208,7 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
 
 static __inline__ void ipq_put(struct ipq *ipq, int *work)
 {
-	if (atomic_dec_and_test(&ipq->refcnt))
+	if (atomic_dec_and_test(&ipq->q.refcnt))
 		ip_frag_destroy(ipq, work);
 }
 
@@ -228,13 +217,13 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work)
  */
 static void ipq_kill(struct ipq *ipq)
 {
-	if (del_timer(&ipq->timer))
-		atomic_dec(&ipq->refcnt);
+	if (del_timer(&ipq->q.timer))
+		atomic_dec(&ipq->q.refcnt);
 
-	if (!(ipq->last_in & COMPLETE)) {
+	if (!(ipq->q.last_in & COMPLETE)) {
 		ipq_unlink(ipq);
-		atomic_dec(&ipq->refcnt);
-		ipq->last_in |= COMPLETE;
+		atomic_dec(&ipq->q.refcnt);
+		ipq->q.last_in |= COMPLETE;
 	}
 }
 
@@ -258,14 +247,14 @@ static void ip_evictor(void)
 			return;
 		}
 		tmp = ipq_lru_list.next;
-		qp = list_entry(tmp, struct ipq, lru_list);
-		atomic_inc(&qp->refcnt);
+		qp = list_entry(tmp, struct ipq, q.lru_list);
+		atomic_inc(&qp->q.refcnt);
 		read_unlock(&ipfrag_lock);
 
-		spin_lock(&qp->lock);
-		if (!(qp->last_in&COMPLETE))
+		spin_lock(&qp->q.lock);
+		if (!(qp->q.last_in&COMPLETE))
 			ipq_kill(qp);
-		spin_unlock(&qp->lock);
+		spin_unlock(&qp->q.lock);
 
 		ipq_put(qp, &work);
 		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -279,9 +268,9 @@ static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp = (struct ipq *) arg;
 
-	spin_lock(&qp->lock);
+	spin_lock(&qp->q.lock);
 
-	if (qp->last_in & COMPLETE)
+	if (qp->q.last_in & COMPLETE)
 		goto out;
 
 	ipq_kill(qp);
@@ -289,8 +278,8 @@ static void ip_expire(unsigned long arg)
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
 
-	if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
-		struct sk_buff *head = qp->fragments;
+	if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
+		struct sk_buff *head = qp->q.fragments;
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
 			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -298,7 +287,7 @@ static void ip_expire(unsigned long arg)
 		}
 	}
 out:
-	spin_unlock(&qp->lock);
+	spin_unlock(&qp->q.lock);
 	ipq_put(qp, NULL);
 }
 
@@ -320,15 +309,15 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 	 * such entry could be created on other cpu, while we
 	 * promoted read lock to write lock.
 	 */
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) {
 		if (qp->id == qp_in->id		&&
 		    qp->saddr == qp_in->saddr	&&
 		    qp->daddr == qp_in->daddr	&&
 		    qp->protocol == qp_in->protocol &&
 		    qp->user == qp_in->user) {
-			atomic_inc(&qp->refcnt);
+			atomic_inc(&qp->q.refcnt);
 			write_unlock(&ipfrag_lock);
-			qp_in->last_in |= COMPLETE;
+			qp_in->q.last_in |= COMPLETE;
 			ipq_put(qp_in, NULL);
 			return qp;
 		}
@@ -336,13 +325,13 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 #endif
 	qp = qp_in;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.timer, jiffies + sysctl_ipfrag_time))
+		atomic_inc(&qp->q.refcnt);
 
-	atomic_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &ipq_hash[hash]);
-	INIT_LIST_HEAD(&qp->lru_list);
-	list_add_tail(&qp->lru_list, &ipq_lru_list);
+	atomic_inc(&qp->q.refcnt);
+	hlist_add_head(&qp->q.list, &ipq_hash[hash]);
+	INIT_LIST_HEAD(&qp->q.lru_list);
+	list_add_tail(&qp->q.lru_list, &ipq_lru_list);
 	ip_frag_nqueues++;
 	write_unlock(&ipfrag_lock);
 	return qp;
@@ -357,23 +346,23 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
 		goto out_nomem;
 
 	qp->protocol = iph->protocol;
-	qp->last_in = 0;
+	qp->q.last_in = 0;
 	qp->id = iph->id;
 	qp->saddr = iph->saddr;
 	qp->daddr = iph->daddr;
 	qp->user = user;
-	qp->len = 0;
-	qp->meat = 0;
-	qp->fragments = NULL;
+	qp->q.len = 0;
+	qp->q.meat = 0;
+	qp->q.fragments = NULL;
 	qp->iif = 0;
 	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
 	/* Initialize a timer for this entry. */
-	init_timer(&qp->timer);
-	qp->timer.data = (unsigned long) qp;	/* pointer to queue	*/
-	qp->timer.function = ip_expire;		/* expire function	*/
-	spin_lock_init(&qp->lock);
-	atomic_set(&qp->refcnt, 1);
+	init_timer(&qp->q.timer);
+	qp->q.timer.data = (unsigned long) qp;	/* pointer to queue	*/
+	qp->q.timer.function = ip_expire;		/* expire function	*/
+	spin_lock_init(&qp->q.lock);
+	atomic_set(&qp->q.refcnt, 1);
 
 	return ip_frag_intern(qp);
 
@@ -397,13 +386,13 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 
 	read_lock(&ipfrag_lock);
 	hash = ipqhashfn(id, saddr, daddr, protocol);
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) {
 		if (qp->id == id		&&
 		    qp->saddr == saddr	&&
 		    qp->daddr == daddr	&&
 		    qp->protocol == protocol &&
 		    qp->user == user) {
-			atomic_inc(&qp->refcnt);
+			atomic_inc(&qp->q.refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
 		}
@@ -429,7 +418,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
 	end = atomic_inc_return(&peer->rid);
 	qp->rid = end;
 
-	rc = qp->fragments && (end - start) > max;
+	rc = qp->q.fragments && (end - start) > max;
 
 	if (rc) {
 		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -442,22 +431,22 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.tim...
 
 |  
	|  |  |  
	| 
		
			| [PATCH 2/9] Collect frag queues management objects together [message #21635 is a reply to message #21633] | Fri, 12 October 2007 13:06   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| There are some objects that are common to all the places
and are used to keep track of frag queues. They are:
 * hash table
 * LRU list
 * rw lock
 * rnd number for hash function
 * the number of queues
 * the amount of memory occupied by queues
 * secret timer
Move all this stuff into one structure (struct inet_frags)
to make it possible to use them uniformly in the future. As
with the previous patch, this mostly consists of hunks like
-    write_lock(&ipfrag_lock);
+    write_lock(&ip4_frags.lock);
To address the issue with exporting the number of queues and 
the amount of memory occupied by queues outside the .c file
they are declared in, I introduce a couple of helpers.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 74e9cb9..d51f238 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -18,4 +18,19 @@ struct inet_frag_queue {
 #define LAST_IN			1
 };
 
+#define INETFRAGS_HASHSZ		64
+
+struct inet_frags {
+	struct list_head	lru_list;
+	struct hlist_head	hash[INETFRAGS_HASHSZ];
+	rwlock_t		lock;
+	u32			rnd;
+	int			nqueues;
+	atomic_t		mem;
+	struct timer_list	secret_timer;
+};
+
+void inet_frags_init(struct inet_frags *);
+void inet_frags_fini(struct inet_frags *);
+
 #endif
diff --git a/include/net/ip.h b/include/net/ip.h
index 3af3ed9..a18dcec 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -333,8 +333,8 @@ enum ip_defrag_users
 };
 
 struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
-extern int ip_frag_nqueues;
-extern atomic_t ip_frag_mem;
+int ip_frag_mem(void);
+int ip_frag_nqueues(void);
 
 /*
  *	Functions provided by ip_forward.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 31b3f1b..77cdab3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -252,8 +252,8 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 
 extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb);
 
-extern int ip6_frag_nqueues;
-extern atomic_t ip6_frag_mem;
+int ip6_frag_nqueues(void);
+int ip6_frag_mem(void);
 
 #define IPV6_FRAG_TIMEOUT	(60*HZ)		/* 60 seconds */
 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a02c36d..93fe396 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,8 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o  igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+	     inet_fragment.o
 
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
new file mode 100644
index 0000000..69623ff
--- /dev/null
+++ b/net/ipv4/inet_fragment.c
@@ -0,0 +1,44 @@
+/*
+ * inet fragments management
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * 		Authors:	Pavel Emelyanov <xemul@openvz.org>
+ *				Started as consolidation of ipv4/ip_fragment.c,
+ *				ipv6/reassembly. and ipv6 nf conntrack reassembly
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+
+#include <net/inet_frag.h>
+
+void inet_frags_init(struct inet_frags *f)
+{
+	int i;
+
+	for (i = 0; i < INETFRAGS_HASHSZ; i++)
+		INIT_HLIST_HEAD(&f->hash[i]);
+
+	INIT_LIST_HEAD(&f->lru_list);
+	rwlock_init(&f->lock);
+
+	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+				   (jiffies ^ (jiffies >> 6)));
+
+	f->nqueues = 0;
+	atomic_set(&f->mem, 0);
+
+}
+EXPORT_SYMBOL(inet_frags_init);
+
+void inet_frags_fini(struct inet_frags *f)
+{
+}
+EXPORT_SYMBOL(inet_frags_fini);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3eb1b6d..5e1667e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -87,39 +87,39 @@ struct ipq {
 	struct inet_peer *peer;
 };
 
-/* Hash table. */
+static struct inet_frags ip4_frags;
 
-#define IPQ_HASHSZ	64
+int ip_frag_nqueues(void)
+{
+	return ip4_frags.nqueues;
+}
 
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+int ip_frag_mem(void)
+{
+	return atomic_read(&ip4_frags.mem);
+}
 
 static __inline__ void __ipq_unlink(struct ipq *qp)
 {
 	hlist_del(&qp->q.list);
 	list_del(&qp->q.lru_list);
-	ip_frag_nqueues--;
+	ip4_frags.nqueues--;
 }
 
 static __inline__ void ipq_unlink(struct ipq *ipq)
 {
-	write_lock(&ipfrag_lock);
+	write_lock(&ip4_frags.lock);
 	__ipq_unlink(ipq);
-	write_unlock(&ipfrag_lock);
+	write_unlock(&ip4_frags.lock);
 }
 
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 	return jhash_3words((__force u32)id << 16 | prot,
 			    (__force u32)saddr, (__force u32)daddr,
-			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list ipfrag_secret_timer;
 int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ipfrag_secret_rebuild(unsigned long dummy)
@@ -127,13 +127,13 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
 	unsigned long now = jiffies;
 	int i;
 
-	write_lock(&ipfrag_lock);
-	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
-	for (i = 0; i < IPQ_HASHSZ; i++) {
+	write_lock(&ip4_frags.lock);
+	get_random_bytes(&ip4_frags.rnd, sizeof(u32));
+	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct ipq *q;
 		struct hlist_node *p, *n;
 
-		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], q.list) {
+		hlist_for_each_entry_safe(q, p, n, &ip4_frags.hash[i], q.list) {
 			unsigned int hval = ipqhashfn(q->id, q->saddr,
 						      q->daddr, q->protocol);
 
@@ -141,23 +141,21 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
 				hlist_del(&q->q.list);
 
 				/* Relink to new hash chain. */
-				hlist_add_head(&q->q.list, &ipq_hash[hval]);
+				hlist_add_head(&q->q.list, &ip4_frags.hash[hval]);
 			}
 		}
 	}
-	write_unlock(&ipfrag_lock);
+	write_unlock(&ip4_frags.lock);
 
-	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+	mod_timer(&ip4_frags.secret_timer, now + sysctl_ipfrag_secret_interval);
 }
 
-atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */
-
 /* Memory Tracking Functions. */
 static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip_frag_mem);
+	atomic_sub(skb->truesize, &ip4_frags.mem);
 	kfree_skb(skb);
 }
 
@@ -165,7 +163,7 @@ static __inline__ void frag_free_queue(struct ipq *qp, int *work)
 {
 	if (work)
 		*work -= sizeof(struct ipq);
-	atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+	atomic_sub(sizeof(struct ipq), &ip4_frags.mem);
 	kfree(qp);
 }
 
@@ -175,7 +173,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
 
 	if (!qp)
 		return NULL;
-	atomic_add(sizeof(struct ipq), &ip_frag_mem);
+	atomic_add(sizeof(struct ipq), &ip4_frags.mem);
 	return qp;
 }
 
@@ -236,20 +234,20 @@ static void ip_evictor(void)
 	struct list_head *tmp;
 	int work;
 
-	work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
+	work = atomic_read(&ip4_frags.mem) - sysctl_ipfrag_low_thresh;
 	if (work <= 0)
 		return;
 
 	while (work > 0) {
-		read_lock(&ipfrag_lock);
-		if (list_empty(&ipq_lru_list)) {
-			read_unlock(&ipfrag_lock);
+		read_lock(&ip4_frags.lock);
+		if (list_empty(&ip4_frags.lru_list)) {
+			read_unlock(&ip4_frags.lock);
 			return;
 		}
-		tmp = ipq_lru_list.next;
+		tmp = ip4_frags.lru_list.next;
 		qp = list_entry(tmp, struct ipq, q.lru_list);
 		atomic_inc(&qp->q.refcnt);
-		read_unlock(&ipfrag_lock);
+		read_unlock(&ip4_frags.lock);
 
 		spin_lock(&qp->q.lock);
 		if (!(qp->q.last_in&COMPLETE))
@@ -301,7 +299,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 #endif
 	unsigned int hash;
 
-	write_lock(&ipfrag_lock);
+	write_lock(&ip4_frags.lock);
 	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
 			 qp_in->protocol);
 #ifdef CONFIG_SMP
@@ -309,14 +307,14 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 	 * such entry could be created on other cpu, while we
 	 * promoted read lock to write lock.
 	 */
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) {
+	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
 		if (qp->id == qp_in->id		&&
 		    qp->saddr == qp_in->saddr	&&
 		    qp->daddr == qp_in->daddr	&&
 		    qp->protocol == qp_in->protocol &&
 		    qp->user == qp_in->user) {
 			atomic_inc(&qp->q.refcnt);
-			write_unlock(&ipfrag_lock);
+			write_unlock(&ip4_frags.lock);
 			qp_in->q.last_in |= COMPLETE;
 			ipq_put(qp_in, NULL);
 			return qp;
@@ -329,11 +327,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 		atomic_inc(&qp->q.refcnt);
 
 	atomic_inc(&qp->q.refcnt);
-	hlist_add_head(&qp->q.list, &ipq_hash[hash]);
+	hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
 	INIT_LIST_HEAD(&qp->q.lru_list);
-	list_add_tail(&qp->q.lru_list, &ipq_lru_list);
-	ip_frag_nqueues++;
-	write_unlock(&ipfrag_lock);
+	list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	ip4_frags.nqueues++;
+	write_unlock(&ip4_frags.lock);
 	retu...
 
 |  
	|  |  |  
	| 
		
			| [PATCH 3/9] Collect common sysctl variables together [message #21636 is a reply to message #21633] | Fri, 12 October 2007 13:10   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| Some sysctl variables are used to tune the frag queue
management, and it will be useful to work with them in
a common way in the future, so move them into one
structure. Moreover, they are the same for all the frag
management code.
I don't place them in the existing inet_frags object,
introduced in the previous patch, for two reasons:
 1. to keep them in the __read_mostly section;
 2. not to export the whole inet_frags object outside.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index d51f238..ada03ba 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -20,6 +20,13 @@ struct inet_frag_queue {
 
 #define INETFRAGS_HASHSZ		64
 
+struct inet_frags_ctl {
+	int high_thresh;
+	int low_thresh;
+	int timeout;
+	int secret_interval;
+};
+
 struct inet_frags {
 	struct list_head	lru_list;
 	struct hlist_head	hash[INETFRAGS_HASHSZ];
@@ -28,6 +35,7 @@ struct inet_frags {
 	int			nqueues;
 	atomic_t		mem;
 	struct timer_list	secret_timer;
+	struct inet_frags_ctl	*ctl;
 };
 
 void inet_frags_init(struct inet_frags *);
diff --git a/include/net/ip.h b/include/net/ip.h
index a18dcec..e7b0feb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -177,10 +177,8 @@ extern int sysctl_ip_default_ttl;
 extern int sysctl_ip_nonlocal_bind;
 
 /* From ip_fragment.c */
-extern int sysctl_ipfrag_high_thresh; 
-extern int sysctl_ipfrag_low_thresh;
-extern int sysctl_ipfrag_time;
-extern int sysctl_ipfrag_secret_interval;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl ip4_frags_ctl;
 extern int sysctl_ipfrag_max_dist;
 
 /* From inetpeer.c */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 77cdab3..b29d76c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -565,10 +565,8 @@ extern int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 /*
  * reassembly.c
  */
-extern int sysctl_ip6frag_high_thresh;
-extern int sysctl_ip6frag_low_thresh;
-extern int sysctl_ip6frag_time;
-extern int sysctl_ip6frag_secret_interval;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl ip6_frags_ctl;
 
 extern const struct proto_ops inet6_stream_ops;
 extern const struct proto_ops inet6_dgram_ops;
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 070d12c..f703533 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -15,8 +15,7 @@ extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
 			       struct net_device *out,
 			       int (*okfn)(struct sk_buff *));
 
-extern unsigned int nf_ct_frag6_timeout;
-extern unsigned int nf_ct_frag6_low_thresh;
-extern unsigned int nf_ct_frag6_high_thresh;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl nf_frags_ctl;
 
 #endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 5e1667e..61035a8 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -50,21 +50,8 @@
  * as well. Or notify me, at least. --ANK
  */
 
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
 int sysctl_ipfrag_max_dist __read_mostly = 64;
 
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
-
 struct ipfrag_skb_cb
 {
 	struct inet_skb_parm	h;
@@ -87,6 +74,25 @@ struct ipq {
 	struct inet_peer *peer;
 };
 
+struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
+	 */
+	.high_thresh	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
+
+	/*
+	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
+	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
+	 * by TTL.
+	 */
+	.timeout	 = IP_FRAG_TIME,
+	.secret_interval = 10 * 60 * HZ,
+};
+
 static struct inet_frags ip4_frags;
 
 int ip_frag_nqueues(void)
@@ -120,8 +126,6 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
 static void ipfrag_secret_rebuild(unsigned long dummy)
 {
 	unsigned long now = jiffies;
@@ -147,7 +151,7 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
 	}
 	write_unlock(&ip4_frags.lock);
 
-	mod_timer(&ip4_frags.secret_timer, now + sysctl_ipfrag_secret_interval);
+	mod_timer(&ip4_frags.secret_timer, now + ip4_frags_ctl.secret_interval);
 }
 
 /* Memory Tracking Functions. */
@@ -234,7 +238,7 @@ static void ip_evictor(void)
 	struct list_head *tmp;
 	int work;
 
-	work = atomic_read(&ip4_frags.mem) - sysctl_ipfrag_low_thresh;
+	work = atomic_read(&ip4_frags.mem) - ip4_frags_ctl.low_thresh;
 	if (work <= 0)
 		return;
 
@@ -323,7 +327,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 #endif
 	qp = qp_in;
 
-	if (!mod_timer(&qp->q.timer, jiffies + sysctl_ipfrag_time))
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
 		atomic_inc(&qp->q.refcnt);
 
 	atomic_inc(&qp->q.refcnt);
@@ -429,7 +433,7 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->q.timer, jiffies + sysctl_ipfrag_time)) {
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
 		atomic_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
@@ -693,7 +697,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&ip4_frags.mem) > sysctl_ipfrag_high_thresh)
+	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
 		ip_evictor();
 
 	dev = skb->dev;
@@ -724,9 +728,10 @@ void __init ipfrag_init(void)
 {
 	init_timer(&ip4_frags.secret_timer);
 	ip4_frags.secret_timer.function = ipfrag_secret_rebuild;
-	ip4_frags.secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
+	ip4_frags.secret_timer.expires = jiffies + ip4_frags_ctl.secret_interval;
 	add_timer(&ip4_frags.secret_timer);
 
+	ip4_frags.ctl = &ip4_frags_ctl;
 	inet_frags_init(&ip4_frags);
 }
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index eb286ab..c98ef16 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
 #include <net/route.h>
 #include <net/tcp.h>
 #include <net/cipso_ipv4.h>
+#include <net/inet_frag.h>
 
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
@@ -357,7 +358,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
 		.procname	= "ipfrag_high_thresh",
-		.data		= &sysctl_ipfrag_high_thresh,
+		.data		= &ip4_frags_ctl.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -365,7 +366,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
 		.procname	= "ipfrag_low_thresh",
-		.data		= &sysctl_ipfrag_low_thresh,
+		.data		= &ip4_frags_ctl.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -381,7 +382,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_TIME,
 		.procname	= "ipfrag_time",
-		.data		= &sysctl_ipfrag_time,
+		.data		= &ip4_frags_ctl.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -732,7 +733,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
 		.procname	= "ipfrag_secret_interval",
-		.data		= &sysctl_ipfrag_secret_interval,
+		.data		= &ip4_frags_ctl.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 37a3db9..572c0bc 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -18,6 +18,7 @@
 #include <linux/icmp.h>
 #include <linux/sysctl.h>
 #include <net/ipv6.h>
+#include <net/inet_frag.h>
 
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack.h>
@@ -307,7 +308,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_TIMEOUT,
 		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &nf_ct_frag6_timeout,
+		.data		= &nf_frags_ctl.timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -315,7 +316,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
 		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &nf_ct_frag6_low_thresh,
+		.data		= &nf_frags_ctl.low_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -323,7 +324,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
 		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &nf_ct_frag6_high_thresh,
+		.data		= &nf_frags_ctl.high_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/ipv6/netfilter...
 
 |  
	|  |  |  
	| 
		
			| [PATCH 4/9] Consolidate the xxx_frag_kill [message #21637 is a reply to message #21633] | Fri, 12 October 2007 13:12   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| Since all the xxx_frag_kill functions now work
with the generic inet_frag_queue data type, they can
be moved into a common place.
The xxx_unlink() code is moved as well.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index ada03ba..9902363 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -41,4 +41,6 @@ struct inet_frags {
 void inet_frags_init(struct inet_frags *);
 void inet_frags_fini(struct inet_frags *);
 
+void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+
 #endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 69623ff..534eaa8 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -42,3 +42,26 @@ void inet_frags_fini(struct inet_frags *f)
 {
 }
 EXPORT_SYMBOL(inet_frags_fini);
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	write_lock(&f->lock);
+	hlist_del(&fq->list);
+	list_del(&fq->lru_list);
+	f->nqueues--;
+	write_unlock(&f->lock);
+}
+
+void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	if (del_timer(&fq->timer))
+		atomic_dec(&fq->refcnt);
+
+	if (!(fq->last_in & COMPLETE)) {
+		fq_unlink(fq, f);
+		atomic_dec(&fq->refcnt);
+		fq->last_in |= COMPLETE;
+	}
+}
+
+EXPORT_SYMBOL(inet_frag_kill);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 61035a8..5b376c4 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -105,20 +105,6 @@ int ip_frag_mem(void)
 	return atomic_read(&ip4_frags.mem);
 }
 
-static __inline__ void __ipq_unlink(struct ipq *qp)
-{
-	hlist_del(&qp->q.list);
-	list_del(&qp->q.lru_list);
-	ip4_frags.nqueues--;
-}
-
-static __inline__ void ipq_unlink(struct ipq *ipq)
-{
-	write_lock(&ip4_frags.lock);
-	__ipq_unlink(ipq);
-	write_unlock(&ip4_frags.lock);
-}
-
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 	return jhash_3words((__force u32)id << 16 | prot,
@@ -219,14 +205,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work)
  */
 static void ipq_kill(struct ipq *ipq)
 {
-	if (del_timer(&ipq->q.timer))
-		atomic_dec(&ipq->q.refcnt);
-
-	if (!(ipq->q.last_in & COMPLETE)) {
-		ipq_unlink(ipq);
-		atomic_dec(&ipq->q.refcnt);
-		ipq->q.last_in |= COMPLETE;
-	}
+	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
 /* Memory limiting on fragments.  Evictor trashes the oldest
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 966a888..2ebe515 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -79,20 +79,6 @@ struct inet_frags_ctl nf_frags_ctl __read_mostly = {
 
 static struct inet_frags nf_frags;
 
-static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
-{
-	hlist_del(&fq->q.list);
-	list_del(&fq->q.lru_list);
-	nf_frags.nqueues--;
-}
-
-static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
-{
-	write_lock(&nf_frags.lock);
-	__fq_unlink(fq);
-	write_unlock(&nf_frags.lock);
-}
-
 static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 			       struct in6_addr *daddr)
 {
@@ -213,14 +199,7 @@ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
  */
 static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
 {
-	if (del_timer(&fq->q.timer))
-		atomic_dec(&fq->q.refcnt);
-
-	if (!(fq->q.last_in & COMPLETE)) {
-		fq_unlink(fq);
-		atomic_dec(&fq->q.refcnt);
-		fq->q.last_in |= COMPLETE;
-	}
+	inet_frag_kill(&fq->q, &nf_frags);
 }
 
 static void nf_ct_frag6_evictor(void)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f0e22be..57e32f4 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -100,20 +100,6 @@ int ip6_frag_mem(void)
 	return atomic_read(&ip6_frags.mem);
 }
 
-static __inline__ void __fq_unlink(struct frag_queue *fq)
-{
-	hlist_del(&fq->q.list);
-	list_del(&fq->q.lru_list);
-	ip6_frags.nqueues--;
-}
-
-static __inline__ void fq_unlink(struct frag_queue *fq)
-{
-	write_lock(&ip6_frags.lock);
-	__fq_unlink(fq);
-	write_unlock(&ip6_frags.lock);
-}
-
 /*
  * callers should be careful not to use the hash value outside the ipfrag_lock
  * as doing so could race with ipfrag_hash_rnd being recalculated.
@@ -236,14 +222,7 @@ static __inline__ void fq_put(struct frag_queue *fq, int *work)
  */
 static __inline__ void fq_kill(struct frag_queue *fq)
 {
-	if (del_timer(&fq->q.timer))
-		atomic_dec(&fq->q.refcnt);
-
-	if (!(fq->q.last_in & COMPLETE)) {
-		fq_unlink(fq);
-		atomic_dec(&fq->q.refcnt);
-		fq->q.last_in |= COMPLETE;
-	}
+	inet_frag_kill(&fq->q, &ip6_frags);
 }
 
 static void ip6_evictor(struct inet6_dev *idev)
-- 
1.5.3.4 |  
	|  |  |  
	| 
		
			| [PATCH 5/9] Consolidate the xxx_secret_rebuild [message #21639 is a reply to message #21633] | Fri, 12 October 2007 13:16   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| This code works with the generic data types as well, so
move it into inet_fragment.c.
This move makes it possible to hide the secret_timer
management and the secret_rebuild routine completely in
inet_fragment.c.
Introduce the ->hashfn() callback in struct inet_frags to get
the hash value for a given inet_frag_queue object.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 9902363..e374412 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -36,6 +36,8 @@ struct inet_frags {
 	atomic_t		mem;
 	struct timer_list	secret_timer;
 	struct inet_frags_ctl	*ctl;
+
+	unsigned int		(*hashfn)(struct inet_frag_queue *);
 };
 
 void inet_frags_init(struct inet_frags *);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 534eaa8..ec10e05 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -16,9 +16,38 @@
 #include <linux/module.h>
 #include <linux/timer.h>
 #include <linux/mm.h>
+#include <linux/random.h>
 
 #include <net/inet_frag.h>
 
+static void inet_frag_secret_rebuild(unsigned long dummy)
+{
+	struct inet_frags *f = (struct inet_frags *)dummy;
+	unsigned long now = jiffies;
+	int i;
+
+	write_lock(&f->lock);
+	get_random_bytes(&f->rnd, sizeof(u32));
+	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_queue *q;
+		struct hlist_node *p, *n;
+
+		hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+			unsigned int hval = f->hashfn(q);
+
+			if (hval != i) {
+				hlist_del(&q->list);
+
+				/* Relink to new hash chain. */
+				hlist_add_head(&q->list, &f->hash[hval]);
+			}
+		}
+	}
+	write_unlock(&f->lock);
+
+	mod_timer(&f->secret_timer, now + f->ctl->secret_interval);
+}
+
 void inet_frags_init(struct inet_frags *f)
 {
 	int i;
@@ -35,11 +64,17 @@ void inet_frags_init(struct inet_frags *f)
 	f->nqueues = 0;
 	atomic_set(&f->mem, 0);
 
+	init_timer(&f->secret_timer);
+	f->secret_timer.function = inet_frag_secret_rebuild;
+	f->secret_timer.data = (unsigned long)f;
+	f->secret_timer.expires = jiffies + f->ctl->secret_interval;
+	add_timer(&f->secret_timer);
 }
 EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_fini(struct inet_frags *f)
 {
+	del_timer(&f->secret_timer);
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 5b376c4..7aee137 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -112,32 +112,12 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
+	struct ipq *ipq;
 
-	write_lock(&ip4_frags.lock);
-	get_random_bytes(&ip4_frags.rnd, sizeof(u32));
-	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
-		struct ipq *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ip4_frags.hash[i], q.list) {
-			unsigned int hval = ipqhashfn(q->id, q->saddr,
-						      q->daddr, q->protocol);
-
-			if (hval != i) {
-				hlist_del(&q->q.list);
-
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->q.list, &ip4_frags.hash[hval]);
-			}
-		}
-	}
-	write_unlock(&ip4_frags.lock);
-
-	mod_timer(&ip4_frags.secret_timer, now + ip4_frags_ctl.secret_interval);
+	ipq = container_of(q, struct ipq, q);
+	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
 /* Memory Tracking Functions. */
@@ -705,12 +685,8 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 
 void __init ipfrag_init(void)
 {
-	init_timer(&ip4_frags.secret_timer);
-	ip4_frags.secret_timer.function = ipfrag_secret_rebuild;
-	ip4_frags.secret_timer.expires = jiffies + ip4_frags_ctl.secret_interval;
-	add_timer(&ip4_frags.secret_timer);
-
 	ip4_frags.ctl = &ip4_frags_ctl;
+	ip4_frags.hashfn = ip4_hashfn;
 	inet_frags_init(&ip4_frags);
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 2ebe515..a3aef38 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -106,32 +106,12 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 	return c & (INETFRAGS_HASHSZ - 1);
 }
 
-static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
+	struct nf_ct_frag6_queue *nq;
 
-	write_lock(&nf_frags.lock);
-	get_random_bytes(&nf_frags.rnd, sizeof(u32));
-	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
-		struct nf_ct_frag6_queue *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &nf_frags.hash[i], q.list) {
-			unsigned int hval = ip6qhashfn(q->id,
-						       &q->saddr,
-						       &q->daddr);
-			if (hval != i) {
-				hlist_del(&q->q.list);
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->q.list,
-					       &nf_frags.hash[hval]);
-			}
-		}
-	}
-	write_unlock(&nf_frags.lock);
-
-	mod_timer(&nf_frags.secret_timer, now + nf_frags_ctl.secret_interval);
+	nq = container_of(q, struct nf_ct_frag6_queue, q);
+	return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
 }
 
 /* Memory Tracking Functions. */
@@ -817,11 +797,8 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
 
 int nf_ct_frag6_init(void)
 {
-	setup_timer(&nf_frags.secret_timer, nf_ct_frag6_secret_rebuild, 0);
-	nf_frags.secret_timer.expires = jiffies + nf_frags_ctl.secret_interval;
-	add_timer(&nf_frags.secret_timer);
-
 	nf_frags.ctl = &nf_frags_ctl;
+	nf_frags.hashfn = nf_hashfn;
 	inet_frags_init(&nf_frags);
 
 	return 0;
@@ -831,7 +808,6 @@ void nf_ct_frag6_cleanup(void)
 {
 	inet_frags_fini(&nf_frags);
 
-	del_timer(&nf_frags.secret_timer);
 	nf_frags_ctl.low_thresh = 0;
 	nf_ct_frag6_evictor();
 }
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 57e32f4..7a357fd 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -131,35 +131,12 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 	return c & (INETFRAGS_HASHSZ - 1);
 }
 
-static void ip6_frag_secret_rebuild(unsigned long dummy)
+static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
-
-	write_lock(&ip6_frags.lock);
-	get_random_bytes(&ip6_frags.rnd, sizeof(u32));
-	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
-		struct frag_queue *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ip6_frags.hash[i], q.list) {
-			unsigned int hval = ip6qhashfn(q->id,
-						       &q->saddr,
-						       &q->daddr);
-
-			if (hval != i) {
-				hlist_del(&q->q.list);
-
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->q.list,
-					       &ip6_frags.hash[hval]);
-
-			}
-		}
-	}
-	write_unlock(&ip6_frags.lock);
+	struct frag_queue *fq;
 
-	mod_timer(&ip6_frags.secret_timer, now + ip6_frags_ctl.secret_interval);
+	fq = container_of(q, struct frag_queue, q);
+	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
 }
 
 /* Memory Tracking Functions. */
@@ -742,11 +719,7 @@ void __init ipv6_frag_init(void)
 	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
 		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
 
-	init_timer(&ip6_frags.secret_timer);
-	ip6_frags.secret_timer.function = ip6_frag_secret_rebuild;
-	ip6_frags.secret_timer.expires = jiffies + ip6_frags_ctl.secret_interval;
-	add_timer(&ip6_frags.secret_timer);
-
 	ip6_frags.ctl = &ip6_frags_ctl;
+	ip6_frags.hashfn = ip6_hashfn;
 	inet_frags_init(&ip6_frags);
 }
-- 
1.5.3.4 |  
	|  |  |  
	| 
		
			| [PATCH 6/9] Consolidate the xxx_frag_destroy [message #21640 is a reply to message #21633] | Fri, 12 October 2007 13:21   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| To make it possible we need to know the exact frag queue
size for inet_frags->mem management and two callbacks:
 * to destroy the skb (optional, used in conntracks only)
 * to free the queue itself (mandatory, but later I plan to 
   move the allocation and the destruction of frag_queues 
   into the common place, so this callback will most likely
   be optional too).
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index e374412..2dd1cd4 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -33,16 +33,21 @@ struct inet_frags {
 	rwlock_t		lock;
 	u32			rnd;
 	int			nqueues;
+	int			qsize;
 	atomic_t		mem;
 	struct timer_list	secret_timer;
 	struct inet_frags_ctl	*ctl;
 
 	unsigned int		(*hashfn)(struct inet_frag_queue *);
+	void			(*destructor)(struct inet_frag_queue *);
+	void			(*skb_free)(struct sk_buff *);
 };
 
 void inet_frags_init(struct inet_frags *);
 void inet_frags_fini(struct inet_frags *);
 
 void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+void inet_frag_destroy(struct inet_frag_queue *q,
+				struct inet_frags *f, int *work);
 
 #endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index ec10e05..15fb2c4 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -17,6 +17,8 @@
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
 
 #include <net/inet_frag.h>
 
@@ -100,3 +102,41 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 }
 
 EXPORT_SYMBOL(inet_frag_kill);
+
+static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
+						int *work)
+{
+	if (work)
+		*work -= skb->truesize;
+
+	atomic_sub(skb->truesize, &f->mem);
+	if (f->skb_free)
+		f->skb_free(skb);
+	kfree_skb(skb);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
+					int *work)
+{
+	struct sk_buff *fp;
+
+	BUG_TRAP(q->last_in & COMPLETE);
+	BUG_TRAP(del_timer(&q->timer) == 0);
+
+	/* Release all fragment data. */
+	fp = q->fragments;
+	while (fp) {
+		struct sk_buff *xp = fp->next;
+
+		frag_kfree_skb(f, fp, work);
+		fp = xp;
+	}
+
+	if (work)
+		*work -= f->qsize;
+	atomic_sub(f->qsize, &f->mem);
+
+	f->destructor(q);
+
+}
+EXPORT_SYMBOL(inet_frag_destroy);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7aee137..a59ac39 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -129,11 +129,13 @@ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 	kfree_skb(skb);
 }
 
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
-	if (work)
-		*work -= sizeof(struct ipq);
-	atomic_sub(sizeof(struct ipq), &ip4_frags.mem);
+	struct ipq *qp;
+
+	qp = container_of(q, struct ipq, q);
+	if (qp->peer)
+		inet_putpeer(qp->peer);
 	kfree(qp);
 }
 
@@ -150,34 +152,10 @@ static __inline__ struct ipq *frag_alloc_queue(void)
 
 /* Destruction primitives. */
 
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
-{
-	struct sk_buff *fp;
-
-	BUG_TRAP(qp->q.last_in&COMPLETE);
-	BUG_TRAP(del_timer(&qp->q.timer) == 0);
-
-	if (qp->peer)
-		inet_putpeer(qp->peer);
-
-	/* Release all fragment data. */
-	fp = qp->q.fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	/* Finally, release the queue descriptor itself. */
-	frag_free_queue(qp, work);
-}
-
 static __inline__ void ipq_put(struct ipq *ipq, int *work)
 {
 	if (atomic_dec_and_test(&ipq->q.refcnt))
-		ip_frag_destroy(ipq, work);
+		inet_frag_destroy(&ipq->q, &ip4_frags, work);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -687,6 +665,9 @@ void __init ipfrag_init(void)
 {
 	ip4_frags.ctl = &ip4_frags_ctl;
 	ip4_frags.hashfn = ip4_hashfn;
+	ip4_frags.destructor = ip4_frag_free;
+	ip4_frags.skb_free = NULL;
+	ip4_frags.qsize = sizeof(struct ipq);
 	inet_frags_init(&ip4_frags);
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index a3aef38..785f5cd 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -114,25 +114,25 @@ static unsigned int nf_hashfn(struct inet_frag_queue *q)
 	return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
 }
 
+static void nf_skb_free(struct sk_buff *skb)
+{
+	if (NFCT_FRAG6_CB(skb)->orig)
+		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
+
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
 {
 	if (work)
 		*work -= skb->truesize;
 	atomic_sub(skb->truesize, &nf_frags.mem);
-	if (NFCT_FRAG6_CB(skb)->orig)
-		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-
+	nf_skb_free(skb);
 	kfree_skb(skb);
 }
 
-static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
-				   unsigned int *work)
+static void nf_frag_free(struct inet_frag_queue *q)
 {
-	if (work)
-		*work -= sizeof(struct nf_ct_frag6_queue);
-	atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
-	kfree(fq);
+	kfree(container_of(q, struct nf_ct_frag6_queue, q));
 }
 
 static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
@@ -147,31 +147,10 @@ static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
 
 /* Destruction primitives. */
 
-/* Complete destruction of fq. */
-static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
-				unsigned int *work)
-{
-	struct sk_buff *fp;
-
-	BUG_TRAP(fq->q.last_in&COMPLETE);
-	BUG_TRAP(del_timer(&fq->q.timer) == 0);
-
-	/* Release all fragment data. */
-	fp = fq->q.fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	frag_free_queue(fq, work);
-}
-
 static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
 {
 	if (atomic_dec_and_test(&fq->q.refcnt))
-		nf_ct_frag6_destroy(fq, work);
+		inet_frag_destroy(&fq->q, &nf_frags, work);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -799,6 +778,9 @@ int nf_ct_frag6_init(void)
 {
 	nf_frags.ctl = &nf_frags_ctl;
 	nf_frags.hashfn = nf_hashfn;
+	nf_frags.destructor = nf_frag_free;
+	nf_frags.skb_free = nf_skb_free;
+	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	inet_frags_init(&nf_frags);
 
 	return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7a357fd..74b2113 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -148,12 +148,9 @@ static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 	kfree_skb(skb);
 }
 
-static inline void frag_free_queue(struct frag_queue *fq, int *work)
+static void ip6_frag_free(struct inet_frag_queue *fq)
 {
-	if (work)
-		*work -= sizeof(struct frag_queue);
-	atomic_sub(sizeof(struct frag_queue), &ip6_frags.mem);
-	kfree(fq);
+	kfree(container_of(fq, struct frag_queue, q));
 }
 
 static inline struct frag_queue *frag_alloc_queue(void)
@@ -168,30 +165,10 @@ static inline struct frag_queue *frag_alloc_queue(void)
 
 /* Destruction primitives. */
 
-/* Complete destruction of fq. */
-static void ip6_frag_destroy(struct frag_queue *fq, int *work)
-{
-	struct sk_buff *fp;
-
-	BUG_TRAP(fq->q.last_in&COMPLETE);
-	BUG_TRAP(del_timer(&fq->q.timer) == 0);
-
-	/* Release all fragment data. */
-	fp = fq->q.fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	frag_free_queue(fq, work);
-}
-
 static __inline__ void fq_put(struct frag_queue *fq, int *work)
 {
 	if (atomic_dec_and_test(&fq->q.refcnt))
-		ip6_frag_destroy(fq, work);
+		inet_frag_destroy(&fq->q, &ip6_frags, work);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -721,5 +698,8 @@ void __init ipv6_frag_init(void)
 
 	ip6_frags.ctl = &ip6_frags_ctl;
 	ip6_frags.hashfn = ip6_hashfn;
+	ip6_frags.destructor = ip6_frag_free;
+	ip6_frags.skb_free = NULL;
+	ip6_frags.qsize = sizeof(struct frag_queue);
 	inet_frags_init(&ip6_frags);
 }
-- 
1.5.3.4 |  
	|  |  |  
	| 
		
			| [PATCH 7/9] Consolidate the xxx_evictor [message #21641 is a reply to message #21633] | Fri, 12 October 2007 13:24   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| The ipv4 and ipv6 evictors collect some statistics,
so make the common evictor return the number of evicted
queues and account them all at once in the caller.
The XXX_ADD_STATS_BH() macros are added just for this case,
but maybe there are other places in the code that can make
use of them as well.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 2dd1cd4..cf583cf 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -49,5 +49,6 @@ void inet_frags_fini(struct inet_frags *);
 void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
 void inet_frag_destroy(struct inet_frag_queue *q,
 				struct inet_frags *f, int *work);
+int inet_frag_evictor(struct inet_frags *f);
 
 #endif
diff --git a/include/net/ip.h b/include/net/ip.h
index e7b0feb..00ed4f3 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -160,6 +160,7 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 #define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
 #define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
 #define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ip_statistics, field)
+#define IP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
 #define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b29d76c..a0f1042 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -120,12 +120,21 @@ extern int sysctl_mld_max_msf;
 	SNMP_INC_STATS##modifier(statname##_statistics, (field));	\
 })
 
+#define _DEVADD(statname, modifier, idev, field, val)			\
+({									\
+	struct inet6_dev *_idev = (idev);				\
+	if (likely(_idev != NULL))					\
+		SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \
+	SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\
+})
+
 /* MIBs */
 DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 
 #define IP6_INC_STATS(idev,field)	_DEVINC(ipv6, , idev, field)
 #define IP6_INC_STATS_BH(idev,field)	_DEVINC(ipv6, _BH, idev, field)
 #define IP6_INC_STATS_USER(idev,field)	_DEVINC(ipv6, _USER, idev, field)
+#define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val)
 
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 15fb2c4..484cf51 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -140,3 +140,35 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 
 }
 EXPORT_SYMBOL(inet_frag_destroy);
+
+int inet_frag_evictor(struct inet_frags *f)
+{
+	struct inet_frag_queue *q;
+	int work, evicted = 0;
+
+	work = atomic_read(&f->mem) - f->ctl->low_thresh;
+	while (work > 0) {
+		read_lock(&f->lock);
+		if (list_empty(&f->lru_list)) {
+			read_unlock(&f->lock);
+			break;
+		}
+
+		q = list_first_entry(&f->lru_list,
+				struct inet_frag_queue, lru_list);
+		atomic_inc(&q->refcnt);
+		read_unlock(&f->lock);
+
+		spin_lock(&q->lock);
+		if (!(q->last_in & COMPLETE))
+			inet_frag_kill(q, f);
+		spin_unlock(&q->lock);
+
+		if (atomic_dec_and_test(&q->refcnt))
+			inet_frag_destroy(q, f, &work);
+		evicted++;
+	}
+
+	return evicted;
+}
+EXPORT_SYMBOL(inet_frag_evictor);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a59ac39..4ea7948 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -171,33 +171,11 @@ static void ipq_kill(struct ipq *ipq)
  */
 static void ip_evictor(void)
 {
-	struct ipq *qp;
-	struct list_head *tmp;
-	int work;
-
-	work = atomic_read(&ip4_frags.mem) - ip4_frags_ctl.low_thresh;
-	if (work <= 0)
-		return;
-
-	while (work > 0) {
-		read_lock(&ip4_frags.lock);
-		if (list_empty(&ip4_frags.lru_list)) {
-			read_unlock(&ip4_frags.lock);
-			return;
-		}
-		tmp = ip4_frags.lru_list.next;
-		qp = list_entry(tmp, struct ipq, q.lru_list);
-		atomic_inc(&qp->q.refcnt);
-		read_unlock(&ip4_frags.lock);
+	int evicted;
 
-		spin_lock(&qp->q.lock);
-		if (!(qp->q.last_in&COMPLETE))
-			ipq_kill(qp);
-		spin_unlock(&qp->q.lock);
-
-		ipq_put(qp, &work);
-		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	}
+	evicted = inet_frag_evictor(&ip4_frags);
+	if (evicted)
+		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 785f5cd..862d089 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -163,34 +163,7 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
 
 static void nf_ct_frag6_evictor(void)
 {
-	struct nf_ct_frag6_queue *fq;
-	struct list_head *tmp;
-	unsigned int work;
-
-	work = atomic_read(&nf_frags.mem);
-	if (work <= nf_frags_ctl.low_thresh)
-		return;
-
-	work -= nf_frags_ctl.low_thresh;
-	while (work > 0) {
-		read_lock(&nf_frags.lock);
-		if (list_empty(&nf_frags.lru_list)) {
-			read_unlock(&nf_frags.lock);
-			return;
-		}
-		tmp = nf_frags.lru_list.next;
-		BUG_ON(tmp == NULL);
-		fq = list_entry(tmp, struct nf_ct_frag6_queue, q.lru_list);
-		atomic_inc(&fq->q.refcnt);
-		read_unlock(&nf_frags.lock);
-
-		spin_lock(&fq->q.lock);
-		if (!(fq->q.last_in&COMPLETE))
-			fq_kill(fq);
-		spin_unlock(&fq->q.lock);
-
-		fq_put(fq, &work);
-	}
+	inet_frag_evictor(&nf_frags);
 }
 
 static void nf_ct_frag6_expire(unsigned long data)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 74b2113..454db16 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -181,33 +181,11 @@ static __inline__ void fq_kill(struct frag_queue *fq)
 
 static void ip6_evictor(struct inet6_dev *idev)
 {
-	struct frag_queue *fq;
-	struct list_head *tmp;
-	int work;
-
-	work = atomic_read(&ip6_frags.mem) - ip6_frags_ctl.low_thresh;
-	if (work <= 0)
-		return;
-
-	while(work > 0) {
-		read_lock(&ip6_frags.lock);
-		if (list_empty(&ip6_frags.lru_list)) {
-			read_unlock(&ip6_frags.lock);
-			return;
-		}
-		tmp = ip6_frags.lru_list.next;
-		fq = list_entry(tmp, struct frag_queue, q.lru_list);
-		atomic_inc(&fq->q.refcnt);
-		read_unlock(&ip6_frags.lock);
-
-		spin_lock(&fq->q.lock);
-		if (!(fq->q.last_in&COMPLETE))
-			fq_kill(fq);
-		spin_unlock(&fq->q.lock);
+	int evicted;
 
-		fq_put(fq, &work);
-		IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
-	}
+	evicted = inet_frag_evictor(&ip6_frags);
+	if (evicted)
+		IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 static void ip6_frag_expire(unsigned long data)
-- 
1.5.3.4 |  
	|  |  |  
	| 
		
			| [PATCH 8/9] Small cleanup for xxx_put after evictor consolidation [message #21642 is a reply to message #21633] | Fri, 12 October 2007 13:27   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| After the evictor code is consolidated there is no need to
pass the extra pointer to the xxx_put() functions.
The only place where it made sense was the evictor code itself.
Maybe this change should go with the previous (or with the
next) patch, but I try to keep the patches as short as
possible to simplify the review (but they are still large
anyway), so this change goes in a separate patch.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a59ac39..4ea7948 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -152,10 +152,10 @@ static __inline__ struct ipq *frag_alloc_queue(void)
 
 /* Destruction primitives. */
 
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
 {
 	if (atomic_dec_and_test(&ipq->q.refcnt))
-		inet_frag_destroy(&ipq->q, &ip4_frags, work);
+		inet_frag_destroy(&ipq->q, &ip4_frags, NULL);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -227,7 +205,7 @@ static void ip_expire(unsigned long arg)
 	}
 out:
 	spin_unlock(&qp->q.lock);
-	ipq_put(qp, NULL);
+	ipq_put(qp);
 }
 
 /* Creation primitives. */
@@ -257,7 +235,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 			atomic_inc(&qp->q.refcnt);
 			write_unlock(&ip4_frags.lock);
 			qp_in->q.last_in |= COMPLETE;
-			ipq_put(qp_in, NULL);
+			ipq_put(qp_in);
 			return qp;
 		}
 	}
@@ -652,7 +630,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 			ret = ip_frag_reasm(qp, dev);
 
 		spin_unlock(&qp->q.lock);
-		ipq_put(qp, NULL);
+		ipq_put(qp);
 		return ret;
 	}
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 785f5cd..862d089 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -147,10 +147,10 @@ static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
 
 /* Destruction primitives. */
 
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
 {
 	if (atomic_dec_and_test(&fq->q.refcnt))
-		inet_frag_destroy(&fq->q, &nf_frags, work);
+		inet_frag_destroy(&fq->q, &nf_frags, NULL);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -206,7 +179,7 @@ static void nf_ct_frag6_expire(unsigned long data)
 
 out:
 	spin_unlock(&fq->q.lock);
-	fq_put(fq, NULL);
+	fq_put(fq);
 }
 
 /* Creation primitives. */
@@ -228,7 +201,7 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
 			atomic_inc(&fq->q.refcnt);
 			write_unlock(&nf_frags.lock);
 			fq_in->q.last_in |= COMPLETE;
-			fq_put(fq_in, NULL);
+			fq_put(fq_in);
 			return fq;
 		}
 	}
@@ -719,7 +692,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
 		spin_unlock(&fq->q.lock);
 		pr_debug("Can't insert skb to queue\n");
-		fq_put(fq, NULL);
+		fq_put(fq);
 		goto ret_orig;
 	}
 
@@ -730,7 +703,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	}
 	spin_unlock(&fq->q.lock);
 
-	fq_put(fq, NULL);
+	fq_put(fq);
 	return ret_skb;
 
 ret_orig:
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 74b2113..454db16 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -165,10 +165,10 @@ static inline struct frag_queue *frag_alloc_queue(void)
 
 /* Destruction primitives. */
 
-static __inline__ void fq_put(struct frag_queue *fq, int *work)
+static __inline__ void fq_put(struct frag_queue *fq)
 {
 	if (atomic_dec_and_test(&fq->q.refcnt))
-		inet_frag_destroy(&fq->q, &ip6_frags, work);
+		inet_frag_destroy(&fq->q, &ip6_frags, NULL);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -246,7 +224,7 @@ out:
 	if (dev)
 		dev_put(dev);
 	spin_unlock(&fq->q.lock);
-	fq_put(fq, NULL);
+	fq_put(fq);
 }
 
 /* Creation primitives. */
@@ -270,7 +248,7 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
 			atomic_inc(&fq->q.refcnt);
 			write_unlock(&ip6_frags.lock);
 			fq_in->q.last_in |= COMPLETE;
-			fq_put(fq_in, NULL);
+			fq_put(fq_in);
 			return fq;
 		}
 	}
@@ -676,7 +654,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 			ret = ip6_frag_reasm(fq, skbp, dev);
 
 		spin_unlock(&fq->q.lock);
-		fq_put(fq, NULL);
+		fq_put(fq);
 		return ret;
 	}
 
-- 
1.5.3.4 |  
	|  |  |  
	| 
		
			| [PATCH 9/9] Consolidate the xxx_put [message #21643 is a reply to message #21633] | Fri, 12 October 2007 13:29   |  
			| 
				
				
					|  Pavel Emelianov Messages: 1149
 Registered: September 2006
 | Senior Member |  |  |  
	| These use the generic data types too, so move
them into one place.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index cf583cf..911c2cd 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -51,4 +51,10 @@ void inet_frag_destroy(struct inet_frag_queue *q,
 				struct inet_frags *f, int *work);
 int inet_frag_evictor(struct inet_frags *f);
 
+static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+{
+	if (atomic_dec_and_test(&q->refcnt))
+		inet_frag_destroy(q, f, NULL);
+}
+
 #endif
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4ea7948..4470a50 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -154,8 +154,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
 
 static __inline__ void ipq_put(struct ipq *ipq)
 {
-	if (atomic_dec_and_test(&ipq->q.refcnt))
-		inet_frag_destroy(&ipq->q, &ip4_frags, NULL);
+	inet_frag_put(&ipq->q, &ip4_frags);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 862d089..bf6f2f0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -149,8 +149,7 @@ static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
 
 static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
 {
-	if (atomic_dec_and_test(&fq->q.refcnt))
-		inet_frag_destroy(&fq->q, &nf_frags, NULL);
+	inet_frag_put(&fq->q, &nf_frags);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 454db16..a50fbea 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -167,8 +167,7 @@ static inline struct frag_queue *frag_alloc_queue(void)
 
 static __inline__ void fq_put(struct frag_queue *fq)
 {
-	if (atomic_dec_and_test(&fq->q.refcnt))
-		inet_frag_destroy(&fq->q, &ip6_frags, NULL);
+	inet_frag_put(&fq->q, &ip6_frags);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
-- 
1.5.3.4 |  
	|  |  |  
	|  |  
	|  |  
	|  |  
	|  |  
	|  |  
	|  |  
	|  |  
	|  |  
	|  |  
	| 
		
			| Re: [PATCH 0/9] Consolidate IP fragment management [message #21718 is a reply to message #21633] | Mon, 15 October 2007 09:42  |  
			| 
				
				
					|  davem Messages: 463
 Registered: February 2006
 | Senior Member |  |  |  
	| From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 12 Oct 2007 16:55:10 +0400
> Patrick recently pointed out that there are three places that
> perform IP fragment management: in ipv4, in ipv6 and in the ip6
> conntrack code. It looks like these places can be consolidated a bit.
> 
> The proposal is to create a common structure inet_frag_queue to 
> put common fields like list heads, refcounts etc in, and include
> it into the specific fragment queues. Then such objects like 
> hash tables, lists, locks etc are moved to common place (struct 
> inet_frags). At the end common code is moved to the 
> net/ipv4/inet_fragment.c.
> 
> The inet_ prefix in file names, data structures and functions, and
> the code place (net/ipv4) was proposed by Alexey, but the exact
> names were selected by me, so maybe there can be better ones.
> 
> Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Thank you for doing this work, I applied it all.
I had to decide whether to apply Herbert's recent patches
first or yours, because either way there would be some
conflicts to resolve.
I handled it the best I could, but it seems OK from here. |  
	|  |  | 
 
 
 Current Time: Sat Oct 25 20:52:00 GMT 2025 
 Total time taken to generate the page: 0.08349 seconds |