| Home » Mailing lists » Devel » Racy /proc creations interfaces Goto Forum:
	| 
		
			| Racy /proc creations interfaces [message #9325] | Wed, 27 December 2006 13:36  |  
			| 
				
				
					|  adobriyan Messages: 80
 Registered: November 2006
 | Member |  |  |  
	| 1. Not setting ->owner for your favourite /proc file is oopsable. 
 $ while true; do cat /proc/fs/xfs/stat 1>/dev/null 2>/dev/null; done
 # while true; do modprobe xfs; rmmod xfs; done
 
 BUG: unable to handle kernel paging request at virtual address fc281049
 printing eip:
 fc281049
 *pde = 37ca7067
 Oops: 0000 [#1]
 Modules linked in: af_packet ohci_hcd e1000 ehci_hcd uhci_hcd usbcore
 CPU:    0
 EIP:    0060:[<fc281049>]    Not tainted VLI
 EFLAGS: 00010282   (2.6.19 #2)
 EIP is at 0xfc281049
 eax: f0705000   ebx: 00000000   ecx: 00000105   edx: f1e5ff68
 esi: f0705000   edi: fc281049   ebp: 00000c00   esp: f1e5ff4c
 ds: 007b   es: 007b   ss: 0068
 Process cat (pid: 32588, ti=f1e5e000 task=f4377030 task.ti=f1e5e000)
 Stack: c0177d9c 00000c00 f1e5ff6c 00000000 00001000 08051000 f7ccc240 00000000
 00000000 edbe82c0 00001000 08051000 f1e5ffa4 c0149f05 f1e5ffa4 c0177be5
 edbe82c0 fffffff7 08051000 f1e5e000 c014a26d f1e5ffa4 00000105 00000000
 Call Trace:
 [<c0177d9c>] proc_file_read+0x1b7/0x262
 [<c0149f05>] vfs_read+0x85/0xf3
 [<c0177be5>] proc_file_read+0x0/0x262
 [<c014a26d>] sys_read+0x41/0x6a
 [<c0102be1>] sysenter_past_esp+0x56/0x79
 =======================
 Code:  Bad EIP value.
 EIP: [<fc281049>] 0xfc281049 SS:ESP 0068:f1e5ff4c
 
 2. After adding ->owner for your favourite /proc file it's still looks racy
 
 pde = create_proc_entry("foo", 0644, NULL);
 /* so what? pde->owner is NULL here */
 if (pde)
 pde->owner = THIS_MODULE;
 
 3. It's more: setting both ->read_proc and ->data after proc entry creation
 should be oopsable too, if ->read_proc operates on data in a non-trivial
 way (or ->write_proc).
 
 sound/oss/trident.c:
 4446				res = create_proc_entry("ALi5451", 0, NULL);
 4447				if (res) {
 4448					res->write_proc = ali_write_proc;
 /* ->data is NULL here. */
 4449					res->data = card;
 4450				}
 
 ali_write_proc() does spin_lock_irqsave on private card lock which is
 believed to be valid.
 
 4. Proposed semantics: PDE should be in maximally working state before
 gluing into main /proc tree (proc_register()).
 
 struct proc_entry_raw foo_pe_raw = {
 .owner = THIS_MODULE,
 .name = "foo",
 .mode = 0644,
 .read_proc = foo_read_proc,
 .data = foo_data,
 .parent = foo_parent,
 };
 
 pde = create_proc_entry(&foo_pe_raw);
 if (!pde)
 return -ENOMEM;
 
 where "struct proc_entry_raw" is cut down version of "struct proc_dir_entry"
 where new create_proc_entry() would do
 
 proc_create() or kmalloc()
 fill in fields from passed "struct proc_entry_raw"
 proc_register()
 
 Comments?
 
 Below is 0-order approximation to what I mean (not tested at all):
 
 --- a/fs/afs/proc.c
 +++ b/fs/afs/proc.c
 @@ -146,17 +146,23 @@ int afs_proc_init(void)
 goto error;
 proc_afs->owner = THIS_MODULE;
 
 -	p = create_proc_entry("cells", 0, proc_afs);
 +	p = __create_proc_entry(&(struct proc_entry_raw){
 +			.name = "cells",
 +			.parent = proc_afs,
 +			.proc_fops = &afs_proc_cells_fops,
 +			.owner = THIS_MODULE,
 +		});
 if (!p)
 goto error_proc;
 -	p->proc_fops = &afs_proc_cells_fops;
 -	p->owner = THIS_MODULE;
 
 -	p = create_proc_entry("rootcell", 0, proc_afs);
 +	p = __create_proc_entry(&(struct proc_entry_raw){
 +			.name = "rootcell",
 +			.parent = proc_afs,
 +			.proc_fops = &afs_proc_rootcell_fops,
 +			.owner = THIS_MODULE,
 +		});
 if (!p)
 goto error_cells;
 -	p->proc_fops = &afs_proc_rootcell_fops;
 -	p->owner = THIS_MODULE;
 
 _leave(" = 0");
 return 0;
 @@ -429,26 +435,35 @@ int afs_proc_cell_setup(struct afs_cell
 if (!cell->proc_dir)
 return -ENOMEM;
 
 -	p = create_proc_entry("servers", 0, cell->proc_dir);
 +	p = __create_proc_entry(&(struct proc_entry_raw){
 +			.name = "servers",
 +			.parent = cell->proc_dir,
 +			.proc_fops = &afs_proc_cell_servers_fops,
 +			.owner = THIS_MODULE,
 +			.data = cell,
 +		});
 if (!p)
 goto error_proc;
 -	p->proc_fops = &afs_proc_cell_servers_fops;
 -	p->owner = THIS_MODULE;
 -	p->data = cell;
 
 -	p = create_proc_entry("vlservers", 0, cell->proc_dir);
 +	p = __create_proc_entry(&(struct proc_entry_raw){
 +			.name = "vlservers",
 +			.parent = cell->proc_dir,
 +			.proc_fops = &afs_proc_cell_vlservers_fops,
 +			.owner = THIS_MODULE,
 +			.data = cell,
 +		});
 if (!p)
 goto error_servers;
 -	p->proc_fops = &afs_proc_cell_vlservers_fops;
 -	p->owner = THIS_MODULE;
 -	p->data = cell;
 
 -	p = create_proc_entry("volumes", 0, cell->proc_dir);
 +	p = __create_proc_entry(&(struct proc_entry_raw){
 +			.name = "volumes",
 +			.parent = cell->proc_dir,
 +			.proc_fops = &afs_proc_cell_volumes_fops,
 +			.owner = THIS_MODULE,
 +			.data = cell,
 +		});
 if (!p)
 goto error_vlservers;
 -	p->proc_fops = &afs_proc_cell_volumes_fops;
 -	p->owner = THIS_MODULE;
 -	p->data = cell;
 
 _leave(" = 0");
 return 0;
 diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
 index 10fff94..faf0470 100644
 --- a/fs/jbd/journal.c
 +++ b/fs/jbd/journal.c
 @@ -1975,12 +1975,12 @@ #define JBD_PROC_NAME "sys/fs/jbd-debug"
 
 static void __init create_jbd_proc_entry(void)
 {
 -	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
 -	if (proc_jbd_debug) {
 -		/* Why is this so hard? */
 -		proc_jbd_debug->read_proc = read_jbd_debug;
 -		proc_jbd_debug->write_proc = write_jbd_debug;
 -	}
 +	__create_proc_entry(&(struct proc_entry_raw){
 +			.name = JBD_PROC_NAME,
 +			.mode = 0644,
 +			.read_proc = read_jbd_debug,
 +			.write_proc = write_jbd_debug,
 +		});
 }
 
 static void __exit remove_jbd_proc_entry(void)
 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
 index 44fc32b..32de473 100644
 --- a/fs/jbd2/journal.c
 +++ b/fs/jbd2/journal.c
 @@ -1986,12 +1986,12 @@ #define JBD_PROC_NAME "sys/fs/jbd2-debug
 
 static void __init create_jbd_proc_entry(void)
 {
 -	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
 -	if (proc_jbd_debug) {
 -		/* Why is this so hard? */
 -		proc_jbd_debug->read_proc = read_jbd_debug;
 -		proc_jbd_debug->write_proc = write_jbd_debug;
 -	}
 +	__create_proc_entry(&(struct proc_entry_raw){
 +			.name = JBD_PROC_NAME,
 +			.mode = 0644,
 +			.read_proc = read_jbd_debug,
 +			.write_proc = write_jbd_debug,
 +		});
 }
 
 static void __exit jbd2_remove_jbd_proc_entry(void)
 diff --git a/fs/jffs/jffs_proc.c b/fs/jffs/jffs_proc.c
 index 9bdd99a..ec00942 100644
 --- a/fs/jffs/jffs_proc.c
 +++ b/fs/jffs/jffs_proc.c
 @@ -85,12 +85,12 @@ int jffs_register_jffs_proc_dir(int mtd,
 if (!part_root)
 goto out1;
 
 -	/* Create entry for 'info' file */
 -	part_info = create_proc_entry ("info", 0, part_root);
 -	if (!part_info)
 +	if (!__create_proc_entry(&(struct proc_entry_raw){
 +			.name = "info",
 +			.parent = part_root,
 +			.read_proc = jffs_proc_info_read,
 +			.data = (void *)c,}))
 goto out2;
 -	part_info->read_proc = jffs_proc_info_read;
 -	part_info->data = (void *) c;
 
 /* Create entry for 'layout' file */
 part_layout = create_proc_entry ("layout", 0, part_root);
 diff --git a/fs/nfs/client.c b/fs/nfs/client.c
 index 23ab145..44ca278 100644
 --- a/fs/nfs/client.c
 +++ b/fs/nfs/client.c
 @@ -1406,20 +1406,27 @@ int __init nfs_fs_proc_init(void)
 proc_fs_nfs->owner = THIS_MODULE;
 
 /* a file of servers with which we're dealing */
 -	p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
 +	p = __create_proc_entry(&(struct proc_entry_raw){
 +			.name = "servers",
 +			.mode = S_IFREG|S_IRUGO,
 +			.parent = proc_fs_nfs,
 +			.proc_fops = &nfs_server_list_fops,
 +			.owner = THIS_MODULE,
 +		});
 if (!p)
 goto error_1;
 
 -	p->proc_fops = &nfs_server_list_fops;
 -	p->owner = THIS_MODULE;
 -
 /* a file of volumes that we have mounted */
 -	p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
 +	p = __create_proc_entry(&(struct proc_entry_raw){
 +			.name = "volumes",
 +			.mode = S_IFREG|S_IRUGO,
 +			.parent = proc_fs_nfs,
 +			.proc_fops = &nfs_volume_list_fops,
 +			.owner = THIS_MODULE,
 +		});
 if (!p)
 goto error_2;
 
 -	p->proc_fops = &nfs_volume_list_fops;
 -	p->owner = THIS_MODULE;
 return 0;
 
 error_2:
 diff --git a/fs/proc/generic.c b/fs/proc/generic.c
 index 853cb87..0bce166 100644
 --- a/fs/proc/generic.c
 +++ b/fs/proc/generic.c
 @@ -657,6 +657,47 @@ struct proc_dir_entry *proc_mkdir(const
 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
 }
 
 +struct proc_dir_entry *__create_proc_entry(struct proc_entry_raw *raw)
 +{
 +	struct proc_dir_entry *ent;
 +	mode_t mode = raw->mode;
 +	nlink_t nlink;
 +
 +	if (S_ISDIR(mode)) {
 +		if ((mode & S_IALLUGO) == 0)
 +			mode |= S_IRUGO | S_IXUGO;
 +		nlink = 2;
 +	} else {
 +		if ((mode & S_IFMT) == 0)
 +			mode |= S_IFREG;
 +		if ((mode & S_IALLUGO) == 0)
 +			mode |= S_IRUGO;
 +		nlink = 1;
 +	}
 +
 +	ent = proc_create(&raw->parent, raw->name, mode, nlink);
 +	if (ent) {
 +		if (S_ISDIR(mode)) {
 +			ent->proc_fops = &proc_dir_operations;
 +			ent->proc_iops = &proc_dir_inode_operations;
 +		}
 +
 +		ent->size = raw->size;
 +		if (raw->proc_fops)
 +			ent->proc_fops = raw->proc_fops;
 +		ent->data = raw->data;
 +		ent->read_proc = raw->read_proc;
 +		ent->write_proc = raw->write_proc;
 +		ent->owner = raw->owner;
 +
 +		if (proc_register(raw->parent, ent) < 0) {
 +			kfree(ent);
 +			ent = NULL;
 +		}
 +	}
 +	return ent;
 +}
 +
 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 struct proc_dir_entry *parent)
 {
 diff --git a/fs/proc/root.c b/fs/proc/root.c
 index 64d242b..f7d3fda 100644
 --- a/fs/proc/root.c
 +++ b/fs/proc/root.c
 @@ -166,6 +166,7 @@ struct proc_dir_entry proc_root = {
 EXPORT_SYMBOL(proc_symlink);
 EXPORT_SYMBOL(proc_mkdir);
 EXPORT_S
...
 
 
 |  
	|  |  |  
	| 
		
			| Re: Racy /proc creations interfaces [message #9344 is a reply to message #9325] | Wed, 27 December 2006 13:56   |  
			| 
				
				
					|  Al Viro Messages: 5
 Registered: March 2006
 | Junior Member |  |  |  
	| On Wed, Dec 27, 2006 at 04:42:23PM +0300, Alexey Dobriyan wrote: >
 >    	struct proc_entry_raw foo_pe_raw = {
 > 		.owner = THIS_MODULE,
 > 		.name = "foo",
 > 		.mode = 0644,
 > 		.read_proc = foo_read_proc,
 > 		.data = foo_data,
 > 		.parent = foo_parent,
 > 	};
 >
 > 	pde = create_proc_entry(&foo_pe_raw);
 > 	if (!pde)
 > 		return -ENOMEM;
 >
 >    where "struct proc_entry_raw" is cut down version of "struct proc_dir_entry"
 
 Ewwwwwwwwwwwwwww
 
 Please, please no.  Especially not .parent.  If anything, let's add a
 helper saying "it's all set up now".  And turn create_proc_entry()
 into a macro that would pass THIS_MODULE to underlying function and
 call that helper, so that simple cases wouldn't have to bother at all.
 |  
	|  |  |  
	| 
		
			| Re: Racy /proc creations interfaces [message #9352 is a reply to message #9344] | Thu, 28 December 2006 08:15   |  
			| 
				
				
					|  adobriyan Messages: 80
 Registered: November 2006
 | Member |  |  |  
	| On Wed, Dec 27, 2006 at 01:56:24PM +0000, Al Viro wrote: > On Wed, Dec 27, 2006 at 04:42:23PM +0300, Alexey Dobriyan wrote:
 > >
 > >    	struct proc_entry_raw foo_pe_raw = {
 > > 		.owner = THIS_MODULE,
 > > 		.name = "foo",
 > > 		.mode = 0644,
 > > 		.read_proc = foo_read_proc,
 > > 		.data = foo_data,
 > > 		.parent = foo_parent,
 > > 	};
 > >
 > > 	pde = create_proc_entry(&foo_pe_raw);
 > > 	if (!pde)
 > > 		return -ENOMEM;
 > >
 > >    where "struct proc_entry_raw" is cut down version of "struct proc_dir_entry"
 >
 > Ewwwwwwwwwwwwwww
 >
 > Please, please no.  Especially not .parent.  If anything, let's add a
 > helper saying "it's all set up now".  And turn create_proc_entry()
 > into a macro that would pass THIS_MODULE to underlying function and
 > call that helper, so that simple cases wouldn't have to bother at all.
 
 People are setting ->data after create_proc_entry():
 
 drivers/zorro/proc.c:
 110	static int __init zorro_proc_attach_device(u_int slot)
 111	{
 112		struct proc_dir_entry *entry;
 113		char name[4];
 114
 115		sprintf(name, "%02x", slot);
 116		entry = create_proc_entry(name, 0, proc_bus_zorro_dir);
 117		if (!entry)
 118			return -ENOMEM;
 119		entry->proc_fops = &proc_bus_zorro_operations;
 120		entry->data = &zorro_autocon[slot];
 121		entry->size = sizeof(struct zorro_dev);
 
 If create_proc_entry is a macro doing what you suggest (am I right?)
 
 #define create_proc_entry(name, mode, parent)
 ({
 struct proc_dir_entry *pde;
 
 pde = __create_proc_entry(name, mode, parent, THIS_MODULE);
 if (pde)
 mark_proc_entry_ready(pde);
 pde;
 })
 
 there is still a problem because we want it to be equivalent to
 
 pde = create_proc_entry(...);
 if (!pde)
 return -ENOMEM;
 pde->proc_fops = ...;
 pde->data = ...;
 mark_proc_entry_ready(pde);
 |  
	|  |  |  
	| 
		
			| [PATCH] Fix rmmod/read/write races in /proc entries [message #9654 is a reply to message #9352] | Mon, 15 January 2007 15:33   |  
			| 
				
				
					|  adobriyan Messages: 80
 Registered: November 2006
 | Member |  |  |  
	| -------------------------------------------------------- pde = create_proc_entry()
 if (!pde)
 return -ENOMEM;
 pde->write_proc = ...;
 open
 write
 copy_from_user
 pde = create_proc_entry();
 if (!pde) {
 remove_proc_entry();
 return -ENOMEM;
 /* module unloaded */
 }
 *boom*
 --------------------------------------------------------
 pde = create_proc_entry();
 if (pde) {
 /* which dereferences ->data */
 pde->write_proc = ...;
 open
 write
 pde->data = ...;
 }
 --------------------------------------------------------
 
 The following plan is going to be executed (as per Al Viro's explanations):
 
 PDE gets atomic counter counting reads and writes in progress
 via ->read_proc, ->write_proc, ->get_info . Generic proc code will bump
 PDE's counter before calling into module-specific method and decrement
 it after it returns.
 
 remove_proc_entry() will wait until all readers and writers are done.
 To do this reliably it will set ->proc_fops to NULL and generic proc
 code won't call into module it it sees NULL ->proc_fops.
 
 This patch implements part above. So far, no changes in modules code
 required. To proceed, lets look into ->proc_fops values during and after PDE
 creation:
 
 pde = create_proc_entry();
 if (pde)
 pde->proc_fops = ...;
 
 proc_create() create empty PDE;		(->proc_fops is NULL)
 proc_register() glues PDE to /proc	(->proc_fops is NULL)
 proc_register() sets ->proc_fops to default	(->proc_fops valid)
 [ module sets ->proc_fops to it's own	(->proc_fops) valid ]
 
 Observation: ->proc_fops is not NULL, when create_proc_entry() exits.
 
 Next set of races come into play:
 
 pde = create_proc_entry();
 if (pde) {
 pde->read_proc = ...;
 pde->data = ...;
 }
 
 Almost all ->read_proc, ->write_proc callbacks assume that ->data is valid when
 they're called. They cast ->data and dereference it.
 
 To fix this we need a way to indicate that PDE is not readable and writeable.
 ->proc_fops nicely fits, because modules setting ->proc_fops only don't need
 changes at all. create_proc_entry() will exit with NULL ->proc_fops and helpers
 will be needed sometimes to set it.
 
 1. Module sets ->proc_fops only
 no changes
 2. Module sets ->data and ->proc_fops
 use a helper to set ->data before ->proc_fops and a barrier.
 2. Module uses only create_proc_read_entry()
 changes only in create_proc_read_entry();
 3. Module uses only create_proc_info_entry()
 changes only in create_proc_info_entry();
 4. Module sets combination of ->data, ->read_proc, ->write_proc
 use helper which will set fields, barrier and sets default ->proc_fops.
 the best name I've come up so far is
 
 void set_proc_entry_data_rw(struct proc_dir_entry *, void *, read_proc_t, write_proc_t);
 
 Helper(s) will be introduced, then create_proc_entry() will start exiting with
 NULL ->proc_fops. After that use of helper(s) will become mandatory. Grepping
 for offenders will be easy (read_proc, ... are good names). ->data is bad
 name, however, after helpers will be plugged we can rename ->data to
 ->pde_data and it'll become good name.
 
 Sorry, for somewhat chaotic explanations, please, comment on patch and RFC.
 
 Signed-off-by: Alexey Dobriyan <adobriyan@openvz.org>
 ---
 
 fs/proc/generic.c       |   32 ++++++++++++++++++++++++++++----
 include/linux/proc_fs.h |    2 ++
 2 files changed, 30 insertions(+), 4 deletions(-)
 
 --- a/fs/proc/generic.c
 +++ b/fs/proc/generic.c
 @@ -19,6 +19,7 @@ #include <linux/init.h>
 #include <linux/idr.h>
 #include <linux/namei.h>
 #include <linux/bitops.h>
 +#include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <asm/uaccess.h>
 
 @@ -76,6 +77,12 @@ proc_file_read(struct file *file, char _
 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
 return -ENOMEM;
 
 +	if (!dp->proc_fops)
 +		goto out_free;
 +	atomic_inc(&dp->pde_users);
 +	if (!dp->proc_fops)
 +		goto out_dec;
 +
 while ((nbytes > 0) && !eof) {
 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
 
 @@ -195,6 +202,9 @@ proc_file_read(struct file *file, char _
 buf += n;
 retval += n;
 }
 +out_dec:
 +	atomic_dec(&dp->pde_users);
 +out_free:
 free_page((unsigned long) page);
 return retval;
 }
 @@ -205,14 +215,20 @@ proc_file_write(struct file *file, const
 {
 struct inode *inode = file->f_path.dentry->d_inode;
 struct proc_dir_entry * dp;
 +	ssize_t rv;
 
 dp = PDE(inode);
 
 -	if (!dp->write_proc)
 +	if (!dp->write_proc || !dp->proc_fops)
 return -EIO;
 
 -	/* FIXME: does this routine need ppos?  probably... */
 -	return dp->write_proc(file, buffer, count, dp->data);
 +	rv = -EIO;
 +	atomic_inc(&dp->pde_users);
 +	if (dp->proc_fops)
 +		/* FIXME: does this routine need ppos?  probably... */
 +		rv = dp->write_proc(file, buffer, count, dp->data);
 +	atomic_dec(&dp->pde_users);
 +	return rv;
 }
 
 
 @@ -717,12 +733,20 @@ void remove_proc_entry(const char *name,
 if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
 goto out;
 len = strlen(fn);
 -
 +again:
 spin_lock(&proc_subdir_lock);
 for (p = &parent->subdir; *p; p=&(*p)->next ) {
 if (!proc_match(len, fn, *p))
 continue;
 de = *p;
 +
 +		de->proc_fops = NULL;
 +		if (atomic_read(&de->pde_users) > 0) {
 +			spin_unlock(&proc_subdir_lock);
 +			msleep(1);
 +			goto again;
 +		}
 +
 *p = de->next;
 de->next = NULL;
 if (S_ISDIR(de->mode))
 --- a/include/linux/proc_fs.h
 +++ b/include/linux/proc_fs.h
 @@ -66,6 +66,8 @@ struct proc_dir_entry {
 atomic_t count;		/* use count */
 int deleted;		/* delete flag */
 void *set;
 +	atomic_t pde_users;	/* number of readers + number of writers via
 +				 * ->read_proc, ->write_proc, ->get_info */
 };
 
 struct kcore_list {
 |  
	|  |  |  
	| 
		
			| Re: [PATCH] Fix rmmod/read/write races in /proc entries [message #9819 is a reply to message #9654] | Tue, 23 January 2007 20:58   |  
			| 
				
				
					|  Andrew Morton Messages: 127
 Registered: December 2005
 | Senior Member |  |  |  
	| On Mon, 15 Jan 2007 18:39:27 +0300 Alexey Dobriyan <adobriyan@openvz.org> wrote:
 
 > --------------------------------------------------------
 > pde = create_proc_entry()
 > if (!pde)
 > 	return -ENOMEM;
 > pde->write_proc = ...;
 > 				open
 > 				write
 > 				copy_from_user
 > pde = create_proc_entry();
 > if (!pde) {
 > 	remove_proc_entry();
 > 	return -ENOMEM;
 > 	/* module unloaded */
 > }
 > 				*boom*
 > --------------------------------------------------------
 > pde = create_proc_entry();
 > if (pde) {
 > 	/* which dereferences ->data */
 > 	pde->write_proc = ...;
 > 				open
 > 				write
 > 	pde->data = ...;
 > }
 > --------------------------------------------------------
 >
 > The following plan is going to be executed (as per Al Viro's explanations):
 >
 > PDE gets atomic counter counting reads and writes in progress
 > via ->read_proc, ->write_proc, ->get_info . Generic proc code will bump
 > PDE's counter before calling into module-specific method and decrement
 > it after it returns.
 >
 > remove_proc_entry() will wait until all readers and writers are done.
 > To do this reliably it will set ->proc_fops to NULL and generic proc
 > code won't call into module it it sees NULL ->proc_fops.
 >
 > This patch implements part above. So far, no changes in modules code
 > required. To proceed, lets look into ->proc_fops values during and after PDE
 > creation:
 >
 > 	pde = create_proc_entry();
 > 	if (pde)
 > 		pde->proc_fops = ...;
 >
 > proc_create() create empty PDE;		(->proc_fops is NULL)
 > proc_register() glues PDE to /proc	(->proc_fops is NULL)
 > proc_register() sets ->proc_fops to default	(->proc_fops valid)
 > [ module sets ->proc_fops to it's own	(->proc_fops) valid ]
 >
 > Observation: ->proc_fops is not NULL, when create_proc_entry() exits.
 >
 > Next set of races come into play:
 >
 > 	pde = create_proc_entry();
 > 	if (pde) {
 > 		pde->read_proc = ...;
 > 		pde->data = ...;
 > 	}
 >
 > Almost all ->read_proc, ->write_proc callbacks assume that ->data is valid when
 > they're called. They cast ->data and dereference it.
 >
 > To fix this we need a way to indicate that PDE is not readable and writeable.
 > ->proc_fops nicely fits, because modules setting ->proc_fops only don't need
 > changes at all. create_proc_entry() will exit with NULL ->proc_fops and helpers
 > will be needed sometimes to set it.
 >
 > 1. Module sets ->proc_fops only
 > 	no changes
 > 2. Module sets ->data and ->proc_fops
 > 	use a helper to set ->data before ->proc_fops and a barrier.
 > 2. Module uses only create_proc_read_entry()
 > 	changes only in create_proc_read_entry();
 > 3. Module uses only create_proc_info_entry()
 > 	changes only in create_proc_info_entry();
 > 4. Module sets combination of ->data, ->read_proc, ->write_proc
 > 	use helper which will set fields, barrier and sets default ->proc_fops.
 > 	the best name I've come up so far is
 >
 > 	void set_proc_entry_data_rw(struct proc_dir_entry *, void *, read_proc_t, write_proc_t);
 >
 > Helper(s) will be introduced, then create_proc_entry() will start exiting with
 > NULL ->proc_fops. After that use of helper(s) will become mandatory. Grepping
 > for offenders will be easy (read_proc, ... are good names). ->data is bad
 > name, however, after helpers will be plugged we can rename ->data to
 > ->pde_data and it'll become good name.
 >
 > Sorry, for somewhat chaotic explanations, please, comment on patch and RFC.
 
 <head spins>
 
 Looks a bit hacky.  Can this race not be fixed by addition of suitable
 locking, or possibly refcounting-under-locking?
 
 > Signed-off-by: Alexey Dobriyan <adobriyan@openvz.org>
 > ---
 >
 >  fs/proc/generic.c       |   32 ++++++++++++++++++++++++++++----
 >  include/linux/proc_fs.h |    2 ++
 >  2 files changed, 30 insertions(+), 4 deletions(-)
 >
 > --- a/fs/proc/generic.c
 > +++ b/fs/proc/generic.c
 > @@ -19,6 +19,7 @@ #include <linux/init.h>
 >  #include <linux/idr.h>
 >  #include <linux/namei.h>
 >  #include <linux/bitops.h>
 > +#include <linux/delay.h>
 >  #include <linux/spinlock.h>
 >  #include <asm/uaccess.h>
 >
 > @@ -76,6 +77,12 @@ proc_file_read(struct file *file, char _
 >  	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
 >  		return -ENOMEM;
 >
 > +	if (!dp->proc_fops)
 > +		goto out_free;
 > +	atomic_inc(&dp->pde_users);
 > +	if (!dp->proc_fops)
 > +		goto out_dec;
 > +
 
 You'll be shocked to know that I'd prefer more comments in there.  Enough
 for a later maintainer to be able to understand what's going on.
 
 
 >  	while ((nbytes > 0) && !eof) {
 >  		count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
 >
 > @@ -195,6 +202,9 @@ proc_file_read(struct file *file, char _
 >  		buf += n;
 >  		retval += n;
 >  	}
 > +out_dec:
 > +	atomic_dec(&dp->pde_users);
 > +out_free:
 >  	free_page((unsigned long) page);
 >  	return retval;
 >  }
 > @@ -205,14 +215,20 @@ proc_file_write(struct file *file, const
 >  {
 >  	struct inode *inode = file->f_path.dentry->d_inode;
 >  	struct proc_dir_entry * dp;
 > +	ssize_t rv;
 >
 >  	dp = PDE(inode);
 >
 > -	if (!dp->write_proc)
 > +	if (!dp->write_proc || !dp->proc_fops)
 >  		return -EIO;
 >
 > -	/* FIXME: does this routine need ppos?  probably... */
 > -	return dp->write_proc(file, buffer, count, dp->data);
 > +	rv = -EIO;
 > +	atomic_inc(&dp->pde_users);
 > +	if (dp->proc_fops)
 > +		/* FIXME: does this routine need ppos?  probably... */
 > +		rv = dp->write_proc(file, buffer, count, dp->data);
 > +	atomic_dec(&dp->pde_users);
 > +	return rv;
 >  }
 
 Here too.
 
 >
 > @@ -717,12 +733,20 @@ void remove_proc_entry(const char *name,
 >  	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
 >  		goto out;
 >  	len = strlen(fn);
 > -
 > +again:
 >  	spin_lock(&proc_subdir_lock);
 >  	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 >  		if (!proc_match(len, fn, *p))
 >  			continue;
 >  		de = *p;
 > +
 > +		de->proc_fops = NULL;
 > +		if (atomic_read(&de->pde_users) > 0) {
 > +			spin_unlock(&proc_subdir_lock);
 > +			msleep(1);
 > +			goto again;
 > +		}
 > +
 
 And here.
 
 >  		*p = de->next;
 >  		de->next = NULL;
 >  		if (S_ISDIR(de->mode))
 > --- a/include/linux/proc_fs.h
 > +++ b/include/linux/proc_fs.h
 > @@ -66,6 +66,8 @@ struct proc_dir_entry {
 >  	atomic_t count;		/* use count */
 >  	int deleted;		/* delete flag */
 >  	void *set;
 > +	atomic_t pde_users;	/* number of readers + number of writers via
 > +				 * ->read_proc, ->write_proc, ->get_info */
 >  };
 >
 >  struct kcore_list {
 |  
	|  |  |  
	| 
		
			| Re: [PATCH] Fix rmmod/read/write races in /proc entries [message #9837 is a reply to message #9819] | Wed, 24 January 2007 15:16  |  
			| 
				
				
					|  adobriyan Messages: 80
 Registered: November 2006
 | Member |  |  |  
	| On Tue, Jan 23, 2007 at 12:58:01PM -0800, Andrew Morton wrote: > <head spins>
 >
 > Looks a bit hacky.  Can this race not be fixed by addition of suitable
 > locking, or possibly refcounting-under-locking?
 
 I'll think about it.
 
 > > @@ -76,6 +77,12 @@ proc_file_read(struct file *file, char _
 > >  	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
 > >  		return -ENOMEM;
 > >
 > > +	if (!dp->proc_fops)
 > > +		goto out_free;
 > > +	atomic_inc(&dp->pde_users);
 > > +	if (!dp->proc_fops)
 > > +		goto out_dec;
 > > +
 >
 > You'll be shocked to know that I'd prefer more comments in there.  Enough
 > for a later maintainer to be able to understand what's going on.
 
 Here is replacement patch with rewritten changelog and comments in
 place. HTH.
 
 
 
 [PATCH] Fix rmmod/read/write races in /proc entries
 
 Current /proc creation interfaces suffer from at least two types of races:
 --------------------------------------------------------
 1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
 meanwhile.
 
 pde = create_proc_entry()
 if (!pde)
 return -ENOMEM;
 pde->write_proc = ...
 open
 write
 copy_from_user
 pde = create_proc_entry();
 if (!pde) {
 remove_proc_entry();
 return -ENOMEM;
 /* module unloaded */
 }
 *boom*
 --------------------------------------------------------
 2. Read/write happens when PDE only partially initialized. ->data is NULL
 when create_proc_entry() returns. Almost all ->read_proc and
 ->write_proc handlers assume that ->data is valid.
 
 pde = create_proc_entry();
 if (pde) {
 /* which dereferences ->data */
 pde->write_proc = ...
 open
 write
 pde->data = ...
 }
 --------------------------------------------------------
 
 The following plan is going to be executed (as per Al Viro's explanations):
 
 PDE gets atomic counter counting reads and writes in progress done
 via ->read_proc, ->write_proc, ->get_info . Generic proc code will bump
 PDE's counter before calling into module-specific method and decrement
 it after it returns.
 
 remove_proc_entry() will wait until all readers and writers are done.
 To do this reliably it will set ->proc_fops to NULL and generic proc
 code won't call into module it it sees NULL ->proc_fops.
 
 This patch implements part above. So far, no changes in proc users
 required. Patch fixes races of type 1.
 
 
 
 Unfortunately, fixing races of type #2 will require changing in some modules.
 
 We need an indicator of PDE readinness of accepting reads and writes.
 ->proc_fops nicely fits. It is going to get new semantics:
 
 * if ->proc_fops is valid, PDE will accept reads and writes via ->read_proc,
 ->write_proc, ->get_info.
 * if ->proc_fops is NULL, PDE won't call into module's code.
 
 remove_proc_entry() and only remove_proc_entry() will clear ->proc_fops.
 create_proc_entry() will not set ->proc_fops. Helpers will be required.
 
 Helpers will set ->proc_fops last (after ->data, particularly).
 
 set_proc_entry_data_fops(pde, data, fops);
 set_proc_entry_data_read_write(pde, data, read_proc, write_proc);
 
 When all necessary helpers will be plugged, create_proc_entry will stop
 setting default proc_fops and helpers will start setting it. Races of
 type #2 will be fixed.
 
 If module sets ->proc_fops only, or uses create_proc_read_entry(), or uses
 create_proc_info_entry(), there won't be any changes for module.
 ---
 
 fs/proc/generic.c       |   61 +++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/proc_fs.h |   15 +++++++++++
 2 files changed, 73 insertions(+), 3 deletions(-)
 
 --- a/fs/proc/generic.c
 +++ b/fs/proc/generic.c
 @@ -19,6 +19,7 @@ #include <linux/init.h>
 #include <linux/idr.h>
 #include <linux/namei.h>
 #include <linux/bitops.h>
 +#include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <asm/uaccess.h>
 
 @@ -76,6 +77,25 @@ proc_file_read(struct file *file, char _
 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
 return -ENOMEM;
 
 +	if (!dp->proc_fops)
 +		/*
 +		 * remove_proc_entry() marked PDE as "going away".
 +		 * No new readers allowed.
 +		 */
 +		goto out_free;
 +	/*
 +	 * We are going to call into module's code via ->get_info or
 +	 * ->read_proc. Bump refcount so that remove_proc_entry() will
 +	 * wait for read to complete.
 +	 */
 +	atomic_inc(&dp->pde_users);
 +	if (!dp->proc_fops)
 +		/*
 +		 * While we're busy bumping refcount, remove_proc_entry()
 +		 * marked PDE as "going away". Obey.
 +		 */
 +		goto out_dec;
 +
 while ((nbytes > 0) && !eof) {
 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
 
 @@ -195,6 +215,9 @@ proc_file_read(struct file *file, char _
 buf += n;
 retval += n;
 }
 +out_dec:
 +	atomic_dec(&dp->pde_users);
 +out_free:
 free_page((unsigned long) page);
 return retval;
 }
 @@ -205,14 +228,33 @@ proc_file_write(struct file *file, const
 {
 struct inode *inode = file->f_path.dentry->d_inode;
 struct proc_dir_entry * dp;
 +	ssize_t rv;
 
 dp = PDE(inode);
 
 if (!dp->write_proc)
 return -EIO;
 +	/*
 +	 * remove_proc_entry() marked PDE as "going away".
 +	 * No new writers allowed.
 +	 */
 +	if (!dp->proc_fops)
 +		return -EIO;
 
 -	/* FIXME: does this routine need ppos?  probably... */
 -	return dp->write_proc(file, buffer, count, dp->data);
 +	rv = -EIO;
 +	/*
 +	 * We are going to call into module's code via ->write_proc .
 +	 * Bump refcount so that module won't dissapear while ->write_proc
 +	 * sleeps in copy_from_user(). remove_proc_entry() will wait for
 +	 * write to complete.
 +	 */
 +	atomic_inc(&dp->pde_users);
 +	if (dp->proc_fops)
 +		/* PDE is ready, refcount bumped, call into module. */
 +		/* FIXME: does this routine need ppos?  probably... */
 +		rv = dp->write_proc(file, buffer, count, dp->data);
 +	atomic_dec(&dp->pde_users);
 +	return rv;
 }
 
 
 @@ -717,12 +759,25 @@ void remove_proc_entry(const char *name,
 if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
 goto out;
 len = strlen(fn);
 -
 +again:
 spin_lock(&proc_subdir_lock);
 for (p = &parent->subdir; *p; p=&(*p)->next ) {
 if (!proc_match(len, fn, *p))
 continue;
 de = *p;
 +
 +		/*
 +		 * Stop accepting new readers/writers. If you're dynamically
 +		 * allocating ->proc_fops, save a pointer somewhere.
 +		 */
 +		de->proc_fops = NULL;
 +		/* Wait until all readers/writers are done. */
 +		if (atomic_read(&de->pde_users) > 0) {
 +			spin_unlock(&proc_subdir_lock);
 +			msleep(1);
 +			goto again;
 +		}
 +
 *p = de->next;
 de->next = NULL;
 if (S_ISDIR(de->mode))
 --- a/include/linux/proc_fs.h
 +++ b/include/linux/proc_fs.h
 @@ -56,6 +56,19 @@ struct proc_dir_entry {
 gid_t gid;
 loff_t size;
 struct inode_operations * proc_iops;
 +	/*
 +	 * NULL ->proc_fops means "PDE is going away RSN" or
 +	 * "PDE is just created". In either case ->get_info, ->read_proc,
 +	 * ->write_proc won't be called because it's too late or too early,
 +	 * respectively.
 +	 *
 +	 * Valid ->proc_fops means "use this file_operations" or
 +	 * "->data is setup, it's safe to call ->read_proc, ->write_proc which
 +	 * dereference it".
 +	 *
 +	 * If you're allocating ->proc_fops dynamically, save a pointer
 +	 * somewhere.
 +	 */
 const struct file_operations * proc_fops;
 get_info_t *get_info;
 struct module *owner;
 @@ -66,6 +79,8 @@ struct proc_dir_entry {
 atomic_t count;		/* use count */
 int deleted;		/* delete flag */
 void *set;
 +	atomic_t pde_users;	/* number of readers + number of writers via
 +				 * ->read_proc, ->write_proc, ->get_info */
 };
 
 struct kcore_list {
 |  
	|  |  | 
 
 
 Current Time: Fri Oct 31 08:36:26 GMT 2025 
 Total time taken to generate the page: 0.16400 seconds |