OpenVZ Forum


Home » Mailing lists » Devel » [PATCH 1/1] namespaces: introduce sys_hijack (v6)
[PATCH 1/1] namespaces: introduce sys_hijack (v6) [message #21917] Wed, 17 October 2007 21:54
serue is currently offline  serue
Messages: 750
Registered: February 2006
Senior Member
Ok hopefully friday I'll have some time to figure out the proper
way to do hijack on a cgroup (and maybe do a s390 or powerpc
version of the syscall), but in the meantime here is the the
current incarnation of the hijack patch.

-serge

>From 4b1f38e451519d58b710dc746137348f392b160f Mon Sep 17 00:00:00 2001
From: sergeh@us.ibm.com <hallyn@kernel.(none)>
Date: Tue, 16 Oct 2007 09:36:49 -0700
Subject: [PATCH 1/1] namespaces: introduce sys_hijack (v6)

Move most of do_fork() into a new do_fork_task() which acts on
a new argument, task, rather than on current.  do_fork() becomes
a call to do_fork_task(current, ...).

Introduce sys_hijack (for x86 only so far).  It is like clone, but
 in place of a stack pointer (which is assumed null) it accepts a
pid.  The process identified by that pid is the one which is
actually cloned.  Some state - include the file table, the signals
and sighand (and hence tty), and the ->parent are taken from the
calling process.

The effect is a sort of namespace enter.  The following program
uses sys_hijack to 'enter' all namespaces of the specified pid.
For instance in one terminal, do

	mount -t cgroup -ons /cgroup
	hostname
	  qemu
	ns_exec -u /bin/sh
	  hostname serge
          echo $$
            1073
	  cat /proc/$$/cgroup
	    ns:/node_1073

In another terminal then do

	hostname
	  qemu
	cat /proc/$$/cgroup
	  ns:/
	hijack 1073
	  hostname
	    serge
	  cat /proc/$$/cgroup
	    ns:/node_1073

Changelog:
	Aug 23: send a stop signal to the hijacked process
		(like ptrace does).
	Oct 09: Update for 2.6.23-rc8-mm2 (mainly pidns)
		Don't take task_lock under rcu_read_lock
		Send hijacked process to cgroup_fork() as
		the first argument.
		Removed some unneeded task_locks.
	Oct 16: Fix bug introduced into alloc_pid.
	Oct 16: Add 'int which' argument to sys_hijack to
		allow later expansion to use cgroup in place
		of pid to specify what to hijack.

==============================================================
hijack.c
==============================================================

int do_clone_task(void)
{
	execl("/bin/sh", "/bin/sh", NULL);
}

int main(int argc, char *argv[])
{
	int pid;
	int ret;
	int status;

	if (argc < 2)
		return 1;
	pid = atoi(argv[1]);

	ret = syscall(327, SIGCHLD, 1, (unsigned long) pid);

	if  (ret == 0) {
		return do_clone_task();
	} else if (ret < 0) {
		perror("sys_hijack");
	} else {
		printf("waiting on cloned process %d\n", ret);
		while(waitpid(-1, &status, __WALL) != -1)
				;
		printf("cloned process %d exited with %d\n", ret, status);
	}

	return ret;
}
==============================================================

Signed-off-by: Serge Hallyn <serue@us.ibm.com>
---
 arch/i386/kernel/process.c       |   75 +++++++++++++++++++++++++++++++++++++-
 arch/i386/kernel/syscall_table.S |    1 +
 arch/s390/kernel/process.c       |   12 +++++-
 include/asm-i386/unistd.h        |    3 +-
 include/linux/cgroup.h           |    5 ++-
 include/linux/ptrace.h           |    1 +
 include/linux/sched.h            |    8 ++++
 include/linux/syscalls.h         |    1 +
 kernel/cgroup.c                  |    8 ++--
 kernel/fork.c                    |   67 +++++++++++++++++++++++++---------
 kernel/ptrace.c                  |    7 ++++
 11 files changed, 159 insertions(+), 29 deletions(-)

diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index bfcd01e..cc18a23 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -455,8 +455,15 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 	unsigned long unused,
 	struct task_struct * p, struct pt_regs * regs)
 {
+	return copy_a_thread(current, nr, clone_flags, esp, unused,
+		p, regs);
+}
+
+int copy_a_thread(struct task_struct *tsk, int nr, unsigned long clone_flags,
+	unsigned long esp, unsigned long unused,
+	struct task_struct * p, struct pt_regs * regs)
+{
 	struct pt_regs * childregs;
-	struct task_struct *tsk;
 	int err;
 
 	childregs = task_pt_regs(p);
@@ -471,7 +478,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 
 	savesegment(gs,p->thread.gs);
 
-	tsk = current;
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
 						IO_BITMAP_BYTES, GFP_KERNEL);
@@ -783,6 +789,71 @@ asmlinkage int sys_clone(struct pt_regs regs)
 	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
 }
 
+static int hijack_pid(struct pt_regs regs)
+{
+	unsigned long clone_flags = regs.ebx;
+	pid_t pid = regs.edx;
+	struct task_struct *task;
+	int ret = -EINVAL;
+
+	rcu_read_lock();
+	task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+	
+	if (task) {
+		task_lock(task);
+		put_task_struct(task);
+	}
+
+	if (task) {
+		if (!ptrace_may_attach_locked(task)) {
+			ret = -EPERM;
+			goto out_put_task;
+		}
+		if (task->ptrace) {
+			ret = -EBUSY;
+			goto out_put_task;
+		}
+		force_sig_specific(SIGSTOP, task);
+
+		task_unlock(task);
+		ret = do_fork_task(task, clone_flags, regs.esp, &regs, 0,
+			NULL, NULL);
+		wake_up_process(task);
+		task = NULL;
+	}
+
+out_put_task:
+	if (task)
+		task_unlock(task);
+	return ret;
+}
+
+static int hijack_cgroup(struct pt_regs regs)
+{
+	unsigned long clone_flags = regs.ebx;
+	int fd = regs.edx;
+
+	return -ENOSYS;
+}
+
+asmlinkage int sys_hijack(struct pt_regs regs)
+{
+	int which = regs.ecx;
+
+	switch (which) {
+	case HIJACK_PID:
+		return hijack_pid(regs);
+	case HIJACK_CGROUP:
+		return hijack_cgroup(regs);
+	default:
+		return -EINVAL;
+	}
+
+}
+
 /*
  * This is trivial, and on the face of it looks like it
  * could equally well be done in user mode.
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index df6e41e..495930c 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -326,3 +326,4 @@ ENTRY(sys_call_table)
 	.long sys_fallocate
 	.long sys_revokeat		/* 325 */
 	.long sys_frevoke
+	.long sys_hijack
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 70c5737..f256e7a 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -223,6 +223,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
 	unsigned long unused,
         struct task_struct * p, struct pt_regs * regs)
 {
+	return copy_a_thread(current, nr, clone_flags, new_stackp, unused,
+				 p, regs);
+}
+
+int copy_a_thread(struct task_struct *task, int nr, unsigned long clone_flags,
+	unsigned long new_stackp, unsigned long unused,
+        struct task_struct * p, struct pt_regs * regs)
+{
         struct fake_frame
           {
 	    struct stack_frame sf;
@@ -251,8 +259,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
 	 * save fprs to current->thread.fp_regs to merge them with
 	 * the emulated registers and then copy the result to the child.
 	 */
-	save_fp_regs(&current->thread.fp_regs);
-	memcpy(&p->thread.fp_regs, &current->thread.fp_regs,
+	save_fp_regs(&task->thread.fp_regs);
+	memcpy(&p->thread.fp_regs, &task->thread.fp_regs,
 	       sizeof(s390_fp_regs));
         p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _SEGMENT_TABLE;
 	/* Set a new TLS ?  */
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 006c1b3..fe6eeb4 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -332,10 +332,11 @@
 #define __NR_fallocate		324
 #define __NR_revokeat		325
 #define __NR_frevoke		326
+#define __NR_hijack		327
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 327
+#define NR_syscalls 328
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8747932..cb6d335 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -26,7 +26,7 @@ extern int cgroup_init(void);
 extern void cgroup_init_smp(void);
 extern void cgroup_lock(void);
 extern void cgroup_unlock(void);
-extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork(struct task_struct *parent, struct task_struct *p);
 extern void cgroup_fork_callbacks(struct task_struct *p);
 extern void cgroup_post_fork(struct task_struct *p);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
@@ -309,7 +309,8 @@ void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 static inline void cgroup_init_smp(void) {}
-static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork(struct task_struct *parent,
+					 struct task_struct *p) {}
 static inline void cgroup_fork_callbacks(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index ae8146a..727a4a9 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -97,6 +97,7 @@ extern void __ptrace_link(struct task_struct *child,
 extern void __ptrace_unlink(struct task_struct *child);
 extern void ptrace_untrace(struct task_struct *child);
 extern int ptrace_may_attach(struct task_struct *task);
+extern int ptrace_may_attach_locked(struct task_struct *task);
 
 static inline void ptrace_link(struct task_struct *child,
 			       struct task_struct *new_parent)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f21af1..eb20ccd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -28,6 +28,12 @@
 #define CLONE_NEWPID		0x20000000	/* New pid namespace */
 
 /*
+ * Hijack flags
+ */
+#define HIJACK_PID	1	/* 'id' is a pid */
+#define HIJACK_CGROUP	2	/* 'id' is an open fd for a cgroup dir */
+
+/*
  * Scheduling policies
  */
 #
...

Previous Topic: Support for autofs in RHEL5-based VPS
Next Topic: [PATCH] Fix memory leak in cleanup_ipv6_mibs()
Goto Forum:
  


Current Time: Sun Oct 26 23:09:28 GMT 2025

Total time taken to generate the page: 0.10731 seconds