When a process in a container signals its container-init, we want
to ensure that the signal does not terminate the container-init.
i.e. if the container-init has no handler for the signal, and the
signal is fatal, we want to ignore the signal.
Patches at the following URL check whether the signal will terminate
the cinit (i.e. it is a fatal signal and the cinit has no handler for it).
https://lists.linux-foundation.org/pipermail/containers/2007-September/007019.html
But if the fatal signal is currently blocked by the container-init,
those checks fail since we cannot ignore the signal even though it
may be fatal. This is because the container-init may install a handler
for that signal before unblocking it. So we must post the signal.
But if the container-init unblocks the signal without setting up
a handler, the signal would terminate the container-init.
This patch (very lightly tested :-) attempts to queue the blocked
signals to the container init separately and then requeue them
on the container-init's normal queue when the signal is unblocked.
This patch is just a prototype and to indicate the basic idea and
scope of changes. If it looks interesting/feasible I could do more
testing and possibly some reorg.
---
fs/binfmt_elf.c | 5
fs/proc/array.c | 10 +
include/linux/pid_namespace.h | 2
kernel/pid.c | 12 ++
kernel/signal.c | 211 ++++++++++++++++++++++++++++++++++++++++--
5 files changed, 230 insertions(+), 10 deletions(-)
Index: 2.6.23-rc8-mm2/kernel/signal.c
===================================================================
--- 2.6.23-rc8-mm2.orig/kernel/signal.c 2007-10-09 22:41:29.000000000 -0700
+++ 2.6.23-rc8-mm2/kernel/signal.c 2007-10-09 22:48:28.000000000 -0700
@@ -46,7 +46,7 @@ static int sig_init_ignore(struct task_s
// Currently this check is a bit racy with exec(),
// we can _simplify_ de_thread and close the race.
- if (likely(!is_cgroup_init(tsk->group_leader)))
+ if (likely(!is_container_init(tsk->group_leader)))
return 0;
if (is_current_in_ancestor_pid_ns(tsk) && !in_interrupt())
@@ -119,11 +119,28 @@ static inline int has_pending_signals(si
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
+static int task_has_pending_signals(struct task_struct *t)
+{
+ if (PENDING(&t->pending, &t->blocked) ||
+ PENDING(&t->signal->shared_pending, &t->blocked))
+ return 1;
+
+ if (is_container_init(t)) {
+ struct pid_namespace *ns = task_active_pid_ns(t);
+
+ if (!ns)
+ return 0;
+
+ return PENDING(&ns->cinit_blocked_pending, &t->blocked) ||
+ PENDING(&ns->cinit_blocked_shared_pending, &t->blocked);
+ }
+ return 0;
+}
+
+
static int recalc_sigpending_tsk(struct task_struct *t)
{
- if (t->signal->group_stop_count > 0 ||
- PENDING(&t->pending, &t->blocked) ||
- PENDING(&t->signal->shared_pending, &t->blocked)) {
+ if (t->signal->group_stop_count > 0 || task_has_pending_signals(t)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
return 1;
}
@@ -235,6 +252,20 @@ void flush_sigqueue(struct sigpending *q
}
}
+static void flush_cinit_signals(struct task_struct *t)
+{
+ struct pid_namespace *ns;
+
+ if (!is_container_init(t))
+ return;
+ ns = task_active_pid_ns(t);
+ if (ns) {
+ flush_sigqueue(&ns->cinit_blocked_pending);
+ flush_sigqueue(&ns->cinit_blocked_shared_pending);
+
+ }
+}
+
/*
* Flush all pending signals for a task.
*/
@@ -246,6 +277,7 @@ void flush_signals(struct task_struct *t
clear_tsk_thread_flag(t,TIF_SIGPENDING);
flush_sigqueue(&t->pending);
flush_sigqueue(&t->signal->shared_pending);
+ flush_cinit_signals(t);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
}
@@ -400,6 +432,12 @@ int dequeue_signal(struct task_struct *t
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
+ /*
+ * Note: We can ignore any signals for container init that are
+ * queued in the pid namespace since they are only queued there
+ * when signals are blocked (and we don't dequeue blocked signals
+ * here anyway)
+ */
signr = __dequeue_signal(&tsk->pending, mask, info);
if (!signr) {
signr = __dequeue_signal(&tsk->signal->shared_pending,
@@ -579,6 +617,21 @@ static void do_notify_parent_cldstop(str
* actual continuing for SIGCONT, but not the actual stopping for stop
* signals. The process stop is done as a signal action for SIG_DFL.
*/
+static void rm_from_cinit_queue(unsigned long mask, struct task_struct *t)
+{
+ struct pid_namespace *ns;
+
+ if (!is_container_init(t))
+ return;
+
+ ns = task_active_pid_ns(t);
+ if (!ns)
+ 	return;
+
+ rm_from_queue(mask, &ns->cinit_blocked_pending);
+ rm_from_queue(mask, &ns->cinit_blocked_shared_pending);
+}
+
static void handle_stop_signal(int sig, struct task_struct *p)
{
struct task_struct *t;
@@ -597,6 +650,7 @@ static void handle_stop_signal(int sig,
* This is a stop signal. Remove SIGCONT from all queues.
*/
rm_from_queue(sigmask(SIGCONT), &p->signal->shared_pending);
+ rm_from_cinit_queue(sigmask(SIGCONT), p);
t = p;
do {
rm_from_queue(sigmask(SIGCONT), &t->pending);
@@ -627,6 +681,7 @@ static void handle_stop_signal(int sig,
spin_lock(&p->sighand->siglock);
}
rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
+ rm_from_cinit_queue(SIG_KERNEL_STOP_MASK, p);
t = p;
do {
unsigned int state;
@@ -802,6 +857,7 @@ static int
specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
{
int ret = 0;
+ struct sigpending *pending;
BUG_ON(!irqs_disabled());
assert_spin_locked(&t->sighand->siglock);
@@ -816,13 +872,75 @@ specific_send_sig_info(int sig, struct s
if (LEGACY_QUEUE(&t->pending, sig))
goto out;
- ret = send_signal(sig, info, t, &t->pending);
+ pending = &t->pending;
+ if (is_container_init(t) && sigismember(&t->blocked, sig) &&
+ !is_current_in_ancestor_pid_ns(t)) {
+ struct pid_namespace *ns = task_active_pid_ns(t);
+
+ /*
+ * If container init is exiting (ns == NULL) while we are
+ * trying to post a blocked signal, post to the normal
+ * queue for now.
+ */
+ if (ns && LEGACY_QUEUE(&ns->cinit_blocked_pending, sig))
+ goto out;
+ else if (ns)
+ pending = &ns->cinit_blocked_pending;
+ }
+
+ ret = send_signal(sig, info, t, pending);
if (!ret && !sigismember(&t->blocked, sig))
signal_wake_up(t, sig == SIGKILL);
out:
return ret;
}
+static void requeue_cinit_signals(struct task_struct *t, sigset_t *set)
+{
+ struct sigqueue *q, *n;
+ struct pid_namespace *ns = task_active_pid_ns(t);
+ struct sigpending *ns_pending;
+ struct sigpending *ns_shpending;
+ struct sigpending *tsk_pending;
+ struct sigpending *tsk_shpending;
+
+ /*
+ * Unblocking a signal while the process is exiting?
+ */
+ if (!ns)
+ return;
+
+ /*
+ * For each signal being unblocked, remove it from the namespace
+ * pending list and add it to task's pending list
+ */
+ tsk_pending = &t->pending;
+ ns_pending = &ns->cinit_blocked_pending;
+
+ list_for_each_entry_safe(q, n, &ns_pending->list, list) {
+ if (sigismember(set, q->info.si_signo)) {
+ list_del_init(&q->list);
+ sigdelset(&ns_pending->signal, q->info.si_signo);
+
+ list_add_tail(&q->list, &tsk_pending->list);
+ sigaddset(&tsk_pending->signal, q->info.si_signo);
+ }
+ }
+
+ /* Repeat for the shared-pending signals */
+ tsk_shpending = &t->signal->shared_pending;
+ ns_shpending = &ns->cinit_blocked_shared_pending;
+ list_for_each_entry_safe(q, n, &ns_shpending->list, list) {
+ if (sigismember(set, q->info.si_signo)) {
+ list_del_init(&q->list);
+ sigdelset(&ns_shpending->signal, q->info.si_signo);
+
+ list_add_tail(&q->list, &tsk_shpending->list);
+ sigaddset(&tsk_shpending->signal, q->info.si_signo);
+ }
+ }
+}
+
/*
* Force a signal that the process can't ignore: if necessary
* we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -848,6 +966,20 @@ force_sig_info(int sig, struct siginfo *
action->sa.sa_handler = SIG_DFL;
if (blocked) {
sigdelset(&t->blocked, sig);
+ if (is_container_init(t)) {
+ struct pid_namespace *ns;
+ sigset_t set;
+ /*
+ * We just unblocked a signal. Requeue any
+ * pending instances of the signal on the
+ * namespace queue to the task's pending
+ * queue.
+ */
+ sigemptyset(&set);
+ sigaddset(&set, sig);
+ ns = task_active_pid_ns(t);
+ requeue_cinit_signals(t, &set);
+ }
recalc_sigpending_and_wake(t);
}
}
@@ -890,6 +1022,13 @@ __group_complete_signal(int sig, struct
struct task_struct *t;
/*
+ * If signal came from same or descendant namespace and is a
+ * blocked signal, we process the signal when we unblock it
+ */
+ if (!is_current_in_ancestor_pid_ns(p) && sigismember(&p->blocked, sig))
+ return;
+
+ /*
* Now find a thread we can wake up to take the signal off the queue.
*
* If the main thread wants the signal, it gets first crack.
@@ -987,6 +1126,7 @@ int
__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
int ret = 0;
+ struct sigpending *pending;
assert_spin_locked(&p->sighand->siglock);
handle_stop_signal(sig, p);
@@ -999,12 +1139,28 @@ __group_send_sig_info(int sig, struct si
/* This is a non-RT signal and we already have one queued. */
return ret;
+ pending = &p->signal->shared_pending;
+ if (is_container_init(p) && sigismember(&p->blocked, sig) &&
+ !is_current_in_ancestor_pid_ns(p)
...