Quoting Cedric Le Goater (clg@fr.ibm.com):
> Mark Pflueger wrote:
> > hi everyone!
> >
> > i'm not subscribed to the list, so if you care to flame because of my noob
> > question, just do it to the list, otherwise please cc me.
>
> you should subscribe to containers@lists.osdl.org and send your ideas on that
> list. There's a BOF on that topic at OLS if you can attend.
>
> cheers,
>
> C.
Hi Mark,
Thanks for sending that patch. Ignoring code details for now, this is a
good time to discuss checkpoint strategies.
It looks like you are writing task memory to userspace explicitly on
demand. Dave Hansen is taking a different approach, using the swapfile
to back up memory. Eventually we would enable a swapfile per container.
Hopefully he can send a prototype out soon - I thought it was a really
cool idea, although of course we'll have to see how it pans out in
implementation :)
On the larger scale, there is the question of how we want to orchestrate
the checkpoint. Do we want to have one syscall enable a checkpoint of a
set of tasks, kicking out all the relevant information to userspace?
Do we want userspace to orchestrate the checkpoint, asking for the tasks
to be pulled off the runqueue, then polling for all the relevant
information (through /proc/pid/fd, etc), then putting the tasks back on
the runqueue?
Do we want the same as the above, but using the container interface to
make it more robust (i.e. pull all tasks off the runqueue using
echo 1 > /containers/vserver1/job_1/suspend), for instance against tasks
being forked while we're in the middle of a 'for p in $pids; suspend $pid'
loop?
Or do we want to use the freezer code to freeze a set of tasks or a
container, and then initiate the dump on it?
thanks,
-serge
> > i'm trying to write a checkpoint/restore module for processes and so have
> > a basic version going already - problem is, when i restore the process,
> > one of three things happens at random. first is, the process restored
> > segfaults. second is, i get a kernel null pointer dereference and third
> > is, i get a virtual address lookup error and a kernel crash. the trace
> > back and the address always change.
> >
> > the user space process is as simple as i could make it: (error checking
> > and debugging messages are left out)
> >
> >
> > /*
> >  * Checkpoint the calling process by running the external "child_take"
> >  * helper and waiting for it to finish.
> >  *
> >  * The helper is handed the caller's pid and the current checkpoint
> >  * number as decimal strings.  Stores the caller's pid in the global
> >  * chkptpid and reads the global checkpointnum.
> >  */
> > void take_chkpt(void) {
> >     pid_t pid;
> >     char call_pid[12];  /* large enough for any 32-bit int plus NUL */
> >     char call_num[12];
> >
> >     chkptpid = getpid();
> >     snprintf(call_pid, sizeof call_pid, "%d", chkptpid);
> >     snprintf(call_num, sizeof call_num, "%d", checkpointnum);
> >
> >     switch (pid = fork()) {
> >     case -1:
> >         fprintf(stderr, "Fork failed.\n");
> >         return;
> >     case 0: /* child process */
> >         /*
> >          * NOTE(review): the first argument after the path becomes the
> >          * helper's argv[0]; the argument list is kept exactly as it
> >          * was because the helper's main() is not visible here.
> >          */
> >         execl("child_take", call_pid, call_num, (char *)0);
> >         /* execl() returns only on failure, and then returns -1. */
> >         perror("execl: ");
> >         /* The child must never continue running the parent's code. */
> >         _exit(127);
> >     default: /* parent process */
> >         waitpid(pid, NULL, 0);
> >         break;
> >     }
> > }
> >
> >
> > /*
> >  * Restore the calling process from a checkpoint by running the
> >  * external "child_restore" helper and waiting for it to finish.
> >  *
> >  * Only the first call does any work: the parent sets the global
> >  * restore_retry after forking, and later calls return immediately.
> >  * Stores the caller's pid in the global chkptpid and reads the global
> >  * checkpointnum.
> >  *
> >  * NOTE(review): the early returns below skip LEAVEFUN(), as in the
> >  * original; confirm whether ENTERFUN/LEAVEFUN have to be paired.
> >  */
> > void restore_chkpts(void) {
> >     pid_t pid;
> >     char call_pid[12];  /* large enough for any 32-bit int plus NUL */
> >     char call_num[12];
> >
> >     ENTERFUN();
> >
> >     if (restore_retry) // do nothing on second call to restore
> >         return;
> >
> >     chkptpid = getpid();
> >     snprintf(call_pid, sizeof call_pid, "%d", chkptpid);
> >     snprintf(call_num, sizeof call_num, "%d", checkpointnum);
> >
> >     switch (pid = fork()) {
> >     case -1:
> >         fprintf(stderr, "MP: Fork failed.\n");
> >         return;
> >     case 0: /* child process */
> >         /*
> >          * NOTE(review): the first argument after the path becomes the
> >          * helper's argv[0]; the argument list is kept exactly as it
> >          * was because the helper's main() is not visible here.
> >          */
> >         execl("child_restore", call_pid, call_num, (char *)0);
> >         /* execl() returns only on failure, and then returns -1. */
> >         perror("execl: ");
> >         /* The child must never continue running the parent's code. */
> >         _exit(127);
> >     default: /* parent process */
> >         INF(("Parent Process"));
> >         restore_retry = 1;
> >         INF(("Wait for Child..."));
> >         waitpid(pid, NULL, 0);
> >         break;
> >     }
> >
> >     LEAVEFUN();
> > }
> >
> > /*
> >  * Test driver: take one checkpoint, print a greeting, then ask for a
> >  * restore.  The command-line arguments are not used.
> >  */
> > int main(int argc, char* argv[]) {
> >     (void)argc;
> >     (void)argv;
> >
> >     take_chkpt();
> >     fputs("Hello cruel world!\n", stdout);
> >     restore_chkpts();
> >
> >     return 0;
> > }
> >
> > where child_take and child_restore do the following:
> >
> >
> > /*
> >  * Save a checkpoint of process chkptpid through the checkpoint device.
> >  *
> >  * Opens CHKPT_DEVICE, creates /tmp/chkpt_<pid>_<checkpointnum>, stops
> >  * the target with SIGSTOP, issues CHKPT_IOCTL_SAVE, then resumes the
> >  * target with SIGCONT.  Never returns: exits with EXIT_SUCCESS when
> >  * the ioctl succeeded and EXIT_FAILURE otherwise.
> >  *
> >  * chkptpid:      pid of the process to checkpoint
> >  * checkpointnum: sequence number used in the checkpoint file name
> >  */
> > void child_take_chkpt(int chkptpid, int checkpointnum) {
> >     struct chkpt_ioctl chkptio;
> >     int dev_fd; // ioctl device file
> >     char chkptname[40]; // fits the prefix plus two full-width ints
> >     int len;
> >     int status = EXIT_SUCCESS;
> >
> >     if ((dev_fd = open(CHKPT_DEVICE, O_RDWR)) < 0) {
> >         perror("MP: Open device file");
> >         exit(EXIT_FAILURE);
> >     }
> >
> >     chkptio.pid = chkptpid;
> >     len = snprintf(chkptname, sizeof chkptname, "/tmp/chkpt_%d_%d",
> >                    chkptio.pid, checkpointnum);
> >     if (len < 0 || (size_t)len >= sizeof chkptname) {
> >         fprintf(stderr, "MP: checkpoint file name too long\n");
> >         close(dev_fd);
> >         exit(EXIT_FAILURE);
> >     }
> >
> >     chkptio.file = creat(chkptname, 00755);
> >     if (chkptio.file < 0) {
> >         /* Do not hand an invalid descriptor to the kernel module. */
> >         perror("MP: Create checkpoint file");
> >         close(dev_fd);
> >         exit(EXIT_FAILURE);
> >     }
> >
> >     // to go sure the parent process is in waitpid -- ugly, but works
> >     sleep(1);
> >     if (kill(chkptio.pid, SIGSTOP) < 0) {
> >         perror("MP: SIGSTOP");
> >         close(chkptio.file);
> >         close(dev_fd);
> >         exit(EXIT_FAILURE);
> >     }
> >     sleep(1);
> >
> >     if (ioctl(dev_fd, CHKPT_IOCTL_SAVE, (unsigned long)&chkptio) < 0) {
> >         perror("MP: CHKPT_IOCTL_SAVE");
> >         status = EXIT_FAILURE;
> >     }
> >
> >     close(dev_fd);
> >     close(chkptio.file);
> >     /* Always resume the target, even when the save failed. */
> >     kill(chkptio.pid, SIGCONT);
> >     exit(status);
> > }
> >
> > /*
> >  * Restore process chkptpid from a checkpoint through the checkpoint
> >  * device.
> >  *
> >  * Opens /tmp/chkpt_<pid>_<checkpointnum - 1> read-only, opens
> >  * CHKPT_DEVICE, stops the target with SIGSTOP, issues
> >  * CHKPT_IOCTL_RESTORE, then resumes the target with SIGCONT.  Never
> >  * returns: exits with EXIT_SUCCESS when the ioctl succeeded and
> >  * EXIT_FAILURE otherwise.
> >  *
> >  * chkptpid:      pid of the process to restore into
> >  * checkpointnum: one more than the number of the checkpoint to load
> >  */
> > void child_restore_chkpts(int chkptpid, int checkpointnum) {
> >     struct chkpt_ioctl chkptio;
> >     int dev_fd; // ioctl device file
> >     char chkptname[40]; // fits the prefix plus two full-width ints
> >     int len;
> >     int status = EXIT_SUCCESS;
> >
> >     len = snprintf(chkptname, sizeof chkptname, "/tmp/chkpt_%d_%d",
> >                    chkptpid, checkpointnum-1);
> >     if (len < 0 || (size_t)len >= sizeof chkptname) {
> >         fprintf(stderr, "MP: checkpoint file name too long\n");
> >         exit(EXIT_FAILURE);
> >     }
> >
> >     chkptio.file = open(chkptname, O_RDONLY);
> >     if (chkptio.file < 0) {
> >         /* Do not hand an invalid descriptor to the kernel module. */
> >         perror("MP: Open checkpoint file");
> >         exit(EXIT_FAILURE);
> >     }
> >     chkptio.pid = chkptpid;
> >
> >     dev_fd = open(CHKPT_DEVICE, O_RDWR);
> >     if (dev_fd < 0) {
> >         perror("MP: Open device file");
> >         close(chkptio.file);
> >         exit(EXIT_FAILURE);
> >     }
> >
> >     sleep(1);
> >     if (kill(chkptpid, SIGSTOP) < 0) {
> >         perror("MP: SIGSTOP");
> >         close(chkptio.file);
> >         close(dev_fd);
> >         exit(EXIT_FAILURE);
> >     }
> >     sleep(1);
> >
> >     if (ioctl(dev_fd, CHKPT_IOCTL_RESTORE, (unsigned long)&chkptio) < 0) {
> >         perror("MP: CHKPT_IOCTL_RESTORE");
> >         status = EXIT_FAILURE;
> >     }
> >
> >     close(chkptio.file);
> >     close(dev_fd);
> >     /* Always resume the target, even when the restore failed. */
> >     kill(chkptpid, SIGCONT);
> >     exit(status);
> > }
> >
> > the header for the files is this:
> >
> >
> > /* ioctl command numbers understood by the checkpoint device. */
> > enum {
> >     CHKPT_IOCTL_SAVE    = 0,  /* dispatched to chkpt_save() */
> >     CHKPT_IOCTL_RESTORE = 1   /* dispatched to chkpt_restore() */
> > };
> >
> > struct chkpt_ioctl { // argument block user space passes to both ioctl commands
> > pid_t pid; // pid of the task to save or restore (original note: for fork tests)
> > int file; // file descriptor of the checkpoint file, as opened by the calling process
> > };
> >
> > struct chkpt { // NOTE(review): presumably the header record of a checkpoint file -- confirm
> > pid_t pid; // for fork tests
> > struct pt_regs regs; // presumably the saved register set of the task -- confirm
> > unsigned int datasize; // presumably size of the saved data region -- confirm; units not visible here
> > unsigned int brksize; // presumably size of the saved brk (heap) region -- confirm
> > unsigned int stacksize; // presumably size of the saved stack region -- confirm
> > };
> >
> >
> > and finally the kernel module:
> >
> > /*
> >  * ioctl entry point of the checkpoint device.
> >  *
> >  * i, f: inode and open file of the device (unused)
> >  * cmd:  CHKPT_IOCTL_SAVE or CHKPT_IOCTL_RESTORE
> >  * arg:  user-space address of a struct chkpt_ioctl
> >  *
> >  * Returns 0 on success, -ENOTTY for an unknown command, -EFAULT when
> >  * the argument block cannot be copied from user space, and otherwise
> >  * the negative value returned by chkpt_save()/chkpt_restore().
> >  */
> > int chkpt_ioctl_handler(struct inode *i, struct file *f,
> >                         unsigned int cmd, unsigned long arg) {
> >     struct chkpt_ioctl pmio;
> >     int ret;
> >
> >     /* Reject unknown commands before touching user memory. */
> >     if (cmd != CHKPT_IOCTL_SAVE && cmd != CHKPT_IOCTL_RESTORE) {
> >         printk("...illegal ioctl cmd\n");
> >         return -ENOTTY;
> >     }
> >
> >     if (cmd == CHKPT_IOCTL_RESTORE) {
> >         INFO(("CHKPT_IOCTL_RESTORE"));
> >     }
> >
> >     /* Both commands take the same argument block; copy it once. */
> >     if (copy_from_user(&pmio, (void __user *)arg, sizeof(pmio))) {
> >         printk("...failed to copy from user\n");
> >         return -EFAULT;
> >     }
> >
> >     if (cmd == CHKPT_IOCTL_SAVE) {
> >         ret = chkpt_save(&pmio);
> >         if (ret < 0)
> >             printk("...failed to save chkpt\n");
> >     } else {
> >         ret = chkpt_restore(&pmio);
> >         if (ret < 0)
> >             printk("...failed to restore chkpt\n");
> >     }
> >
> >     /* As before, any non-negative helper result is reported as 0. */
> >     return ret < 0 ? ret : 0;
> > }
> >
> > static int chkpt_save(struct chkpt_ioctl *chkptio) {
> > struct task_struct *tsk;
> > struct chkpt chkpt;
> > unsigned int datasz, brksz, stacksz;
> > struct file *f;
> >
> > if (!(tsk = find_task_by_pid(chkptio->pid))) {
> > printk("...task %d not found\n", chkptio->pid);
> > return -1;
> > }
> >
> > f = current->files->fd[chkptio->file];
> >
> > datasz =
...