#!/bin/bash

############################################################################
# mbhave: manually balanced high availability virtual environment.
############################################################################
# (c) 2008 L Garrido <luisgarrido@users.sourceforge.net>
# Based on a solution designed by Mark Gollahon.
############################################################################
#
# Procedure:
#
# [MS] means the action must be executed in both nodes.
# [M] means the action must be executed only in the master node.
#
#  [MS]  1. Calculate size and create a new logical volume to install the VE in.
#  [MS]  2. Create DRBD device (DRBD service must be running.)
#           - Add definition to /etc/drbd.conf.
#           - Create metadata.
#           - Bring it up.
#  [M]   3. Start DRBD device synchronization.
#  [MS]  4. Create mount point.
#  [M]   5. Create filesystem.
#  [M]   6. Mount filesystem.
#  [M]   7. Create VE.
#  [M]   8. Move VE conf file to filesystem.
#  [M]   9. Unmount filesystem.
#  [M]  10. Create inactive Heartbeat resources and constraints.
#  [M]  11. Wait until DRBD device is synced.
#  [MS] 12. Bring DRBD device down.
#  [M]  13. Activate Heartbeat resources.
#
############################################################################
#
# VE id: $VEID
# Name: NAME = ovz${VEID}
# Logical volume: $NAME
# DRBD resource: $NAME
# DRBD device: /dev/drbd$VEID
# Default DRBD port: 5$VEID
# Mount point: $VEROOT
# Heartbeat DRBD resource: $NAME
# Heartbeat master/slave resource: ${NAME}ms
# Heartbeat Group resource: ${NAME}gp
# Heartbeat Filesystem resource: ${NAME}fs
# Heartbeat ManageVE resource: ${NAME}ve
# Heartbeat constraints: 
#   colocation drbd and group: ${NAME}dcg
#   order drbd and group: ${NAME}dbg
#   colocation fs and ve: ${NAME}fcv
#   order fs and ve: ${NAME}fbv
#   preferred node: ${NAME}pn
#
############################################################################
#
# ssh configuration.
#
# In order to be able to execute commands in the slave machine fluently,
# it is recommended to enable ssh without password. This is also needed if
# you want to use vzmigrate. To achieve this execute these commands in the 
# master node (the one which you will run mbhave on):
#
# Create a new pair of RSA asymmetric keys:
#
#     ssh-keygen -t rsa
#
# (just accept the default answers for the questions you are asked)
#
# Copy the public key to /root/.ssh/authorized_keys in the slave node:
#
#     scp  .ssh/id_rsa.pub <slave_node>:.ssh/authorized_keys
#
# (WARNING: check that you don't already have an authorized_keys file that you
# want to keep; if you do, you must append id_rsa.pub to it instead.)
#
# Make sure your .ssh/authorized_keys is only readable by root:
#
#     ssh <slave_node> chmod 600 .ssh/authorized_keys
#
# (notice you don't need a password anymore)
#
############################################################################

############################################################################
# Global configuration. TODO: send to a /etc/mbhave.conf?
############################################################################

# Host names of the two cluster nodes. They are compared against the output
# of `uname -n` and written as the "on <node>" sections of /etc/drbd.conf.
NODE1="cluster11"
NODE2="cluster12"

# Addresses used for the DRBD replication link of each node.
NODEIP1="192.168.0.11"
NODEIP2="192.168.0.12"

# Parent directory of the per-VE mount points ($VESDIR/$VEID).
VESDIR="/vz/ve"

# Default LVM volume group (can be overridden with -g/--vg).
VG="vgla"

# Default preferred node (can be overridden with -n/--node).
PREFNODE="$NODE1"

############################################################################
# Functions.
############################################################################

####################################################
# error() : print error message and exit.
#
# Arguments: $1 - message text; it is printed as
#                 "Error: <text>!".
# Outputs:   the formatted message, on stderr.
# Exits:     always terminates the script, status 1.
####################################################

error() {

  # The message is expanded inside the quotes so that embedded whitespace
  # and glob characters are printed verbatim instead of being word-split
  # or expanded against the current directory.
  $ECHO "Error: ${1}!" >&2
  exit 1

}

####################################################
# usage() : print usage options and exit.
#
# Outputs: the help text on stdout (through $CAT).
# Exits:   always terminates the script, status 0.
####################################################

usage() {

  # Quoted delimiter: the help text is literal, nothing in it is expanded.
  $CAT << 'EOF'

Usage: mbhave [OPTION]... VEID COMMAND [COMMAND ...]

Performs command COMMAND on VE VEID.

Parameters:

  VEID: must be at least 101. If we want dynamic assignment of the DRBD port
  it must be not greater than 9999.

  COMMAND: must be one of the following:

  clv, create_lv      Create logical volume.
  dlv, delete_lv      Delete logical volume.
  cdrbd, create_drbd  Create DRBD device.
  ddrbd, delete_drbd  Delete DRBD device.
  sdrbd, stop_drbd    Bring down DRBD device.
  cmp, create_mp      Create mount point.
  dmp, delete_mp      Delete mount point.
  cfs, create_fs      Create filesystem.
  mfs, mount_fs       Mount filesystem.
  ufs, umount_fs      Unmount filesystem.
  cve, create_ve      Create VE.
  dve, delete_ve      Delete VE.
  wfs, wait_for_sync  Wait until DRBD sync is finished.
  cha, create_ha      Create heartbeat configuration.
  dha, delete_ha      Delete heartbeat configuration.
  aha, activate_ha    Activate heartbeat resources.
  sha, stop_ha        Stop heartbeat resources.
  mha, migrate_ha     Stop, migrate and restart heartbeat resources.
  cf, create_full     Runs clv cdrbd cmp cfs mfs cve ufs cha wfs sdrbd aha
  df, delete_full     Runs sha dha dve dmp ddrbd dlv

Options:

  -s, --size    Disk space assigned to the VE partition in MB. Will be
                slightly larger than entered to allow for DRBD metadata.
  -g, --vg      LVM Volume Group where the DRBD partition will be created.
  -p, --port    TCP port to use for DRBD replication. If not specified it
                will be set to 5xxxx, where xxxx is the VEID.
  -n, --node    Preferred node for the VE. Must be one of the two
                configured cluster nodes.
  -h, --help    Print this help and exit.

EOF

  exit 0

}

####################################################
# create_lv() : Create logical volume.
#
# Creates logical volume $NAME in volume group $VG,
# first on the local node and then on the slave via
# $SSHSLAVE.
#
# Globals read: VG, VESIZE, NAME, SLAVE, SSHSLAVE,
#               VGS, LVCREATE, ECHO.
# Exits through error() when the volume group is not
# found, the size is missing or not a number, or
# either lvcreate invocation fails.
####################################################

create_lv() {

  local lvsize lcmd

  # VG exists? (Quoted so that an empty $VG is rejected instead of
  # turning into a bare "vgs" call that lists every group and succeeds.)

  if ! $VGS "$VG" &>/dev/null ; then
    error "volume group $VG not found"
  fi

  # Size specified?

  if [ -z "$VESIZE" ]; then
    error "you must specify a size in MB for the VE"
  fi

  if [[ ! "$VESIZE" =~ ^[0-9]+$ ]]; then
    error "incorrect VE size"
  fi

  # Calculate LV size plus DRBD metadata overhead.
  # http://www.drbd.org/users-guide/ch-internals.html#s-meta-data-size
  # "10#" forces base 10 so a size with a leading zero is not read as octal.

  lvsize=$((10#$VESIZE + 10#$VESIZE / 32768 + 1))

  $ECHO "Creating volume $NAME at group $VG with size $lvsize MB."

  # The command is kept as one string because the very same text is
  # handed to ssh for the slave; locally it is deliberately word-split.
  lcmd="$LVCREATE -L ${lvsize}M -n $NAME $VG"

  if ! $lcmd ; then
    error "could not create volume $NAME"
  fi

  if ! $SSHSLAVE "$lcmd" ; then
    error "could not create volume $NAME at $SLAVE"
  fi

}

####################################################
# delete_lv() : Delete logical volume.
#
# Removes $LVDEV on the local node and then on the
# slave via $SSHSLAVE.
#
# Globals read: LVREMOVE, LVDEV, NAME, SLAVE, SSHSLAVE.
# Exits through error() if either removal fails.
####################################################

delete_lv() {

  # Use the configured $LVREMOVE path, like every other tool in this
  # script, instead of relying on "lvremove" being found through PATH.
  local lcmd="$LVREMOVE -f $LVDEV"

  if ! $lcmd ; then
    error "could not remove volume $LVDEV"
  fi

  if ! $SSHSLAVE "$lcmd" ; then
    error "could not remove volume $NAME at $SLAVE"
  fi

}

####################################################
# create_drbd() : create DRBD device.
#
# Appends a "resource $NAME" section to /etc/drbd.conf
# on both nodes, creates the metadata and brings the
# resource up on both nodes, then makes the local node
# primary, overwriting the peer's data.
#
# Globals read:    LVDEV, NAME, VEID, NODE1, NODE2,
#                  NODEIP1, NODEIP2, DRBDDEV, SLAVE,
#                  SSHSLAVE, GREP, CAT, ECHO, DRBDADM.
# Globals written: DRBDPORT (set to 5$VEID when empty).
# Exits through error() on any failed step.
####################################################

create_drbd() {

  local drbdconf lcmd

  # The backing logical volume must already exist as a block device.

  if [ ! -b "$LVDEV" ]; then
    error "logical volume $LVDEV not found"
  fi

  # -q: only the exit status matters, do not echo the matching line.

  if $GREP -q "resource $NAME " /etc/drbd.conf ; then
    error "DRBD resource $NAME already in use"
  fi

  # For a VEID between 101 and 9999, prefix it with a 5 and use it
  # as DRBD port if none has been specified.
  if [ -z "$DRBDPORT" ]; then
    if [ "$VEID" -lt 10000 ]; then
      DRBDPORT=5$VEID
      $ECHO "Using DRBD port ${DRBDPORT}."
    else
      error "a DRBD port must be specified for VEID $VEID"
    fi
  fi

  # Add configuration to /etc/drbd.conf
  # The here-document terminator sits on a line of its own; the closing
  # parenthesis of the command substitution follows on the next line.

  drbdconf=$($CAT <<EOF
resource $NAME {
  on $NODE1 {
    address $NODEIP1:$DRBDPORT;
    device $DRBDDEV; disk $LVDEV; meta-disk internal;
  }
  on $NODE2 {
    address $NODEIP2:$DRBDPORT;
    device $DRBDDEV; disk $LVDEV; meta-disk internal;
  }
}
EOF
)

  if ! $ECHO "$drbdconf" >> /etc/drbd.conf ; then
    error "could not add $NAME to /etc/drbd.conf"
  fi

  # The text travels over ssh's stdin, so it never has to survive a
  # second round of quoting by the remote shell.
  if ! $ECHO "$drbdconf" | $SSHSLAVE "$CAT >> /etc/drbd.conf" ; then
    error "could not add $NAME to /etc/drbd.conf at $SLAVE"
  fi

  # Create metadata.

  lcmd="$DRBDADM -- --force create-md $NAME"

  if ! $lcmd ; then
    error "could not create metadata for drbd resource $NAME"
  fi

  if ! $SSHSLAVE "$lcmd" ; then
    error "could not create metadata for drbd resource $NAME at $SLAVE"
  fi

  # Start the device

  lcmd="$DRBDADM up $NAME"

  if ! $lcmd ; then
    error "could not start drbd resource $NAME"
  fi

  if ! $SSHSLAVE "$lcmd" ; then
    error "could not start drbd resource $NAME at $SLAVE"
  fi

  # Configure DRBD resource as primary and start synchronization.

  if ! $DRBDADM -- --overwrite-data-of-peer primary "$NAME" ; then
    error "could not make drbd resource $NAME primary"
  fi

}

####################################################
# delete_drbd() : delete DRBD device.
#
# Removes the "resource $NAME" section from
# /etc/drbd.conf on both nodes, keeping the previous
# file as /etc/drbd.conf.bak.
#
# Globals read: NAME, SLAVE, SSHSLAVE, GREP, DRBDADM,
#               CP, SED.
# Exits through error() if the resource is not in the
# local configuration, is not reported as
# "Unconfigured", or any copy/edit step fails.
####################################################

delete_drbd() {

  local state lcmd sedscr

  if ! $GREP -q "resource $NAME " /etc/drbd.conf ; then
    error "DRBD resource $NAME does not exist"
  fi

  # Make sure the device is down.
  # The state is captured and compared quoted: an empty or multi-word
  # answer from drbdadm must count as "not Unconfigured" rather than
  # break the test and let the deletion go ahead.

  state=$($DRBDADM state $NAME)

  if [ "$state" != "Unconfigured" ]; then
    error "drbd resource $NAME is not in Unconfigured state"
  fi

  # Eliminate its entry at drbd.conf

  lcmd="$CP -f /etc/drbd.conf /etc/drbd.conf.bak"

  if ! $lcmd ; then
    error "could not backup /etc/drbd.conf"
  fi

  if ! $SSHSLAVE "$lcmd" ; then
    error "could not backup /etc/drbd.conf at $SLAVE"
  fi

  # Delete from the "resource $NAME " line up to the next line that
  # starts with a closing brace.
  sedscr="/resource $NAME /,/^}/d"

  if ! $SED "$sedscr" /etc/drbd.conf.bak > /etc/drbd.conf ; then
    error "could not remove $NAME from /etc/drbd.conf"
  fi

  # On the slave both the sed script quoting and the redirection are
  # interpreted by the remote shell.
  if ! $SSHSLAVE "$SED \"$sedscr\" /etc/drbd.conf.bak > /etc/drbd.conf" ; then
    error "could not remove $NAME from /etc/drbd.conf at $SLAVE"
  fi

  # The device will remain showing up until the drbd module is reloaded.

}

####################################################
# stop_drbd() : stops the DRBD resource.
#
# Runs "$DRBDADM down $NAME" on the local node and
# then on the slave via $SSHSLAVE; exits through
# error() as soon as one of them fails.
####################################################

stop_drbd() {

  # Local node first.
  $DRBDADM down $NAME || error "could not bring down $NAME"

  # Then the peer.
  $SSHSLAVE $DRBDADM down $NAME || error "could not bring down $NAME at $SLAVE"

}

####################################################
# create_mp() : create mount point.
#
# Creates directory $VEROOT (and missing parents) on
# the local node and on the slave via $SSHSLAVE.
# Exits through error() if either mkdir fails, so a
# later mount does not fail for a less obvious reason.
####################################################

create_mp() {

  if ! $MKDIR -p "$VEROOT" ; then
    error "could not create mount point $VEROOT"
  fi

  if ! $SSHSLAVE $MKDIR -p "$VEROOT" ; then
    error "could not create mount point $VEROOT at $SLAVE"
  fi

}

####################################################
# delete_mp() : delete mount point.
#
# Runs $RMDIR on $VEROOT locally and then on the
# slave via $SSHSLAVE. Failures are not checked; the
# function returns the status of the remote call.
####################################################

delete_mp() {

  local mountpoint="$VEROOT"

  $RMDIR "$mountpoint"
  $SSHSLAVE $RMDIR "$mountpoint"

}

####################################################
# create_fs() : create filesystem for VE.
#
# Runs $MKFS on $DRBDDEV (local node only).
# Exits through error() if the filesystem cannot be
# created.
####################################################

create_fs() {

  # Create filesystem.

  if ! $MKFS "$DRBDDEV" ; then
    error "could not create filesystem on $DRBDDEV"
  fi

}

####################################################
# mount_fs() : mount filesystem for VE.
#
# Mounts $DRBDDEV as ext3 on $VEROOT and exits
# through error() if that fails; afterwards removes
# the lost+found directory from the mounted tree.
####################################################

mount_fs() {

  $MOUNT -t ext3 $DRBDDEV $VEROOT || error "could not mount $DRBDDEV on $VEROOT"

  # Delete the lost+found directory that appears when the fs is created.
  # (Its result is not checked.)

  $RMDIR $VEROOT/lost+found

  # TODO: implement live migration. Create a dir for checkpoints.
  # Size of a checkpoint? Calculate some extra space for it at create_lv?

  # $MKDIR $VEROOT/dump

}

####################################################
# umount_fs() : umount filesystem for VE.
#
# Unmounts $VEROOT on the local node; exits through
# error() if $UMOUNT fails.
####################################################

umount_fs() {

  $UMOUNT $VEROOT || error "could not umount $VEROOT"

}

####################################################
# create_ve() : create VE and move its conf file to
# the DRBD resource (only in one server.)
#
# After "$VZCTL create", /etc/vz/conf/$VEID.conf is
# moved to $VEROOT and replaced, on both nodes, by a
# symlink pointing at the moved file.
#
# Globals read: VZCTL, MV, LN, VEID, VEROOT, SLAVE,
#               SSHSLAVE.
# Exits through error() on any failed step.
####################################################

create_ve() {

  local conf="/etc/vz/conf/$VEID.conf"
  local target="$VEROOT/$VEID.conf"
  local lcmd

  if ! $VZCTL create "$VEID" ; then
    error "could not create VE $VEID"
  fi

  # Move conf file to own fs.
  # This must succeed before the link is made: "ln -sf" would otherwise
  # overwrite the only copy of the conf file with a dangling symlink.

  if ! $MV "$conf" "$target" ; then
    error "could not move $conf to $target"
  fi

  # Create symlink to conf in own fs.

  lcmd="$LN -sf $target $conf"

  if ! $lcmd ; then
    error "could not link $conf to $target"
  fi

  if ! $SSHSLAVE "$lcmd" ; then
    error "could not link $conf to $target at $SLAVE"
  fi

}

####################################################
# delete_ve() : delete VE .
#
# Removes /etc/vz/conf/$VEID.conf on the local node
# and on the slave via $SSHSLAVE. Results are not
# checked.
####################################################

delete_ve() {

  # "vzctl destroy" would delete VE_PRIVATE and rename the .conf file to
  # .conf.destroyed. The VE partition is being deleted anyway, so it is
  # enough to remove the one remaining trace: the link to its conf file.

  local conf_link="/etc/vz/conf/$VEID.conf"

  LCMD="$RM -f $conf_link"

  $LCMD
  $SSHSLAVE "$LCMD"

}

####################################################
# wait_for_sync() : waits until DRBD is in sync.
#
# Arguments: $1 - optional status file to read
#                 (default: /proc/drbd).
# Globals read: VEID, SED, ECHO, SLEEP.
# Outputs: while the line of device $VEID shows a
#          "cs:Sync..." state, the two progress lines
#          below it once per second; finally the
#          device line and the line after it.
####################################################

wait_for_sync() {

  local status_file="${1:-/proc/drbd}"
  local msg

  # The device number is anchored to the start of the line (after optional
  # padding) so that VEID 101 does not also match the line of device 1101.

  while
    msg=$($SED -n "/^ *$VEID: cs:Sync/{n;n;p;n;p;}" "$status_file")
    [ -n "$msg" ]
  do

    $ECHO "$msg"
    # ANSI "cursor up 2 lines": the next report overwrites this one.
    $ECHO -ne "\033[2A"
    $SLEEP 1

  done

  # Last feedback
  $SED -n "/^ *$VEID:/{p;n;p;}" "$status_file"

}

####################################################
# create_ha() : creates heartbeat resource.
#
# Feeds two XML documents to "$CIBADMIN -C -p": first
# the constraints section, then the resources section
# (a master_slave set wrapping the drbd primitive and
# a group holding the Filesystem and ManageVE
# primitives). Both the master_slave set and the
# group are written with target_role "stopped".
####################################################

create_ha() {

  # Short aliases for the identifiers used all over the XML below.
  local res="$NAME"
  local ms="$CIBMS" gp="$CIBGP" fs="$CIBFS" ve="$CIBVE"

  # Constraints: order/colocation between the drbd master and the group,
  # order/colocation between filesystem and VE, and the preferred node.

  $CIBADMIN -C -p -o constraints << CONSTRAINTS_XML
<constraints>
  <rsc_order id="${res}dbg" to="${ms}" to_action="promote" from="${gp}" action="start"/>
  <rsc_colocation id="${res}dcg" to="${ms}" to_role="master" from="${gp}" score="infinity"/>
  <rsc_order id="${res}fbv" to="${fs}" from="${ve}"/>
  <rsc_colocation id="${res}fcv" to="${fs}" from="${ve}" score="infinity"/>
  <rsc_location id="${res}pn" rsc="${ms}">
    <rule id="${res}pn-rl" role="master" score="100">
      <expression id="${res}pn-rl-ex" attribute="#uname" operation="eq" value="${PREFNODE}"/>
    </rule>
  </rsc_location>
</constraints>
CONSTRAINTS_XML

  # Resources.

  $CIBADMIN -C -p -o resources << RESOURCES_XML
<resources>
  <master_slave id="${ms}">
    <meta_attributes id="${ms}-ma">
      <attributes>
        <nvpair id="${ms}-ma-1" name="clone_max" value="2"/>
        <nvpair id="${ms}-ma-2" name="clone_node_max" value="1"/>
        <nvpair id="${ms}-ma-3" name="master_max" value="1"/>
        <nvpair id="${ms}-ma-4" name="master_node_max" value="1"/>
        <nvpair id="${ms}-ma-5" name="notify" value="yes"/>
        <nvpair id="${ms}-ma-6" name="globally_unique" value="false"/>
        <nvpair id="${ms}-ma-7" name="target_role" value="stopped"/>
      </attributes>
    </meta_attributes>
    <primitive id="${res}" class="ocf" provider="heartbeat" type="drbd">
      <instance_attributes id="${res}-ia">
        <attributes>
          <nvpair id="${res}-ia-1" name="drbd_resource" value="${res}"/>
        </attributes>
      </instance_attributes>
      <operations>
        <op id="${res}-op-1" name="monitor" interval="59s" timeout="10s" role="Master"/>
        <op id="${res}-op-2" name="monitor" interval="60s" timeout="10s" role="Slave"/>
      </operations>
    </primitive>
  </master_slave>
  <group id="${gp}">
    <meta_attributes id="${gp}-ma">
      <attributes>
        <nvpair id="${gp}-ma-1" name="target_role" value="stopped"/>
      </attributes>
    </meta_attributes>
    <primitive id="${fs}" class="ocf" provider="heartbeat" type="Filesystem">
      <instance_attributes id="${fs}-ia">
        <attributes>
          <nvpair id="${fs}-ia-1" name="fstype" value="ext3"/>
          <nvpair id="${fs}-ia-2" name="directory" value="${VEROOT}"/>
          <nvpair id="${fs}-ia-3" name="device" value="${DRBDDEV}"/>
        </attributes>
      </instance_attributes>
    </primitive>
    <primitive id="${ve}" class="ocf" provider="heartbeat" type="ManageVE">
      <instance_attributes id="${ve}-ia">
        <attributes>
          <nvpair id="${ve}-ia-1" name="veid" value="${VEID}"/>
        </attributes>
      </instance_attributes>
    </primitive>
  </group>
</resources>
RESOURCES_XML

}

####################################################
# delete_ha() : deletes heartbeat resource.
#
# Deletes, through "$CIBADMIN -d", the master_slave
# set, the group and the five constraints named
# ${NAME}dbg, ${NAME}dcg, ${NAME}fbv, ${NAME}fcv and
# ${NAME}pn.
# Exits through error() unless drbdadm reports the
# resource as "Unconfigured".
####################################################

delete_ha() {

  local state

  # Make sure the device is down.
  # Compared quoted: an empty or multi-word answer from drbdadm must count
  # as "not Unconfigured" rather than break the test and fall through.

  state=$($DRBDADM state $NAME)

  if [ "$state" != "Unconfigured" ]; then
    error "drbd resource $NAME is not in Unconfigured state"
  fi

  # Every fragment is a self-closed element so that it is well-formed XML.

  $CIBADMIN -d -o resources -X "<master_slave id=\"$CIBMS\"/>"
  $CIBADMIN -d -o resources -X "<group id=\"$CIBGP\"/>"
  $CIBADMIN -d -o constraints -X "<rsc_order id=\"${NAME}dbg\"/>"
  $CIBADMIN -d -o constraints -X "<rsc_colocation id=\"${NAME}dcg\"/>"
  $CIBADMIN -d -o constraints -X "<rsc_order id=\"${NAME}fbv\"/>"
  $CIBADMIN -d -o constraints -X "<rsc_colocation id=\"${NAME}fcv\"/>"
  $CIBADMIN -d -o constraints -X "<rsc_location id=\"${NAME}pn\"/>"

}

####################################################
# activate_ha() : activates heartbeat resource.
#
# Sets the target_role meta attribute to "#default"
# on the master_slave set ($CIBMS) first and on the
# group ($CIBGP) second, using $CRM_RESOURCE.
####################################################

activate_ha() {

  local rsc

  for rsc in "$CIBMS" "$CIBGP"; do
    $CRM_RESOURCE -r $rsc --meta -p target_role -v "#default"
  done

}

####################################################
# stop_ha() : stops heartbeat resource.
#
# Sets target_role to "stopped" on the group ($CIBGP)
# and then on the master_slave set ($CIBMS), and
# polls "$DRBDADM state $NAME" every 2 seconds until
# it reports "Unconfigured".
# Exits through error() if that has not happened
# after 31 polls.
####################################################

stop_ha() {

  local count=0
  local state

  $CRM_RESOURCE -r $CIBGP --meta -p target_role -v "stopped"
  $CRM_RESOURCE -r $CIBMS --meta -p target_role -v "stopped"

  # Wait until it is stopped.

  $ECHO -n "Waiting for $NAME to stop"

  while [ "$count" -le 30 ]; do

    # Captured and compared quoted: an empty or multi-word answer from
    # drbdadm means "not stopped yet", it must not end the wait early.
    state=$($DRBDADM state $NAME)

    if [ "$state" = "Unconfigured" ]; then
      break
    fi

    $ECHO -n "."
    $SLEEP 2
    count=$((count + 1))

  done

  if [ "$count" -gt 30 ]; then
    error "$NAME is taking too long to stop"
  else
    $ECHO ".done!"
  fi

}

####################################################
# migrate_ha() : migrates resource.
#
# Sends the "${NAME}pn-rl-ex" expression, with
# $PREFNODE as its value, to "$CIBADMIN -M -p" on
# standard input.
####################################################

migrate_ha() {

  local expr_xml="<expression id=\"${NAME}pn-rl-ex\" attribute=\"#uname\" operation=\"eq\" value=\"$PREFNODE\"/>"

  # The here-string supplies the same single line, newline-terminated.
  $CIBADMIN -M -p <<< "$expr_xml"

}

############################################################################
# Execution of the script begins here. 
############################################################################

####################################################
# Tools.
#
# Absolute paths of every external program used by
# the functions of this script.
####################################################

# Files and directories.
CP="/bin/cp"
RM="/bin/rm"
MV="/bin/mv"
LN="/bin/ln"
MKDIR="/bin/mkdir"
RMDIR="/bin/rmdir"

# Filesystems.
MOUNT="/bin/mount"
UMOUNT="/bin/umount"
MKFS="/sbin/mkfs.ext3"

# Text and miscellaneous.
CAT="/bin/cat"
SED="/bin/sed"
GREP="/bin/grep"
ECHO="/bin/echo"
SLEEP="/bin/sleep"

# Remote execution.
SSH="/usr/bin/ssh"
SCP="/usr/bin/scp"

# LVM.
LVCREATE="/usr/sbin/lvcreate"
LVREMOVE="/usr/sbin/lvremove"
VGS="/usr/sbin/vgs"

# DRBD and Heartbeat.
DRBDADM="/sbin/drbdadm"
CIBADMIN="/usr/sbin/cibadmin"
CRM_RESOURCE="/usr/sbin/crm_resource"

# OpenVZ.
VZCTL="/usr/sbin/vzctl"

####################################################
# Extract command line parameters.
#
# Options are consumed until the first argument that
# is not one of them; that argument is taken as the
# VEID and everything after it is left in "$@" as the
# list of commands to run.
# Sets VESIZE, VG, DRBDPORT, PREFNODE and VEID.
####################################################

if [ $# -eq 0 ]; then
  usage
fi

while [ $# -gt 0 ]; do    # Until you run out of parameters . . .

  case "$1" in

    -h|--help)
      usage
      ;;

    -s|--size)
      VESIZE="$2"
      shift
      # A pattern match is used instead of a numeric test: a numeric test
      # fails with an error on a non-number, and that error would be taken
      # for "valid" by the surrounding if.
      if [[ ! "$VESIZE" =~ ^[0-9]+$ ]]; then
        error "incorrect VE size"
      fi
      # Force base 10 so that a leading zero is not read as octal when the
      # size is used in arithmetic.
      VESIZE=$((10#$VESIZE))
      ;;

    -g|--vg)
      VG="$2"
      shift
      ;;

    -p|--port)
      DRBDPORT="$2"
      shift
      ;;

    -n|--node)
      PREFNODE="$2"
      if [[ "$PREFNODE" != "$NODE1" && "$PREFNODE" != "$NODE2" ]]; then
        error "preferred node must be either $NODE1 or $NODE2"
      fi
      shift
      ;;

    *)
      # First non-option argument: the VEID, which must be followed by at
      # least one command.
      if [[ -z "$1" || -z "$2" ]]; then
        usage
      fi
      VEID="$1"
      # Reject non-numeric values as well as numbers below 101.
      if [[ ! "$VEID" =~ ^[0-9]+$ ]] || [ "$VEID" -lt 101 ]; then
        error "VEID must be at least 101"
      fi
      shift
      break
      ;;

  esac

  shift       # Check next set of parameters.

done

####################################################
# Set up variables.
#
# Works out which node the script is running on and
# derives every per-VE name from $VEID.
####################################################

# See who we are: the local node is the master, the other one the slave.

MASTER=$(/bin/uname -n)

case "$MASTER" in
  "$NODE1")
    SLAVE="$NODE2"
    ;;
  "$NODE2")
    SLAVE="$NODE1"
    ;;
  *)
    error "this script must be run in either $NODE1 or $NODE2"
    ;;
esac

# Without a preferred node, prefer the node we are running on.
[ -n "$PREFNODE" ] || PREFNODE="$MASTER"

NAME=ovz$VEID                 # base name of the LV and DRBD resource
DRBDDEV=/dev/drbd$VEID        # DRBD device
LVDEV=/dev/$VG/$NAME          # backing logical volume
VEROOT=$VESDIR/$VEID          # mount point
SSHSLAVE="$SSH $SLAVE"        # prefix for commands run on the slave

# Heartbeat (CIB) identifiers.
CIBMS=${NAME}ms
CIBGP=${NAME}gp
CIBFS=${NAME}fs
CIBVE=${NAME}ve



####################################################
# Execute command.
#
# Runs, in order, every command left on the command
# line after the VEID. An unknown command stops the
# script through error().
####################################################

while [ $# -gt 0 ]; do    # Until you run out of parameters . . .

  case "$1" in

    clv|create_lv)
      create_lv
      ;;

    dlv|delete_lv)
      delete_lv
      ;;

    cdrbd|create_drbd)
      create_drbd
      ;;

    ddrbd|delete_drbd)
      delete_drbd
      ;;

    sdrbd|stop_drbd)
      stop_drbd
      ;;

    cmp|create_mp)
      create_mp
      ;;

    dmp|delete_mp)
      delete_mp
      ;;

    cfs|create_fs)
      create_fs
      ;;

    mfs|mount_fs)
      mount_fs
      ;;

    ufs|umount_fs)
      umount_fs
      ;;

    cve|create_ve)
      create_ve
      ;;

    dve|delete_ve)
      delete_ve
      ;;

    # "wfsm" is accepted too: it is the short name the help text has
    # advertised for this command.
    wfs|wfsm|wait_for_sync)
      wait_for_sync
      ;;

    cha|create_ha)
      create_ha
      ;;

    dha|delete_ha)
      delete_ha
      ;;

    aha|activate_ha)
      activate_ha
      ;;

    sha|stop_ha)
      stop_ha
      ;;

    mha|migrate_ha)
      migrate_ha
      ;;

    cf|create_full)
      create_lv
      create_drbd
      create_mp
      create_fs
      mount_fs
      create_ve
      umount_fs
      create_ha
      wait_for_sync
      stop_drbd
      activate_ha
      ;;

    df|delete_full)
      stop_ha
      delete_ha
      delete_ve
      delete_mp
      delete_drbd
      delete_lv
      ;;

    *)
      error "$1 is not a mbhave command"
      ;;

  esac

  shift

done

