Features/COLO/Managed HOWTO

On every node do the following:

Install Debian buster (amd64): https://www.debian.org/distrib/

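Commands prefixed with $ are run as a regular user.
Commands prefixed with # are run as root.
(The prefix itself is not part of the command.)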

Install packages:

# apt-get -y install git build-essential wget nano bridge-utils corosync pacemaker crmsh python3 pkg-config libglib2.0-dev libpixman-1-dev

Workaround:

# wget https://snapshot.debian.org/archive/debian/20200129T091834Z/pool/main/l/linux/linux-libc-dev_4.19.98-1_amd64.deb
# dpkg -i linux-libc-dev_4.19.98-1_amd64.deb

Install qemu:

$ git clone --single-branch --depth 1 -b new_build https://github.com/Lukey3332/qemu.git
$ cd qemu
$ ./configure --target-list=x86_64-softmmu,i386-softmmu --enable-replication --enable-colo-ra --enable-kvm --prefix=/usr
$ make -j4; make
# make install

Configure networking (the example below is for test-cluster-01; on test-cluster-02 use address 192.168.220.245 instead):

test-cluster-01 = 192.168.220.244
test-cluster-02 = 192.168.220.245

# cat > /etc/network/interfaces <<EOF
auto lo
iface lo inet loopback

iface eth0 inet manual

auto br0
iface br0 inet static
 mtu 1500
 bridge_ports eth0
 address 192.168.220.244
 netmask 255.255.255.0
 gateway 192.168.220.1

EOF

# cat > /etc/resolv.conf <<EOF
nameserver 192.168.220.1
EOF

# ifdown eth0
# ifup br0

Configure DNS (the first file below is for test-cluster-01, the second for test-cluster-02):

# cat > /etc/hosts <<'EOF'
127.0.0.1       localhost
127.0.1.1       test-cluster-01.home.intra  test-cluster-01

# The following lines are desirable for IPv6 capable hosts
::1     localhost ip6-localhost ip6-loopback
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters

192.168.220.245 test-cluster-02.home.intra  test-cluster-02
EOF

# cat > /etc/hosts <<'EOF'
127.0.0.1       localhost
127.0.1.1       test-cluster-02.home.intra  test-cluster-02

# The following lines are desirable for IPv6 capable hosts
::1     localhost ip6-localhost ip6-loopback
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters

192.168.220.244 test-cluster-01.home.intra  test-cluster-01
EOF

Configure corosync:

# cat > /etc/corosync/corosync.conf <<'EOF'
# Please read the corosync.conf.5 manual page
totem {
        version: 2

        cluster_name: test-cluster
}

logging {
        # Log the source file and line where messages are being
        # generated. When in doubt, leave off. Potentially useful for
        # debugging.
        fileline: off
        # Log to standard error. When in doubt, set to yes. Useful when
        # running in the foreground (when invoking "corosync -f")
        to_stderr: yes
        # Log to a log file. When set to "no", the "logfile" option
        # must not be set.
        to_logfile: yes
        logfile: /var/log/corosync/corosync.log
        # Log to the system log daemon. When in doubt, set to yes.
        to_syslog: yes
        # Log debug messages (very verbose). When in doubt, leave off.
        debug: off
        # Log messages with time stamps. When in doubt, set to hires (or on)
        #timestamp: hires
        logger_subsys {
                subsys: QUORUM
                debug: off
        }
}

quorum {
        # Enable and configure quorum subsystem (default: off)
        # see also corosync.conf.5 and votequorum.5
        provider: corosync_votequorum
        two_node: 1
}

nodelist {

        node {
                # Hostname of the node
                name: test-cluster-01
                # Cluster membership node identifier
                nodeid: 1

                ring0_addr: 192.168.220.244
        }
        node {
                # Hostname of the node
                name: test-cluster-02
                # Cluster membership node identifier
                nodeid: 2

                ring0_addr: 192.168.220.245
        }
}
EOF

# systemctl enable corosync
# systemctl restart corosync
# systemctl restart pacemaker

Configure a qemu-colo cluster resource:

# crm ra info ocf:qemu:colo

# qemu-img create -f qcow2 /mnt/vms/vma.qcow2 10g

# crm configure primitive vma ocf:qemu:colo \
       meta target-role=Stopped \
       params active_hidden_dir="/mnt/vms" \
       options="-vnc :0 -enable-kvm -cpu qemu64,+kvmclock -m 512 -netdev bridge,br=br0,id=hn0 -device e1000,netdev=hn0 -device virtio-blk,drive=colo-disk0 -drive if=none,node-name=parent0,format=qcow2,file=/mnt/vms/vma.qcow2" \
       op start timeout=30s interval=0 \
       op stop timeout=10s interval=0 \
       op monitor role=Master interval=1000ms timeout=30s \
       op monitor role=Slave interval=1001ms timeout=30s \
       op notify timeout=30s interval=0 \
       op promote timeout=30s interval=0 \
       op demote timeout=120s interval=0
# crm configure clone vma_ms vma \
	meta promotable=true clone-max=2 promoted-max=1 notify=true target-role=Started

# crm_mon
# journalctl -e