Appendix

This is where you will find all the code used in the article "RabbitMQ for VMware vCloud Suite".

====================================================================================
Section: Installation
File System Location: /etc/yum.repos.d/rabbitmq_erlang.repo
====================================================================================

[rabbitmq_erlang]
name=rabbitmq_erlang
baseurl=https://packagecloud.io/rabbitmq/erlang/el/8/$basearch
repo_gpgcheck=1
gpgcheck=1
enabled=1
# PackageCloud's repository key and RabbitMQ package signing key
gpgkey=https://packagecloud.io/rabbitmq/erlang/gpgkey https://dl.bintray.com/rabbitmq/Keys/rabbitmq-release-signing-key.asc
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt
metadata_expire=300

[rabbitmq_erlang-source]
name=rabbitmq_erlang-source
baseurl=https://packagecloud.io/rabbitmq/erlang/el/8/SRPMS
repo_gpgcheck=1
gpgcheck=0
enabled=1
# PackageCloud's repository key and RabbitMQ package signing key
gpgkey=https://packagecloud.io/rabbitmq/erlang/gpgkey https://dl.bintray.com/rabbitmq/Keys/rabbitmq-release-signing-key.asc
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt
metadata_expire=300

====================================================================================
Section: Installation
File System Location: /etc/rabbitmq/rabbitmq.conf
Step: 18
====================================================================================

# The RabbitMQ server source repository contains an example rabbitmq.conf file named rabbitmq.conf.example.
# It contains examples of most of the configuration items you might want to set (with some very obscure
# ones omitted), along with documentation for those settings.
#
# https://github.com/rabbitmq/rabbitmq-server/blob/v3.7.x/docs/rabbitmq.conf.example

##
## Clustering
## =====================
##

# This guide covers one specific aspect of clustering: network failures between nodes, their effects and recovery options.
# https://www.rabbitmq.com/partitions.html
# Options and behaviors described here --> https://www.rabbitmq.com/partitions.html#automatic-handling
cluster_partition_handling = pause_minority

## Make clustering happen *automatically* at startup. Only applied
## to nodes that have just been reset or started for the first time.
##
## Relevant doc guide: https://rabbitmq.com/cluster-formation.html
##

# Peer discovery mechanism
cluster_formation.peer_discovery_backend = classic_config

# Hardcode the list of nodes
cluster_formation.classic_config.nodes.1 = rabbit@rabbit1.domain.local
cluster_formation.classic_config.nodes.2 = rabbit@rabbit2.domain.local
cluster_formation.classic_config.nodes.3 = rabbit@rabbit3.domain.local

## This node's type can be configured. If you are not sure
## what node type to use, always use 'disc'.
cluster_formation.node_type = disc

## Mirror sync batch size, in messages. Increasing this will speed
## up syncing but total batch size in bytes must not exceed 2 GiB.
## Available in RabbitMQ 3.6.0 or later.
##
mirroring_sync_batch_size = 100

vm_memory_high_watermark.relative = 0.8
disk_free_limit.absolute = 5GB
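
Optional sanity check (run from any node; not part of the file above). Once all three nodes have started with this configuration, the classic_config peer list should have produced a single cluster:

rabbitmqctl cluster_status
# Expect all three rabbit@rabbitN.domain.local nodes to be listed as disc nodes and as running nodes.
# If a node is missing, check DNS resolution of the .domain.local names and the shared Erlang cookie.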

====================================================================================
Section: Installation
File System Location: /etc/rabbitmq/rabbitmq.conf
Step: 55
====================================================================================

# The RabbitMQ server source repository contains an example rabbitmq.conf file named rabbitmq.conf.example.
# It contains examples of most of the configuration items you might want to set (with some very obscure
# ones omitted), along with documentation for those settings.
#
# https://github.com/rabbitmq/rabbitmq-server/blob/v3.7.x/docs/rabbitmq.conf.example

##
## Clustering
## =====================
##

# This guide covers one specific aspect of clustering: network failures between nodes, their effects and recovery options.
# https://www.rabbitmq.com/partitions.html
# Options and behaviors described here --> https://www.rabbitmq.com/partitions.html#automatic-handling
cluster_partition_handling = pause_minority

## Make clustering happen *automatically* at startup. Only applied
## to nodes that have just been reset or started for the first time.
##
## Relevant doc guide: https://rabbitmq.com/cluster-formation.html
##

# Peer discovery mechanism
cluster_formation.peer_discovery_backend = classic_config

# Hardcode the list of nodes
cluster_formation.classic_config.nodes.1 = rabbit@rabbit1.domain.local
cluster_formation.classic_config.nodes.2 = rabbit@rabbit2.domain.local
cluster_formation.classic_config.nodes.3 = rabbit@rabbit3.domain.local

## This node's type can be configured. If you are not sure
## what node type to use, always use 'disc'.
cluster_formation.node_type = disc

## Mirror sync batch size, in messages. Increasing this will speed
## up syncing but total batch size in bytes must not exceed 2 GiB.
## Available in RabbitMQ 3.6.0 or later.
##
mirroring_sync_batch_size = 100

vm_memory_high_watermark.relative = 0.8
disk_free_limit.absolute = 5GB

##
## Encrypted Communications
## ========================
##

listeners.tcp = none
listeners.ssl.default = 5671

num_acceptors.ssl = 10

## TLS configuration.
##
## Related doc guide: https://rabbitmq.com/ssl.html
##
ssl_options.verify = verify_peer
ssl_options.fail_if_no_peer_cert = true
ssl_options.cacertfile = /etc/pki/tls/certs/domain.local.CA.cer.pem
ssl_options.certfile = /etc/pki/tls/certs/rabbitmqcluster.pem
ssl_options.keyfile = /etc/pki/tls/certs/rabbitmq.key
ssl_options.depth = 5
ssl_options.client_renegotiation = false
ssl_options.secure_renegotiate = true
ssl_options.honor_cipher_order = true
ssl_options.honor_ecc_order = true
ssl_options.versions.1 = tlsv1.3
ssl_options.versions.2 = tlsv1.2
ssl_options.ciphers.1 = ECDHE-ECDSA-AES256-GCM-SHA384
ssl_options.ciphers.2 = ECDHE-RSA-AES256-GCM-SHA384
ssl_options.ciphers.3 = ECDHE-ECDSA-AES256-SHA384
ssl_options.ciphers.4 = ECDHE-RSA-AES256-SHA384
ssl_options.ciphers.5 = ECDH-ECDSA-AES256-GCM-SHA384
ssl_options.ciphers.6 = ECDH-RSA-AES256-GCM-SHA384
ssl_options.ciphers.7 = ECDH-ECDSA-AES256-SHA384
ssl_options.ciphers.8 = ECDH-RSA-AES256-SHA384
ssl_options.ciphers.9 = DHE-RSA-AES256-GCM-SHA384
ssl_options.ciphers.10 = DHE-DSS-AES256-GCM-SHA384
ssl_options.ciphers.11 = DHE-RSA-AES256-SHA256
ssl_options.ciphers.12 = DHE-DSS-AES256-SHA256
ssl_options.ciphers.13 = ECDHE-ECDSA-AES128-GCM-SHA256
ssl_options.ciphers.14 = ECDHE-RSA-AES128-GCM-SHA256
ssl_options.ciphers.15 = ECDHE-ECDSA-AES128-SHA256
ssl_options.ciphers.16 = ECDHE-RSA-AES128-SHA256
ssl_options.ciphers.17 = ECDH-ECDSA-AES128-GCM-SHA256
ssl_options.ciphers.18 = ECDH-RSA-AES128-GCM-SHA256
ssl_options.ciphers.19 = ECDH-ECDSA-AES128-SHA256
ssl_options.ciphers.20 = ECDH-RSA-AES128-SHA256
ssl_options.ciphers.21 = DHE-RSA-AES128-GCM-SHA256
ssl_options.ciphers.22 = DHE-DSS-AES128-GCM-SHA256
ssl_options.ciphers.23 = DHE-RSA-AES128-SHA256
ssl_options.ciphers.24 = DHE-DSS-AES128-SHA256
ssl_options.ciphers.25 = ECDHE-ECDSA-AES256-SHA
ssl_options.ciphers.26 = ECDHE-RSA-AES256-SHA
ssl_options.ciphers.27 = DHE-RSA-AES256-SHA
ssl_options.ciphers.28 = DHE-DSS-AES256-SHA
ssl_options.ciphers.29 = ECDH-ECDSA-AES256-SHA
ssl_options.ciphers.30 = ECDH-RSA-AES256-SHA
ssl_options.ciphers.31 = ECDHE-ECDSA-AES128-SHA
ssl_options.ciphers.32 = ECDHE-RSA-AES128-SHA
ssl_options.ciphers.33 = DHE-RSA-AES128-SHA
ssl_options.ciphers.34 = DHE-DSS-AES128-SHA
ssl_options.ciphers.35 = ECDH-ECDSA-AES128-SHA
ssl_options.ciphers.36 = ECDH-RSA-AES128-SHA

##
## Management Web UI Encryption
##
management.ssl.port = 15671
management.ssl.cacertfile = /etc/pki/tls/certs/domain.local.CA.cer.pem
management.ssl.certfile = /etc/pki/tls/certs/rabbitmqcluster.pem
management.ssl.keyfile = /etc/pki/tls/certs/rabbitmq.key
management.ssl.honor_cipher_order = true
management.ssl.honor_ecc_order = true
management.ssl.client_renegotiation = false
management.ssl.secure_renegotiate = true
management.ssl.versions.1 = tlsv1.3
management.ssl.versions.2 = tlsv1.2
management.ssl.ciphers.1 = ECDHE-ECDSA-AES256-GCM-SHA384
management.ssl.ciphers.2 = ECDHE-RSA-AES256-GCM-SHA384
management.ssl.ciphers.3 = ECDHE-ECDSA-AES256-SHA384
management.ssl.ciphers.4 = ECDHE-RSA-AES256-SHA384
management.ssl.ciphers.5 = ECDH-ECDSA-AES256-GCM-SHA384
management.ssl.ciphers.6 = ECDH-RSA-AES256-GCM-SHA384
management.ssl.ciphers.7 = ECDH-ECDSA-AES256-SHA384
management.ssl.ciphers.8 = ECDH-RSA-AES256-SHA384
management.ssl.ciphers.9 = DHE-RSA-AES256-GCM-SHA384
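
Optional sanity check for the TLS listener above (not part of the file). Because ssl_options.verify = verify_peer and fail_if_no_peer_cert = true, a test client has to present a certificate too; reusing the server's certificate and key is usually enough to prove the handshake works:

openssl s_client -connect rabbit1.domain.local:5671 \
    -CAfile /etc/pki/tls/certs/domain.local.CA.cer.pem \
    -cert /etc/pki/tls/certs/rabbitmqcluster.pem \
    -key /etc/pki/tls/certs/rabbitmq.key
# A successful run ends with "Verify return code: 0 (ok)" and an open session.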

====================================================================================
Section: Installation
File System Location: /etc/rabbitmq/rabbitmq-env.conf
Step: 19
====================================================================================

# https://blog.sleeplessbeastie.eu/2020/02/03/how-to-specify-rabbitmq-node-name/
NODENAME=rabbit
USE_LONGNAME=true

====================================================================================
Section: Installation
File System Location: /etc/rabbitmq/rabbitmq-env.conf
Step: 56
====================================================================================

# NOTE: the following path is system dependent and will change between Erlang versions
ERL_SSL_PATH=/usr/lib64/erlang/lib/ssl-10.0/ebin

# Flag                   Description
# ======                 =============
# -pa $ERL_SSL_PATH      prepends the directory ERL_SSL_PATH points at to the code path
# -proto_dist inet_tls   tells the runtime to encrypt inter-node communication
# -ssl_dist_optfile      tells the runtime where to find its inter-node TLS configuration file
SERVER_ADDITIONAL_ERL_ARGS="-pa $ERL_SSL_PATH -proto_dist inet_tls -ssl_dist_optfile /etc/rabbitmq/inter_node_tls.config"
RABBITMQ_CTL_ERL_ARGS="-pa $ERL_SSL_PATH -proto_dist inet_tls -ssl_dist_optfile /etc/rabbitmq/inter_node_tls.config"

# https://blog.sleeplessbeastie.eu/2020/02/03/how-to-specify-rabbitmq-node-name/
NODENAME=rabbit
USE_LONGNAME=true

====================================================================================
Section: Installation
File System Location: /etc/rabbitmq/inter_node_tls.config
====================================================================================

[
  {server, [
    {cacertfile, "/etc/pki/tls/certs/domain.local.CA.cer.pem"},
    {certfile, "/etc/pki/tls/certs/rabbitmqcluster.pem"},
    {keyfile, "/etc/pki/tls/certs/rabbitmq.key"},
    {depth, 5},
    {secure_renegotiate, true},
    {verify, verify_peer},
    {fail_if_no_peer_cert, false}
  ]},
  {client, [
    {secure_renegotiate, true}
  ]}
].
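
Optional sanity check (not part of the file). With -proto_dist inet_tls in place (see rabbitmq-env.conf, Step 56 above), the Erlang distribution listener on TCP 25672 should now speak TLS, so a plain TLS client should at least be able to complete a handshake against it, and the CLI tools should still work through the encrypted distribution:

openssl s_client -connect rabbit1.domain.local:25672 -CAfile /etc/pki/tls/certs/domain.local.CA.cer.pem
rabbitmqctl status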

====================================================================================
Section: Installation
File System Location: /etc/keepalived/keepalived.conf
Node: 1
====================================================================================

# Define scripts to check RabbitMQ health
vrrp_script health_check_port_5671 {
    # How this particular health check works:
    #
    # Run a script to verify port TCP 5671 on localhost is open. If yes, give exit code 0 (success).
    # If not open, give exit code 1 (failure). If exit code 0 is seen by this health check, then Keepalived
    # will add priority weight (10 points) to this node. If an exit code other than zero is returned, then
    # Keepalived will not add priority weight.
    #
    #
    # How health checks (in general) work:
    #
    # When all keepalived nodes start, they are assigned a base priority. This priority is hardcoded in this
    # configuration file to 1.
    #
    # The keepalived nodes compare each other's priorities and the one with the highest priority becomes
    # the MASTER. Because this configuration file is the same across all our keepalived nodes, each node
    # will have the same priority value (1).
    #
    # When more than one node is tied for 1st place, they will hold an election to decide who will be the MASTER.
    #
    # But before the election, each node will run a series of health check scripts. If the script has an exit code of zero,
    # that node has passed the health check and its priority is raised by the health check weight (10).
    #
    # If the script has a non-zero exit code, that node has failed the health check. Its priority is reduced
    # by the same amount as the weight of this health check (10).
    #
    # Example: Default_Priority + Health_Check_Weight (Pass) = Final_Priority
    #          1 + 10 = 11
    #
    # Later, when it fails this health check, the points get taken away:
    # Example: Current_Priority - Health_Check_Weight (Fail) = Final_Priority
    #          11 - 10 = 1
    #
    # Health check weights can only be removed after they have been added. If a node has a current priority of 1,
    # and fails a health check worth 10, the node will retain the priority of 1. The node will never have a
    # negative priority (1 - 10 = -9). I've experimented and negative values don't work.
    #
    #
    # So what happens when a keepalived node is booted, or the service is restarted, when there is no network?
    # Answer: That node will not hear the priority broadcasts of the other keepalived nodes and will therefore
    # think it has the highest priority. Having the highest priority, this node will assume the role of MASTER
    # and begin to advertise (broadcast) the keepalived virtual IP. Of course, these broadcasts go
    # nowhere because there is no network connectivity, but the cluster virtual IP might show up in scripts
    # or on a hypervisor web portal for that VM because the IP address data is extracted through a back-channel.
    #
    # Rephrased answer: If the keepalived process cannot see any other VRRP speaker for a certain virtual_router_id
    # (in our case 99), then the keepalived process on this server will believe itself to be the keepalived
    # cluster member with the highest priority, and thus the rightful MASTER.
    #
    # But what about the failing health check? I thought failing a health check was supposed to stop a node from becoming MASTER?
    # Nope. All it does is reduce the node's priority to make it more difficult to become MASTER.

    # This health check script must be located in a directory that is allowed by SELinux.
    # To know more, visit https://bits.enigmabridge.com/articles/2016-11/keepalived-high-available-setup.html#scripts-not-working---fault-state
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5671.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_5672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_15672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-15672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_25672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-25672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_cluster {
    # NOTE: This check can take a long time.
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-cluster.sh"
    weight 10
    interval 30   # Check every 30 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_queue_master {
    # NOTE: This check can take a long time.
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-queue-master.sh"
    weight 10
    interval 60   # Check every 60 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

# Create a VRRP instance
vrrp_instance RabbitMQ_Virtual_IP {

    # The initial state of each keepalived node. This option isn't
    # really all that valuable, since an election will occur
    # and the host with the highest priority will become
    # the master.
    state BACKUP

    # The interface keepalived will manage
    interface ens192

    # This particular VLAN/Distributed Switch does not allow multicast traffic.
    # Must tell keepalived to use unicast instead of multicast.
    unicast_src_ip A.A.A.11    # Unicast specific option, this is the IP of the interface keepalived listens on
    unicast_peer {             # Unicast specific option, these are the IPs of the peer instances
        #A.A.A.11              # Disable yourself
        A.A.A.12
        A.A.A.13
    }

    # The virtual router id number to assign the routers to
    virtual_router_id 99

    # The priority to assign to this device. This controls
    # who will become the MASTER and BACKUP for a given
    # VRRP instance.
    priority 1

    # How many seconds to wait until a gratuitous arp is sent
    #garp_master_delay 2

    # Allow a lower priority machine to retain the master role
    # when a higher priority machine comes online.
    #nopreempt

    # How often to send out VRRP advertisements
    advert_int 1

    # Execute a notification script when a host transitions to
    # MASTER or BACKUP, or when a fault occurs. The arguments
    # passed to the script are:
    #   $1 = "GROUP"|"INSTANCE"
    #   $2 = name of group or instance
    #   $3 = target state of transition
    # Sample: VRRP-notification.sh VRRP_ROUTER1 BACKUP 100
    #notify "/usr/local/bin/VRRP-notification.sh"

    # Send an SMTP alert during a state transition
    #smtp_alert

    # Authenticate the remote endpoints via a simple
    # username/password combination
    authentication {
        auth_type AH
        auth_pass 12345678
    }

    # The virtual IP addresses to float between nodes. The
    # label statement can be used to bring an interface
    # online to represent the virtual IP.
    virtual_ipaddress {
        A.A.A.10/24
    }

    # Health Check Script(s)
    track_script {
        health_check_port_5671
    }
    track_script {
        health_check_port_5672
    }
    track_script {
        health_check_port_15672
    }
    track_script {
        health_check_port_25672
    }
    track_script {
        health_check_cluster
    }
    track_script {
        health_check_queue_master
    }
}

====================================================================================
Section: Installation
File System Location: /etc/keepalived/keepalived.conf
Node: 2
====================================================================================

# Define scripts to check RabbitMQ health
vrrp_script health_check_port_5671 {
    # How this particular health check works:
    #
    # Run a script to verify port TCP 5671 on localhost is open. If yes, give exit code 0 (success).
    # If not open, give exit code 1 (failure). If exit code 0 is seen by this health check, then Keepalived
    # will add priority weight (10 points) to this node. If an exit code other than zero is returned, then
    # Keepalived will not add priority weight.
    #
    #
    # How health checks (in general) work:
    #
    # When all keepalived nodes start, they are assigned a base priority. This priority is hardcoded in this
    # configuration file to 1.
    #
    # The keepalived nodes compare each other's priorities and the one with the highest priority becomes
    # the MASTER. Because this configuration file is the same across all our keepalived nodes, each node
    # will have the same priority value (1).
    #
    # When more than one node is tied for 1st place, they will hold an election to decide who will be the MASTER.
    #
    # But before the election, each node will run a series of health check scripts. If the script has an exit code of zero,
    # that node has passed the health check and its priority is raised by the health check weight (10).
    #
    # If the script has a non-zero exit code, that node has failed the health check. Its priority is reduced
    # by the same amount as the weight of this health check (10).
    #
    # Example: Default_Priority + Health_Check_Weight (Pass) = Final_Priority
    #          1 + 10 = 11
    #
    # Later, when it fails this health check, the points get taken away:
    # Example: Current_Priority - Health_Check_Weight (Fail) = Final_Priority
    #          11 - 10 = 1
    #
    # Health check weights can only be removed after they have been added. If a node has a current priority of 1,
    # and fails a health check worth 10, the node will retain the priority of 1. The node will never have a
    # negative priority (1 - 10 = -9). I've experimented and negative values don't work.
    #
    #
    # So what happens when a keepalived node is booted, or the service is restarted, when there is no network?
    # Answer: That node will not hear the priority broadcasts of the other keepalived nodes and will therefore
    # think it has the highest priority. Having the highest priority, this node will assume the role of MASTER
    # and begin to advertise (broadcast) the keepalived virtual IP. Of course, these broadcasts go
    # nowhere because there is no network connectivity, but the cluster virtual IP might show up in scripts
    # or on a hypervisor web portal for that VM because the IP address data is extracted through a back-channel.
    #
    # Rephrased answer: If the keepalived process cannot see any other VRRP speaker for a certain virtual_router_id
    # (in our case 99), then the keepalived process on this server will believe itself to be the keepalived
    # cluster member with the highest priority, and thus the rightful MASTER.
    #
    # But what about the failing health check? I thought failing a health check was supposed to stop a node from becoming MASTER?
    # Nope. All it does is reduce the node's priority to make it more difficult to become MASTER.

    # This health check script must be located in a directory that is allowed by SELinux.
    # To know more, visit https://bits.enigmabridge.com/articles/2016-11/keepalived-high-available-setup.html#scripts-not-working---fault-state
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5671.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_5672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_15672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-15672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_25672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-25672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_cluster {
    # NOTE: This check can take a long time.
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-cluster.sh"
    weight 10
    interval 30   # Check every 30 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_queue_master {
    # NOTE: This check can take a long time.
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-queue-master.sh"
    weight 10
    interval 60   # Check every 60 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

# Create a VRRP instance
vrrp_instance RabbitMQ_Virtual_IP {

    # The initial state of each keepalived node. This option isn't
    # really all that valuable, since an election will occur
    # and the host with the highest priority will become
    # the master.
    state BACKUP

    # The interface keepalived will manage
    interface ens192

    # This particular VLAN/Distributed Switch does not allow multicast traffic.
    # Must tell keepalived to use unicast instead of multicast.
    unicast_src_ip A.A.A.12    # Unicast specific option, this is the IP of the interface keepalived listens on
    unicast_peer {             # Unicast specific option, these are the IPs of the peer instances
        A.A.A.11
        #A.A.A.12              # Disable yourself
        A.A.A.13
    }

    # The virtual router id number to assign the routers to
    virtual_router_id 99

    # The priority to assign to this device. This controls
    # who will become the MASTER and BACKUP for a given
    # VRRP instance.
    priority 1

    # How many seconds to wait until a gratuitous arp is sent
    #garp_master_delay 2

    # Allow a lower priority machine to retain the master role
    # when a higher priority machine comes online.
    #nopreempt

    # How often to send out VRRP advertisements
    advert_int 1

    # Execute a notification script when a host transitions to
    # MASTER or BACKUP, or when a fault occurs. The arguments
    # passed to the script are:
    #   $1 = "GROUP"|"INSTANCE"
    #   $2 = name of group or instance
    #   $3 = target state of transition
    # Sample: VRRP-notification.sh VRRP_ROUTER1 BACKUP 100
    #notify "/usr/local/bin/VRRP-notification.sh"

    # Send an SMTP alert during a state transition
    #smtp_alert

    # Authenticate the remote endpoints via a simple
    # username/password combination
    authentication {
        auth_type AH
        auth_pass 12345678
    }

    # The virtual IP addresses to float between nodes. The
    # label statement can be used to bring an interface
    # online to represent the virtual IP.
    virtual_ipaddress {
        A.A.A.10/24
    }

    # Health Check Script(s)
    track_script {
        health_check_port_5671
    }
    track_script {
        health_check_port_5672
    }
    track_script {
        health_check_port_15672
    }
    track_script {
        health_check_port_25672
    }
    track_script {
        health_check_cluster
    }
    track_script {
        health_check_queue_master
    }
}

====================================================================================
Section: Installation
File System Location: /etc/keepalived/keepalived.conf
Node: 3
====================================================================================

# Define scripts to check RabbitMQ health
vrrp_script health_check_port_5671 {
    # How this particular health check works:
    #
    # Run a script to verify port TCP 5671 on localhost is open. If yes, give exit code 0 (success).
    # If not open, give exit code 1 (failure). If exit code 0 is seen by this health check, then Keepalived
    # will add priority weight (10 points) to this node. If an exit code other than zero is returned, then
    # Keepalived will not add priority weight.
    #
    #
    # How health checks (in general) work:
    #
    # When all keepalived nodes start, they are assigned a base priority. This priority is hardcoded in this
    # configuration file to 1.
    #
    # The keepalived nodes compare each other's priorities and the one with the highest priority becomes
    # the MASTER. Because this configuration file is the same across all our keepalived nodes, each node
    # will have the same priority value (1).
    #
    # When more than one node is tied for 1st place, they will hold an election to decide who will be the MASTER.
    #
    # But before the election, each node will run a series of health check scripts. If the script has an exit code of zero,
    # that node has passed the health check and its priority is raised by the health check weight (10).
    #
    # If the script has a non-zero exit code, that node has failed the health check. Its priority is reduced
    # by the same amount as the weight of this health check (10).
    #
    # Example: Default_Priority + Health_Check_Weight (Pass) = Final_Priority
    #          1 + 10 = 11
    #
    # Later, when it fails this health check, the points get taken away:
    # Example: Current_Priority - Health_Check_Weight (Fail) = Final_Priority
    #          11 - 10 = 1
    #
    # Health check weights can only be removed after they have been added. If a node has a current priority of 1,
    # and fails a health check worth 10, the node will retain the priority of 1. The node will never have a
    # negative priority (1 - 10 = -9). I've experimented and negative values don't work.
    #
    #
    # So what happens when a keepalived node is booted, or the service is restarted, when there is no network?
    # Answer: That node will not hear the priority broadcasts of the other keepalived nodes and will therefore
    # think it has the highest priority. Having the highest priority, this node will assume the role of MASTER
    # and begin to advertise (broadcast) the keepalived virtual IP. Of course, these broadcasts go
    # nowhere because there is no network connectivity, but the cluster virtual IP might show up in scripts
    # or on a hypervisor web portal for that VM because the IP address data is extracted through a back-channel.
    #
    # Rephrased answer: If the keepalived process cannot see any other VRRP speaker for a certain virtual_router_id
    # (in our case 99), then the keepalived process on this server will believe itself to be the keepalived
    # cluster member with the highest priority, and thus the rightful MASTER.
    #
    # But what about the failing health check? I thought failing a health check was supposed to stop a node from becoming MASTER?
    # Nope. All it does is reduce the node's priority to make it more difficult to become MASTER.

    # This health check script must be located in a directory that is allowed by SELinux.
    # To know more, visit https://bits.enigmabridge.com/articles/2016-11/keepalived-high-available-setup.html#scripts-not-working---fault-state
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5671.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_5672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_15672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-15672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_port_25672 {
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-25672.sh"
    weight 10
    interval 2    # Check every 2 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_cluster {
    # NOTE: This check can take a long time.
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-cluster.sh"
    weight 10
    interval 30   # Check every 30 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

vrrp_script health_check_queue_master {
    # NOTE: This check can take a long time.
    script "/usr/libexec/keepalived/keepalived-rabbitmq-health-check-queue-master.sh"
    weight 10
    interval 60   # Check every 60 seconds
    fall 1        # Require 1 failure for KO
    rise 1        # Require 1 success for OK
}

# Create a VRRP instance
vrrp_instance RabbitMQ_Virtual_IP {

    # The initial state of each keepalived node. This option isn't
    # really all that valuable, since an election will occur
    # and the host with the highest priority will become
    # the master.
    state BACKUP

    # The interface keepalived will manage
    interface ens192

    # This particular VLAN/Distributed Switch does not allow multicast traffic.
    # Must tell keepalived to use unicast instead of multicast.
    unicast_src_ip A.A.A.13    # Unicast specific option, this is the IP of the interface keepalived listens on
    unicast_peer {             # Unicast specific option, these are the IPs of the peer instances
        A.A.A.11
        A.A.A.12
        #A.A.A.13              # Disable yourself
    }

    # The virtual router id number to assign the routers to
    virtual_router_id 99

    # The priority to assign to this device. This controls
    # who will become the MASTER and BACKUP for a given
    # VRRP instance.
    priority 1

    # How many seconds to wait until a gratuitous arp is sent
    #garp_master_delay 2

    # Allow a lower priority machine to retain the master role
    # when a higher priority machine comes online.
    #nopreempt

    # How often to send out VRRP advertisements
    advert_int 1

    # Execute a notification script when a host transitions to
    # MASTER or BACKUP, or when a fault occurs. The arguments
    # passed to the script are:
    #   $1 = "GROUP"|"INSTANCE"
    #   $2 = name of group or instance
    #   $3 = target state of transition
    # Sample: VRRP-notification.sh VRRP_ROUTER1 BACKUP 100
    #notify "/usr/local/bin/VRRP-notification.sh"

    # Send an SMTP alert during a state transition
    #smtp_alert

    # Authenticate the remote endpoints via a simple
    # username/password combination
    authentication {
        auth_type AH
        auth_pass 12345678
    }

    # The virtual IP addresses to float between nodes. The
    # label statement can be used to bring an interface
    # online to represent the virtual IP.
    virtual_ipaddress {
        A.A.A.10/24
    }

    # Health Check Script(s)
    track_script {
        health_check_port_5671
    }
    track_script {
        health_check_port_5672
    }
    track_script {
        health_check_port_15672
    }
    track_script {
        health_check_port_25672
    }
    track_script {
        health_check_cluster
    }
    track_script {
        health_check_queue_master
    }
}

====================================================================================
Section: Installation
File System Location: /usr/libexec/keepalived/keepalived-rabbitmq-health-check-cluster.sh
====================================================================================

#!/bin/bash

# RabbitMQ health check
rabbitmqctl --timeout 10 cluster_status &>/dev/null

# Exit codes are stored in "$?". Exit code values:
#   Zero     = RabbitMQ is healthy. Worthy of earning points to host the VIP.
#   Non-zero = RabbitMQ is not healthy. Not a good candidate for hosting the VIP. Zero points earned.
#
# Effect of Timeout:
#   When the node and RMQ are up, but the network is down, and "--timeout" is not specified, eventually the command will return "69" or "64".
#   When the node and RMQ are up, but the network is down, and "--timeout" is specified, the command will return "75", which is "timeout reached".

# Return the exit code of the RabbitMQ health check
if [ "$?" -gt 0 ]
then
    exit 1
else
    exit 0
fi

====================================================================================
Section: Installation
File System Location: /usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5671.sh
====================================================================================

#!/bin/bash

# RabbitMQ health check
# Exit codes are stored in "$?". Exit code values:
#   Zero     = RabbitMQ is healthy. Worthy of earning points to host the VIP.
#   Non-zero = RabbitMQ is not healthy. Not a good candidate for hosting the VIP. Zero points earned.

# Test port TCP 5671, which is used for AMQP client connections over TLS (AMQPS).
# The exit codes are always 1 or 0.
lsof -i -P -n | grep LISTEN | grep :5671 &>/dev/null
exit $?

====================================================================================
Section: Installation
File System Location: /usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5672.sh
====================================================================================

#!/bin/bash

# RabbitMQ health check
# Exit codes are stored in "$?". Exit code values:
#   Zero     = RabbitMQ is healthy. Worthy of earning points to host the VIP.
#   Non-zero = RabbitMQ is not healthy. Not a good candidate for hosting the VIP. Zero points earned.

# Test port TCP 5672, which is used for AMQP client connections without TLS.
# The exit codes are always 1 or 0.
lsof -i -P -n | grep LISTEN | grep :5672 &>/dev/null
exit $?

====================================================================================
Section: Installation
File System Location: /usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-15672.sh
====================================================================================

#!/bin/bash

# RabbitMQ health check
# Exit codes are stored in "$?". Exit code values:
#   Zero     = RabbitMQ is healthy. Worthy of earning points to host the VIP.
#   Non-zero = RabbitMQ is not healthy. Not a good candidate for hosting the VIP. Zero points earned.

# Test port TCP 15672, which is used for the management web UI and HTTP API.
# The exit codes are always 1 or 0.
lsof -i -P -n | grep LISTEN | grep :15672 &>/dev/null
exit $?

====================================================================================
Section: Installation
File System Location: /usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-25672.sh
====================================================================================

#!/bin/bash

# RabbitMQ health check
# Exit codes are stored in "$?". Exit code values:
#   Zero     = RabbitMQ is healthy. Worthy of earning points to host the VIP.
#   Non-zero = RabbitMQ is not healthy. Not a good candidate for hosting the VIP. Zero points earned.

# Test port TCP 25672, which is used for inter-node and CLI tools communications.
# The exit codes are always 1 or 0.
lsof -i -P -n | grep LISTEN | grep :25672 &>/dev/null
exit $?
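
The port checks above can be exercised by hand before keepalived starts running them (commands below are a sketch, not part of the scripts). Each check should print nothing and exit 0 while RabbitMQ is listening:

bash /usr/libexec/keepalived/keepalived-rabbitmq-health-check-port-5671.sh; echo $?
ss -tln | grep -E '5671|5672|15672|25672'
# Keepalived executes the scripts by path, so remember to make them executable, e.g.:
# chmod 755 /usr/libexec/keepalived/keepalived-rabbitmq-health-check-*.sh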

====================================================================================
Section: Installation
File System Location: /usr/libexec/keepalived/keepalived-rabbitmq-health-check-queue-master.sh
====================================================================================

#!/bin/bash

# List all queues that are online
# Ignore any lines that contain "{"
OnlineQueueCount=$(rabbitmqctl list_queues --timeout 1 --quiet --online --no-table-headers | grep -v { | wc -l)

# Bias this health check towards failure
OnlineQueueCount=$((OnlineQueueCount + 1))

# Calculate the half-way point. The local RabbitMQ server must be over this threshold to earn these keepalived health check points.
HalfOnlineQueueCount=$(echo "scale=2 ; $OnlineQueueCount / 2" | bc)

# List queues that are online and local to this server
LocalQueueCount=$(rabbitmqctl list_queues --timeout 1 --quiet --online --local --no-table-headers | grep -v { | wc -l)

echo "$HalfOnlineQueueCount vs $LocalQueueCount"

#if (( $LocalQueueCount > $HalfOnlineQueueCount ))
if (( $(echo "$HalfOnlineQueueCount < $LocalQueueCount" | bc -l) ))
then
    exit 0
else
    exit 1
fi

# Exit codes are stored in "$?". Exit code values:
#   Zero     = RabbitMQ is healthy. Worthy of earning points to host the VIP.
#   Non-zero = RabbitMQ is not healthy. Not a good candidate for hosting the VIP. Zero points earned.

====================================================================================
Section: Installation
File System Location: /etc/firewalld/services/vrrp.xml
====================================================================================

<?xml version="1.0" encoding="utf-8"?>
<service>
  <short>VRRP</short>
  <description>Virtual Router Redundancy Protocol</description>
  <protocol value="vrrp"/>
</service>
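
The service definition above only describes VRRP to firewalld; it still has to be enabled on each node (commands below are a sketch, not part of the file):

firewall-cmd --permanent --add-service=vrrp
firewall-cmd --reload
firewall-cmd --list-services   # 'vrrp' should now appear in the list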

====================================================================================
Section: Maintenance
Subsection: Keepalived
File System Location: Crontab Contents
====================================================================================

* * * * * /root/RestartKeepalivedIf100PercentCPU.sh >/dev/null 2>/root/crontab.errorlog

====================================================================================
Section: Maintenance
Subsection: Keepalived
Title: Restart Keepalived Script
File System Location: /root/RestartKeepalivedIf100PercentCPU.sh
====================================================================================

#!/bin/bash -P

export TERM=xterm
export COLUMNS=512

DoPingCheck=true
DoCPUCheck=true

echo $(date -u) "Starting" | tee /root/RestartKeepalivedIf100PercentCPU_RecentRun.log

if [ "$DoPingCheck" = true ]
then
    IpResults=$(/sbin/ip addr show | grep "A\.A\.A\.10")
    #echo $IpResults | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log

    if [ ! -z "$IpResults" ]
    then
        echo $(date -u) "This machine hosts the VIP." | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log

        PingResult=$(ping -c 5 A.A.A.10 &> /dev/null; echo $?)
        # 0 = Host Responded
        # 2 = Host Unreachable
        if [ $PingResult -ne 0 ]
        then
            # The VIP did not respond to pings.
            # Restart KeepaliveD.
            echo $(date -u) "The VIP did NOT respond to pings. Restarting." | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log /root/RestartKeepalivedIf100PercentCPU_Historical.log
            /bin/systemctl restart keepalived

            # Skip CPU check
            DoCPUCheck=false
        else
            echo $(date -u) "The VIP did respond to pings." | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log
        fi
    else
        echo $(date -u) "This machine does NOT host the VIP." | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log
    fi
fi

if [ "$DoCPUCheck" = true ]
then
    # Monitor the CPU utilization of the KeepaliveD service for roughly 10 seconds
    # Set KeepalivedCpuUsage to the average utilization over that window
    KeepalivedCpuUsage=$(top -n 5 -i -b | grep "keepalived" | awk '{s+=$9}END{print s/NR}')

    # Put into the log file what top is returning
    # top -n 5 -b -i | grep "keepalived" >> /root/RestartKeepalivedIf100PercentCPU_RecentRun.log

    echo $(date -u) "KeepaliveD CPU Utilization:" $KeepalivedCpuUsage "%" | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log

    # If the average CPU utilization over that window is greater than 90%, restart keepalived
    if (( $(echo "$KeepalivedCpuUsage > 90" | bc -l) ))
    then
        echo $(date -u) "KeepaliveD at/near 100% CPU. Restarting." | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log /root/RestartKeepalivedIf100PercentCPU_Historical.log
        /bin/systemctl restart keepalived
    else
        echo $(date -u) "KeepaliveD CPU utilization is OK." | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log
    fi
fi

echo $(date -u) "Finishing" | tee -a /root/RestartKeepalivedIf100PercentCPU_RecentRun.log
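
A quick way to exercise the watchdog script above without waiting for cron (a sketch; file names as used above):

chmod 755 /root/RestartKeepalivedIf100PercentCPU.sh
/root/RestartKeepalivedIf100PercentCPU.sh
cat /root/RestartKeepalivedIf100PercentCPU_RecentRun.log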