0

I am trying to deploy pgpool in native replication mode with watchdog to manage 2 nodes for Active-Standby.
When I start pgpool2 on both servers, they each claim to be the only node alive and become master.

I use the heartbeat lifecheck method and have set the heartbeat destination on each host to the other host. I confirmed with nc that I can send UDP messages between the hosts. Am I missing or misunderstanding something?

Also, I don't understand what the watchdog port actually does: if the heartbeat port sends and receives the health checks and pcp has its own port for management, what does the watchdog TCP socket do?

Below is pgpool.conf from the first server; the second server's config is the same except for the 01/02 in the hostnames. The config has been trimmed down a little for brevity: mostly comments and unrelated settings (logging, cache) were removed.

#------------------------------------------------------------------------------
# CONNECTIONS
#------------------------------------------------------------------------------
# Listener, PCP, backend, authentication and SSL settings for this pgpool node.

# - pgpool Connection Settings -
# Client listener: address '*', port 6432.
listen_addresses = '*'
port = 6432
socket_dir = '/var/run/postgresql'
listen_backlog_multiplier = 2
serialize_accept = off
# PCP listener: its own address/port pair (9898), distinct from the client
# port above and from the watchdog ports configured in the WATCHDOG section.
pcp_listen_addresses = '*'
pcp_port = 9898
pcp_socket_dir = '/var/run/postgresql'

# - Backend Connection Settings -
# Two backends (index 0 and 1). Apart from the hostname, both entries carry
# the same port, weight, data directory and flag.
backend_hostname0 = 'pgsql01.example.com'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/var/lib/pgsql/data'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_hostname1 = 'pgsql02.example.com'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/var/lib/pgsql/data'
backend_flag1 = 'ALLOW_TO_FAILOVER'

# - Authentication -
# NOTE(review): pool_passwd is given as a bare file name; presumably it is
# resolved relative to the directory holding this config file - confirm.
enable_pool_hba = on
pool_passwd = 'pool_passwd'
authentication_timeout = 60

# - SSL Connections -
ssl = off

#------------------------------------------------------------------------------
# POOLS
#------------------------------------------------------------------------------

# - Concurrent session and pool size -
num_init_children = 32
max_pool = 4
# - Life time -
# NOTE(review): units are not stated in this file (presumably seconds), and
# the three values set to 0 presumably mean "no limit" - confirm against the
# pgpool-II documentation.
child_life_time = 300
child_max_connections = 0
connection_life_time = 0
client_idle_limit = 0

#------------------------------------------------------------------------------
# CONNECTION POOLING
#------------------------------------------------------------------------------

# Connection caching is enabled; reset_query_list holds two statements
# separated by ';'.
connection_cache = on
reset_query_list = 'ABORT; DISCARD ALL'


#------------------------------------------------------------------------------
# REPLICATION MODE
#------------------------------------------------------------------------------

# Replication mode is switched on here, while master_slave_mode is off in the
# MASTER/SLAVE MODE section; this matches the "native replication mode" the
# post describes.
replication_mode = on
replicate_select = off
insert_lock = on
lobj_lock_table = ''
# - Degenerate handling -
# Both mismatch-triggered actions are turned off.
replication_stop_on_mismatch = off
failover_if_affected_tuples_mismatch = off


#------------------------------------------------------------------------------
# LOAD BALANCING MODE
#------------------------------------------------------------------------------

load_balance_mode = on
ignore_leading_white_space = on
white_function_list = ''
# NOTE(review): 'nextval' and 'setval' each appear twice in this list; the
# second pair looks redundant and could presumably be dropped - confirm.
black_function_list = 'nextval,setval,nextval,setval'
# Both redirect preference lists are empty.
database_redirect_preference_list = ''
app_name_redirect_preference_list = ''
allow_sql_comments = on

#------------------------------------------------------------------------------
# MASTER/SLAVE MODE
#------------------------------------------------------------------------------

# master_slave_mode is off (replication_mode is the mode switched on in this
# file). NOTE(review): the sub-mode and the streaming-check values below are
# presumably ignored while master_slave_mode is off - confirm.
master_slave_mode = off
master_slave_sub_mode = 'stream'
# - Streaming -
# NOTE(review): a period of 0 presumably disables the streaming replication
# check - confirm.
sr_check_period = 0
sr_check_user = 'nobody'
sr_check_password = ''
sr_check_database = 'postgres'
delay_threshold = 0
# - Special commands -
# No follow-master command is configured (empty string).
follow_master_command = ''

#------------------------------------------------------------------------------
# HEALTH CHECK GLOBAL PARAMETERS
#------------------------------------------------------------------------------

# NOTE(review): health_check_period = 0 presumably disables periodic backend
# health checks, which would leave the remaining health_check_* values
# unused - confirm.
health_check_period = 0
health_check_timeout = 20
health_check_user = 'nobody'
health_check_password = ''
health_check_database = ''
health_check_max_retries = 0
health_check_retry_delay = 1
# NOTE(review): unit not stated here; 10000 is presumably milliseconds -
# confirm.
connect_timeout = 10000

#------------------------------------------------------------------------------
# FAILOVER AND FAILBACK
#------------------------------------------------------------------------------

# No failover or failback script is configured (both strings are empty).
failover_command = ''
failback_command = ''
fail_over_on_backend_error = on
search_primary_node_timeout = 300

#------------------------------------------------------------------------------
# ONLINE RECOVERY
#------------------------------------------------------------------------------

# No recovery stage commands are configured (both strings are empty).
recovery_user = 'nobody'
recovery_password = ''
recovery_1st_stage_command = ''
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0

#------------------------------------------------------------------------------
# WATCHDOG
#------------------------------------------------------------------------------
# Three different ports appear in this section and are easy to mix up:
#   wd_port = 9000            - this node's watchdog port
#   wd_heartbeat_port = 9694  - this node's heartbeat port
#   other_wd_port0            - the port used to reach the peer's watchdog

# - Enabling -
use_watchdog = on
trusted_servers = 'pgsql-test.example.com,proxy.example.com,proxy01.example.com,proxy02.example.com'
ping_path = '/bin'
# - Watchdog communication Settings -
# This node identifies itself as pgsql01; the post states the second server's
# file differs only in the 01/02 of the hostnames, so both nodes use
# wd_port 9000.
wd_hostname = 'pgsql01.example.com'
wd_port = 9000
wd_priority = 2
# NOTE(review): secret stored in plain text in this file; presumably it has
# to be identical on both nodes - confirm.
wd_authkey = 'Hunter1'
wd_ipc_socket_dir = '/tmp'

# - Virtual IP control Setting -
# The up/down commands add and remove the delegate IP with a /23 prefix on
# device ens192; $_IP_$ is a placeholder inside the command strings.
delegate_IP = '10.10.10.92'
if_cmd_path = '/sbin'
if_up_cmd = 'ip addr add $_IP_$/23 dev ens192 label ens192:0'
if_down_cmd = 'ip addr del $_IP_$/23 dev ens192'
arping_path = '/usr/bin'
arping_cmd = 'arping -U $_IP_$ -w 1'

# - Behavior on escalation Setting -
# No escalation / de-escalation command is configured (empty strings).
clear_memqcache_on_escalation = on
wd_escalation_command = ''
wd_de_escalation_command = ''

# - Watchdog consensus settings for failover -
failover_when_quorum_exists = on
failover_require_consensus = on
allow_multiple_failover_requests_from_node = off

# -- common --
wd_monitoring_interfaces_list = ''  # Comma separated list of interfaces names to monitor.
wd_lifecheck_method = 'heartbeat'
wd_interval = 5

# -- heartbeat mode --
# Heartbeats are addressed to the peer (pgsql02) on 9694, the same number as
# the local wd_heartbeat_port.
wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
heartbeat_destination0 = 'pgsql02.example.com'
heartbeat_destination_port0 = 9694 
heartbeat_device0 = ''

# -- query mode --
# NOTE(review): wd_lifecheck_method is 'heartbeat', so these query-mode
# values are presumably unused - confirm.
wd_life_point = 3
wd_lifecheck_query = 'SELECT 1'
wd_lifecheck_dbname = 'template1'
wd_lifecheck_user = 'nobody'
wd_lifecheck_password = ''

# - Other pgpool Connection Settings -
# NOTE(review): other_wd_port0 is 9694, which equals wd_heartbeat_port rather
# than wd_port (9000). The resolution reported for this post is that
# other_wd_port0 should be 9000; with 9694 each node came up believing it was
# the only one alive.
# NOTE(review): other_pgpool_hostname0 uses the short name 'pgsql02', whereas
# every other host in this file is fully qualified ('pgsql02.example.com') -
# confirm that the short name resolves to the same host.
other_pgpool_hostname0 = 'pgsql02'
other_pgpool_port0 = 6432
other_wd_port0 = 9694
virullius
  • 988
  • 8
  • 22

1 Answers1

0

I got my answer from the pgpool-general mailing list.

other_wd_port0 should have been 9000 in my config instead of 9694: it has to be the other node's watchdog port (wd_port), not its heartbeat port (wd_heartbeat_port).

virullius
  • 988
  • 8
  • 22