Skip to content

Commit

Permalink
robot_failover: add recheck to mitigate races - fixes #2
Browse files: browse the repository at this point in the history
  • Loading branch information
mkg20001 committed Oct 3, 2024
1 parent d389096 commit 70eac05
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion robot_failover.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,7 +58,7 @@ def has_ip(ip_bin_path, ip, interface):
return bool(len(check_output([ip_bin_path, 'a', 's', interface, 'to', ip])))

def change_request(endstate, url, header, target_ip, ip_bin_path, floating_ip, interface, dummy_interface):
log_prefix = "[%s -> %s] " % (url, target_ip)
log_prefix = "[%s -> %s] S " % (url, target_ip)
if endstate == "BACKUP":
del_ip(ip_bin_path, floating_ip, interface)
if dummy_interface:
Expand All @@ -73,6 +73,7 @@ def change_request(endstate, url, header, target_ip, ip_bin_path, floating_ip, i
if dummy_interface:
del_ip(ip_bin_path, floating_ip, dummy_interface)
if header:
recheck = False
while True:
if not has_ip(ip_bin_path, floating_ip, interface):
normal_log(log_prefix + 'ip %s has vanished from interface %s, cancelling attempt to switch' % (floating_ip, interface))
Expand Down Expand Up @@ -100,6 +101,12 @@ def change_request(endstate, url, header, target_ip, ip_bin_path, floating_ip, i
normal_log(r.text)
else:
normal_log(log_prefix + 'done')
if not recheck:
recheck = True
log_prefix = "[%s -> %s] R " % (url, target_ip)
normal_log(log_prefix + 'rechecking in 30s...')
sleep(30)
continue
break
else:
normal_log(log_prefix + 'trying again in 120s...')
Expand Down

0 comments on commit 70eac05

Please sign in to comment.