2024-03-29 23:36:40 -03:00
|
|
|
import requests
|
|
|
|
import time
|
|
|
|
from datetime import datetime
|
|
|
|
from subprocess import Popen, PIPE
|
2024-04-15 13:59:13 -03:00
|
|
|
import os
|
|
|
|
import uuid
|
|
|
|
|
2024-03-29 23:36:40 -03:00
|
|
|
|
2024-04-11 12:54:28 -03:00
|
|
|
# When True, extra per-host diagnostics are printed and the start-up delay is skipped.
DEBUG = False

# Base address of the routerfleet API; every endpoint URL below is derived from it.
API_ADDRESS = "http://routerfleet:8001"
HOST_LIST_URL = f"{API_ADDRESS}/monitoring/export_router_list/"
UPDATE_STATUS_URL = f"{API_ADDRESS}/monitoring/update_router_status/"
CONFIG_TIMESTAMP_URL = f"{API_ADDRESS}/monitoring/router_config_timestamp/"

UPDATE_HOST_LIST_INTERVAL = 600  # How often to update the router list in seconds
MONITOR_INTERVAL = 60  # How often to monitor each router in seconds
MAX_NOTIFICATIONS_PER_MONITOR_INTERVAL = 50  # Throttle the number of notifications sent to the remote API

# Global variables

# Routers being monitored, indexed by router UUID. Each entry is read for its
# 'address' and 'online' fields, so this is a mapping rather than a sequence.
host_list = {}
# time.time() value of the last successful host list download (0 = never).
host_list_update_timestamp = 0
# Number of status updates sent to the remote API in the current monitoring pass.
notification_count = 0
# Config timestamp that the currently loaded host list corresponds to.
current_router_config_timestamp = ''
# Most recent config timestamp reported by the remote API.
remote_router_config_timestamp = ''
# Monitoring API key; filled in at start-up from the secrets file.
api_key = ''
|
2024-03-29 23:36:40 -03:00
|
|
|
|
|
|
|
|
|
|
|
def get_verbose_status(status):
    """Translate a truthy/falsy status flag into its textual label.

    Args:
        status: Any value; only its truthiness is considered.

    Returns:
        "online" for a truthy value, "offline" otherwise.
    """
    if status:
        return "online"
    return "offline"
|
|
|
|
|
|
|
|
|
2024-04-15 13:59:13 -03:00
|
|
|
def get_api_key(api_file_path="/app_secrets/monitoring_key"):
    """Load the monitoring API key from a secrets file.

    The file is accepted only when its content, after stripping surrounding
    whitespace, is a UUID written in canonical form, i.e. it is identical to
    ``str(uuid.UUID(content))``.

    Args:
        api_file_path: Path of the file holding the key.

    Returns:
        The key as a string, or None when the file is missing, is not a
        regular file, cannot be read, or does not contain a canonical UUID.
    """
    if not os.path.isfile(api_file_path):
        return None

    try:
        with open(api_file_path, 'r') as api_file:
            api_file_content = api_file.read().strip()
    except (OSError, UnicodeDecodeError):
        # An unreadable key file is treated the same as a missing one.
        return None

    try:
        parsed_key = uuid.UUID(api_file_content)
    except ValueError:
        # Not parseable as a UUID at all.
        return None

    # uuid.UUID also accepts braces, a urn: prefix and upper-case hex; only
    # the canonical spelling is accepted as a key.
    canonical_key = str(parsed_key)
    return canonical_key if canonical_key == api_file_content else None
|
|
|
|
|
|
|
|
|
2024-04-11 12:54:28 -03:00
|
|
|
def update_router_config_timestamp(timeout=10):
    """Refresh the cached remote router-config timestamp from the API.

    Queries CONFIG_TIMESTAMP_URL with the module-level ``api_key`` and stores
    the returned 'router_config' value in ``remote_router_config_timestamp``.
    Every outcome is reported on stdout; failures never raise.

    Args:
        timeout: Seconds to wait for the API before giving up, so that an
            unresponsive server cannot block the caller indefinitely.
    """
    global remote_router_config_timestamp

    try:
        response = requests.get(CONFIG_TIMESTAMP_URL, params={"key": api_key}, timeout=timeout)
        if response.status_code != 200:
            print(f"{datetime.now()} - Error updating router config timestamp: HTTP {response.status_code}")
            return

        latest_timestamp = response.json()['router_config']
        if latest_timestamp != remote_router_config_timestamp:
            remote_router_config_timestamp = latest_timestamp
            print(f"{datetime.now()} - Router config timestamp updated: {remote_router_config_timestamp}")
        else:
            print(f"{datetime.now()} - Router config timestamp unchanged: {remote_router_config_timestamp}")
    except Exception as e:
        # Best effort: network, JSON and missing-key errors are logged and the
        # previously known timestamp is kept.
        print(f"{datetime.now()} - Exception updating router config timestamp: {e}")
|
|
|
|
|
|
|
|
|
2024-03-29 23:36:40 -03:00
|
|
|
def fetch_host_list(timeout=10):
    """Download the router list and its config timestamp from the API.

    On success the module-level ``host_list_update_timestamp``,
    ``remote_router_config_timestamp`` and ``current_router_config_timestamp``
    are updated. They are only touched once the whole payload has been
    extracted, so a malformed response leaves the previous state intact and
    the next pass will try again.

    Args:
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A ``(router_list, success)`` tuple. ``router_list`` is the value of
        the response's 'router_list' field when ``success`` is True, and an
        empty list when the request or the parsing failed.
    """
    global host_list_update_timestamp, current_router_config_timestamp, remote_router_config_timestamp

    try:
        response = requests.get(HOST_LIST_URL, params={"key": api_key}, timeout=timeout)
        if response.status_code == 200:
            # Parse once and pull out both fields before mutating any global.
            payload = response.json()
            router_list = payload['router_list']
            router_config = payload['router_config']

            host_list_update_timestamp = time.time()
            remote_router_config_timestamp = router_config
            current_router_config_timestamp = router_config
            return router_list, True

        print(f"{datetime.now()} - Error fetching host list: HTTP {response.status_code}")
    except Exception as e:
        # Best effort: any failure is logged and reported through the flag.
        print(f"{datetime.now()} - Exception fetching host list: {e}")
    return [], False
|
|
|
|
|
|
|
|
|
|
|
|
def update_host_status(uuid, status, timeout=10):
    """Report a router's new status to the remote API and record it locally.

    The call is skipped once MAX_NOTIFICATIONS_PER_MONITOR_INTERVAL updates
    have already been sent in the current pass. The local 'online' flag in
    ``host_list`` is only changed after the API answers with HTTP 200, so a
    failed notification is retried on the next check of that host.

    Args:
        uuid: Key of the router in ``host_list``. Note that this parameter
            name hides the ``uuid`` module inside this function.
        status: Truthy when the router is reachable, falsy otherwise.
        timeout: Seconds to wait for the API before giving up.

    Raises:
        KeyError: If ``uuid`` is not present in ``host_list`` or its entry
            has no 'address' field.
    """
    global notification_count

    address = host_list[uuid]['address']
    if notification_count >= MAX_NOTIFICATIONS_PER_MONITOR_INTERVAL:
        print(f"{datetime.now()} - Notification limit reached. Skipping Remote API update for {address}")
        return  # Skip if notification limit is reached

    verbose_status = get_verbose_status(status)
    try:
        response = requests.get(
            UPDATE_STATUS_URL,
            params={"key": api_key, "uuid": uuid, "status": verbose_status},
            timeout=timeout,
        )
        if response.status_code == 200:
            print(f"{datetime.now()} - Remote API Status updated for {address} to {verbose_status}")
            notification_count += 1
            host_list[uuid]['online'] = status
        else:
            print(f"{datetime.now()} - Error updating status for {address}: HTTP {response.status_code}")
    except Exception as e:
        # Best effort: a failed notification must not stop the monitor loop.
        print(f"{datetime.now()} - Exception updating status for {address}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def check_host_status(host_uuid):
    """Ping one router with ``fping`` and propagate any status change.

    The router counts as online when ``fping`` exits with return code 0.
    When that differs from the 'online' flag stored in ``host_list``, the
    change is logged and passed on to ``update_host_status``.

    Args:
        host_uuid: Key of the router in ``host_list``.

    Raises:
        KeyError: If ``host_uuid`` is not in ``host_list`` or its entry lacks
            the 'address' or 'online' field.
    """
    host = host_list[host_uuid]
    address = host['address']

    try:
        process = Popen(["fping", address], stdout=PIPE, stderr=PIPE)
    except OSError as e:
        # fping could not be started (e.g. not installed). That says nothing
        # about the router itself, so leave its recorded status untouched.
        print(f"{datetime.now()} - Exception running fping for {address}: {e}")
        return

    # The output is not needed; communicate() drains the pipes and waits for exit.
    process.communicate()
    current_online = process.returncode == 0

    if DEBUG:
        print(f"{datetime.now()} - {address} is {get_verbose_status(current_online)}")

    if current_online != host['online']:
        print(f"{datetime.now()} - Status changed for {address} to {get_verbose_status(current_online)}")
        update_host_status(host_uuid, current_online)
|
|
|
|
|
|
|
|
|
|
|
|
def update_and_monitor():
    """Run the monitoring service loop; does not return under normal operation.

    Loads the API key first and terminates the process with exit status 1
    when it is missing or invalid. Each pass of the loop then:

    1. refreshes the remote router-config timestamp,
    2. resets the per-pass notification counter,
    3. re-downloads the host list when none has been loaded yet, when the
       remote config timestamp differs from the loaded one, or when
       UPDATE_HOST_LIST_INTERVAL seconds have elapsed since the last download,
    4. checks every host, spreading the checks evenly over MONITOR_INTERVAL
       seconds (or simply sleeps MONITOR_INTERVAL when there are no hosts).

    Raises:
        SystemExit: With code 1 when no valid API key is available.
    """
    global host_list, notification_count, api_key

    api_key = get_api_key()
    if not api_key:
        print(f"{datetime.now()} - Monitoring key not found or invalid. Exiting...")
        # The bare exit() helper is installed by the site module and is not
        # guaranteed to exist; raising SystemExit always works.
        raise SystemExit(1)

    while True:
        update_router_config_timestamp()
        current_time = time.time()
        notification_count = 0

        update_required = (
            not current_router_config_timestamp
            or current_router_config_timestamp != remote_router_config_timestamp
            or current_time - host_list_update_timestamp > UPDATE_HOST_LIST_INTERVAL
        )

        if update_required:
            print(f"{datetime.now()} - Update required. Fetching host list...")
            new_host_list, fetch_host_list_success = fetch_host_list()
            if fetch_host_list_success:
                host_list = new_host_list
                print(f"{datetime.now()} - host list updated.")
                if DEBUG:
                    print(host_list)
            # On failure the previous host list keeps being monitored.
        else:
            print(f"{datetime.now()} - No update required. Skipping host list update.")
            if DEBUG:
                print(f"{datetime.now()} - Current router config timestamp: {current_router_config_timestamp}")
                print(f"{datetime.now()} - Remote router config timestamp: {remote_router_config_timestamp}")

        if host_list:
            # Spread the checks so that one full pass takes about MONITOR_INTERVAL.
            per_host_delay = MONITOR_INTERVAL / len(host_list)
            if DEBUG:
                print(f"{datetime.now()} - Monitoring host... Interval between each monitor: {per_host_delay} seconds")
            for host_uuid in host_list:
                if DEBUG:
                    print(host_list[host_uuid])
                check_host_status(host_uuid)
                time.sleep(per_host_delay)
        else:
            print(f"{datetime.now()} - No host to monitor.")
            time.sleep(MONITOR_INTERVAL)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: announce start-up, optionally wait, then hand over
    # to the monitoring loop.
    print(f"{datetime.now()} - Monitoring container started, waiting for routerfleet container to start...")

    if not DEBUG:
        # Wait for the routerfleet container to start
        time.sleep(30)

    print(f"{datetime.now()} - Starting monitoring service...")
    update_and_monitor()
|
|
|
|
|
|
|
|
|