Monitoring improved to update earlier when any router configuration changes

This commit is contained in:
Eduardo Silva 2024-04-11 12:54:28 -03:00
parent d7c44adc87
commit 23c4090ec1
6 changed files with 109 additions and 11 deletions

View file

@ -3,31 +3,58 @@ import time
from datetime import datetime from datetime import datetime
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
DEBUG = False
if DEBUG:
API_ADDRESS = "http://localhost:8000"
else:
API_ADDRESS = "http://routerfleet:8001"
HOST_LIST_URL = f"{API_ADDRESS}/monitoring/export_router_list/"
UPDATE_STATUS_URL = f"{API_ADDRESS}/monitoring/update_router_status/"
CONFIG_TIMESTAMP_URL = f"{API_ADDRESS}/monitoring/router_config_timestamp/"
UPDATE_HOST_LIST_INTERVAL = 600 # How often to update the router list in seconds UPDATE_HOST_LIST_INTERVAL = 600 # How often to update the router list in seconds
MONITOR_INTERVAL = 60 # How often to monitor each router in seconds MONITOR_INTERVAL = 60 # How often to monitor each router in seconds
MAX_NOTIFICATIONS_PER_MONITOR_INTERVAL = 50 # Throttle the number of notifications sent to the remote API MAX_NOTIFICATIONS_PER_MONITOR_INTERVAL = 50 # Throttle the number of notifications sent to the remote API
HOST_LIST_URL = "http://routerfleet:8001/monitoring/export_router_list/"
UPDATE_STATUS_URL = "http://routerfleet:8001/monitoring/update_router_status/"
DEBUG = False
# Global variables # Global variables
host_list = [] host_list = []
host_list_update_timestamp = 0 host_list_update_timestamp = 0
notification_count = 0 notification_count = 0
current_router_config_timestamp = ''
remote_router_config_timestamp = ''
def get_verbose_status(status):
    """Translate a reachability flag into a human-readable label.

    Args:
        status: Any value; only its truthiness is inspected.

    Returns:
        "online" when ``status`` is truthy, otherwise "offline".
    """
    if status:
        return "online"
    return "offline"
def update_router_config_timestamp(timeout=10):
    """Refresh the cached remote router-config timestamp from the API.

    Sends a GET request to ``CONFIG_TIMESTAMP_URL`` and stores the
    ``router_config`` value of the JSON reply in the module-level
    ``remote_router_config_timestamp``. Any failure (non-200 reply, network
    error, malformed payload) is logged and leaves the cached value as it was.

    Args:
        timeout: Seconds to wait for the API before giving up. ``requests``
            applies no timeout by default, so without one an unresponsive
            API would block the monitoring loop that calls this function.

    Returns:
        None. The result is published through the module-level global.
    """
    global remote_router_config_timestamp
    try:
        response = requests.get(CONFIG_TIMESTAMP_URL, timeout=timeout)
        if response.status_code != 200:
            print(f"{datetime.now()} - Error updating router config timestamp: HTTP {response.status_code}")
            return

        latest_timestamp = response.json()['router_config']
        if latest_timestamp != remote_router_config_timestamp:
            remote_router_config_timestamp = latest_timestamp
            print(f"{datetime.now()} - Router config timestamp updated: {remote_router_config_timestamp}")
        else:
            print(f"{datetime.now()} - Router config timestamp unchanged: {remote_router_config_timestamp}")
    except Exception as e:
        # Deliberately broad: this is a best-effort poll and a failure here
        # must never take down the monitoring loop.
        print(f"{datetime.now()} - Exception updating router config timestamp: {e}")
def fetch_host_list():
global host_list_update_timestamp, current_router_config_timestamp, remote_router_config_timestamp
try: try:
print(f"{datetime.now()} - Fetching host list...")
response = requests.get(HOST_LIST_URL) response = requests.get(HOST_LIST_URL)
if response.status_code == 200: if response.status_code == 200:
host_list_update_timestamp = time.time() host_list_update_timestamp = time.time()
remote_router_config_timestamp = response.json()['router_config']
current_router_config_timestamp = remote_router_config_timestamp
return response.json()['router_list'], True return response.json()['router_list'], True
else: else:
print(f"{datetime.now()} - Error fetching host list: HTTP {response.status_code}") print(f"{datetime.now()} - Error fetching host list: HTTP {response.status_code}")
@ -66,18 +93,33 @@ def check_host_status(host_uuid):
def update_and_monitor(): def update_and_monitor():
global host_list, host_list_update_timestamp, notification_count global host_list, host_list_update_timestamp, notification_count, current_router_config_timestamp, remote_router_config_timestamp
while True: while True:
update_router_config_timestamp()
current_time = time.time() current_time = time.time()
notification_count = 0 notification_count = 0
update_required = False
if not current_router_config_timestamp:
update_required = True
if current_router_config_timestamp != remote_router_config_timestamp:
update_required = True
if current_time - host_list_update_timestamp > UPDATE_HOST_LIST_INTERVAL: if current_time - host_list_update_timestamp > UPDATE_HOST_LIST_INTERVAL:
update_required = True
if update_required:
print(f"{datetime.now()} - Update required. Fetching host list...")
new_host_list, fetch_host_list_success = fetch_host_list() new_host_list, fetch_host_list_success = fetch_host_list()
if fetch_host_list_success: if fetch_host_list_success:
host_list = new_host_list host_list = new_host_list
print(f"{datetime.now()} - host list updated.") print(f"{datetime.now()} - host list updated.")
if DEBUG: if DEBUG:
print(host_list) print(host_list)
else:
print(f"{datetime.now()} - No update required. Skipping host list update.")
if DEBUG:
print(f"{datetime.now()} - Current router config timestamp: {current_router_config_timestamp}")
print(f"{datetime.now()} - Remote router config timestamp: {remote_router_config_timestamp}")
if host_list: if host_list:
if DEBUG: if DEBUG:
@ -94,7 +136,8 @@ def update_and_monitor():
if __name__ == "__main__":
    # Script entry point: optionally wait, then hand over to the endless
    # update/monitor loop.
    print(f"{datetime.now()} - Monitoring container started, waiting for routerfleet container to start...")
    # Wait for the routerfleet container to start; skipped entirely in DEBUG.
    startup_delay = 0 if DEBUG else 30
    if startup_delay:
        time.sleep(startup_delay)
    print(f"{datetime.now()} - Starting monitoring service...")
    update_and_monitor()

View file

@ -1,10 +1,28 @@
from django.shortcuts import render from django.shortcuts import render
from router_manager.models import Router from router_manager.models import Router
from django.http import JsonResponse from django.http import JsonResponse
from django.utils import timezone
from routerfleet_tools.models import WebadminSettings
def view_router_config_timestamp(request):
    """Report when the router configuration last changed, as JSON.

    Loads (or creates) the ``webadmin_settings`` row, seeds
    ``router_config_last_updated`` with the current time if it has never
    been set, stamps ``monitoring_last_run`` and saves the row.

    Args:
        request: The incoming Django request; it is not inspected.

    Returns:
        JsonResponse of the form ``{'router_config': <ISO-8601 timestamp>}``.
    """
    settings_row = WebadminSettings.objects.get_or_create(name='webadmin_settings')[0]

    if not settings_row.router_config_last_updated:
        # Nothing recorded yet: seed a value so isoformat() below has
        # something to serialise.
        settings_row.router_config_last_updated = timezone.now()

    # NOTE(review): assumed to run on every call, as in
    # view_export_router_list - confirm against the original indentation.
    settings_row.monitoring_last_run = timezone.now()
    settings_row.save()

    payload = {'router_config': settings_row.router_config_last_updated.isoformat()}
    return JsonResponse(payload)
def view_export_router_list(request): def view_export_router_list(request):
router_list = {} router_list = {}
webadmin_settings, _ = WebadminSettings.objects.get_or_create(name='webadmin_settings')
webadmin_settings.monitoring_last_run = timezone.now()
if not webadmin_settings.router_config_last_updated:
webadmin_settings.router_config_last_updated = timezone.now()
webadmin_settings.save()
for router in Router.objects.filter(enabled=True, monitoring=True): for router in Router.objects.filter(enabled=True, monitoring=True):
router_list[str(router.uuid)] = { router_list[str(router.uuid)] = {
'address': router.address, 'address': router.address,
@ -13,7 +31,8 @@ def view_export_router_list(request):
'uuid': str(router.uuid), 'uuid': str(router.uuid),
} }
data = { data = {
'router_list': router_list 'router_list': router_list,
'router_config': webadmin_settings.router_config_last_updated.isoformat()
} }
return JsonResponse(data) return JsonResponse(data)
@ -28,4 +47,7 @@ def view_update_router_status(request):
else: else:
router.routerstatus.status_online = False router.routerstatus.status_online = False
router.routerstatus.save() router.routerstatus.save()
return JsonResponse({'status': 'success'}) webadmin_settings, _ = WebadminSettings.objects.get_or_create(name='webadmin_settings')
webadmin_settings.monitoring_last_run = timezone.now()
webadmin_settings.save()
return JsonResponse({'status': 'success', 'router_config': webadmin_settings.router_config_last_updated.isoformat()})

View file

@ -5,6 +5,7 @@ from django.contrib.auth.decorators import login_required
from backup.models import BackupProfile from backup.models import BackupProfile
from backup_data.models import RouterBackup from backup_data.models import RouterBackup
from routerfleet_tools.models import WebadminSettings
from .models import Router, RouterGroup, RouterStatus, SSHKey, BackupSchedule from .models import Router, RouterGroup, RouterStatus, SSHKey, BackupSchedule
from .forms import RouterForm, RouterGroupForm, SSHKeyForm from .forms import RouterForm, RouterGroupForm, SSHKeyForm
from user_manager.models import UserAcl from user_manager.models import UserAcl
@ -59,12 +60,16 @@ def view_router_details(request):
def view_manage_router(request): def view_manage_router(request):
if not UserAcl.objects.filter(user=request.user).filter(user_level__gte=30).exists(): if not UserAcl.objects.filter(user=request.user).filter(user_level__gte=30).exists():
return render(request, 'access_denied.html', {'page_title': 'Access Denied'}) return render(request, 'access_denied.html', {'page_title': 'Access Denied'})
webadmin_settings, _ = WebadminSettings.objects.get_or_create(name='webadmin_settings')
if request.GET.get('uuid'): if request.GET.get('uuid'):
router = get_object_or_404(Router, uuid=request.GET.get('uuid')) router = get_object_or_404(Router, uuid=request.GET.get('uuid'))
if request.GET.get('action') == 'delete': if request.GET.get('action') == 'delete':
if request.GET.get('confirmation') == 'delete': if request.GET.get('confirmation') == 'delete':
router.delete() router.delete()
messages.success(request, 'Router deleted successfully') messages.success(request, 'Router deleted successfully')
webadmin_settings.router_config_last_updated = timezone.now()
webadmin_settings.save()
return redirect('router_list') return redirect('router_list')
else: else:
messages.warning(request, 'Router not deleted|Invalid confirmation') messages.warning(request, 'Router not deleted|Invalid confirmation')
@ -78,6 +83,8 @@ def view_manage_router(request):
messages.success(request, 'Router saved successfully|It may take a few minutes until monitoring starts for this router.') messages.success(request, 'Router saved successfully|It may take a few minutes until monitoring starts for this router.')
router_status, _ = RouterStatus.objects.get_or_create(router=form.instance) router_status, _ = RouterStatus.objects.get_or_create(router=form.instance)
BackupSchedule.objects.filter(router=form.instance).delete() BackupSchedule.objects.filter(router=form.instance).delete()
webadmin_settings.router_config_last_updated = timezone.now()
webadmin_settings.save()
return redirect('router_list') return redirect('router_list')
context = { context = {

View file

@ -6,7 +6,7 @@ from user_manager.views import view_manage_user, view_user_list
from accounts.views import view_login, view_logout, view_create_first_user from accounts.views import view_login, view_logout, view_create_first_user
from router_manager.views import view_router_list, view_manage_router, view_router_group_list, view_ssh_key_list, view_manage_router_group, view_manage_sshkey, view_router_details, view_create_instant_backup_task from router_manager.views import view_router_list, view_manage_router, view_router_group_list, view_ssh_key_list, view_manage_router_group, view_manage_sshkey, view_router_details, view_create_instant_backup_task
from backup.views import view_backup_profile_list, view_manage_backup_profile, view_backup_list, view_backup_details, view_debug_run_backups, view_compare_backups, view_backup_download, view_backup_delete from backup.views import view_backup_profile_list, view_manage_backup_profile, view_backup_list, view_backup_details, view_debug_run_backups, view_compare_backups, view_backup_download, view_backup_delete
from monitoring.views import view_export_router_list, view_update_router_status from monitoring.views import view_export_router_list, view_update_router_status, view_router_config_timestamp
from backup_data.views import view_generate_backup_schedule, view_create_backup_tasks, view_perform_backup_tasks, view_housekeeping from backup_data.views import view_generate_backup_schedule, view_create_backup_tasks, view_perform_backup_tasks, view_housekeeping
from routerfleet_tools.views import cron_check_updates from routerfleet_tools.views import cron_check_updates
@ -38,6 +38,7 @@ urlpatterns = [
path('backup/delete/', view_backup_delete, name='delete_backup'), path('backup/delete/', view_backup_delete, name='delete_backup'),
path('monitoring/export_router_list/', view_export_router_list, name='export_router_list'), path('monitoring/export_router_list/', view_export_router_list, name='export_router_list'),
path('monitoring/update_router_status/', view_update_router_status, name='update_router_status'), path('monitoring/update_router_status/', view_update_router_status, name='update_router_status'),
path('monitoring/router_config_timestamp/', view_router_config_timestamp, name='router_config_timestamp'),
path('cron/generate_backup_schedule/', view_generate_backup_schedule, name='generate_backup_schedule'), path('cron/generate_backup_schedule/', view_generate_backup_schedule, name='generate_backup_schedule'),
path('cron/create_backup_tasks/', view_create_backup_tasks, name='create_backup_tasks'), path('cron/create_backup_tasks/', view_create_backup_tasks, name='create_backup_tasks'),
path('cron/perform_backup_tasks/', view_perform_backup_tasks, name='perform_backup_tasks'), path('cron/perform_backup_tasks/', view_perform_backup_tasks, name='perform_backup_tasks'),

View file

@ -0,0 +1,23 @@
# Generated by Django 5.0.3 on 2024-04-11 14:23
from django.db import migrations, models
# Schema migration for the routerfleet_tools app: adds two nullable
# DateTimeField columns to the 'webadminsettings' model.
class Migration(migrations.Migration):
# Depends on the named routerfleet_tools migration, so it is applied after it.
dependencies = [
('routerfleet_tools', '0002_webadminsettings_cron_last_run'),
]
# Both new fields are declared blank=True, null=True, so no default value
# is supplied for existing rows.
operations = [
# New field: monitoring_last_run.
migrations.AddField(
model_name='webadminsettings',
name='monitoring_last_run',
field=models.DateTimeField(blank=True, null=True),
),
# New field: router_config_last_updated.
migrations.AddField(
model_name='webadminsettings',
name='router_config_last_updated',
field=models.DateTimeField(blank=True, null=True),
),
]

View file

@ -8,7 +8,9 @@ class WebadminSettings(models.Model):
current_version = models.PositiveIntegerField(default=0) current_version = models.PositiveIntegerField(default=0)
latest_version = models.PositiveIntegerField(default=0) latest_version = models.PositiveIntegerField(default=0)
last_checked = models.DateTimeField(blank=True, null=True) last_checked = models.DateTimeField(blank=True, null=True)
router_config_last_updated = models.DateTimeField(blank=True, null=True)
cron_last_run = models.DateTimeField(blank=True, null=True) cron_last_run = models.DateTimeField(blank=True, null=True)
monitoring_last_run = models.DateTimeField(blank=True, null=True)
updated = models.DateTimeField(auto_now=True) updated = models.DateTimeField(auto_now=True)
created = models.DateTimeField(auto_now_add=True) created = models.DateTimeField(auto_now_add=True)