Monitoring improved to update earlier when any router configuration changes

This commit is contained in:
Eduardo Silva 2024-04-11 12:54:28 -03:00
parent d7c44adc87
commit 23c4090ec1
6 changed files with 109 additions and 11 deletions

View file

@ -3,31 +3,58 @@ import time
from datetime import datetime
from subprocess import Popen, PIPE
# Development switch: when True the script targets a local API instance,
# prints extra diagnostics and skips the container start-up delay.
DEBUG = False
if DEBUG:
API_ADDRESS = "http://localhost:8000"
else:
API_ADDRESS = "http://routerfleet:8001"
# Remote API endpoints, all derived from API_ADDRESS.
HOST_LIST_URL = f"{API_ADDRESS}/monitoring/export_router_list/"
UPDATE_STATUS_URL = f"{API_ADDRESS}/monitoring/update_router_status/"
CONFIG_TIMESTAMP_URL = f"{API_ADDRESS}/monitoring/router_config_timestamp/"
UPDATE_HOST_LIST_INTERVAL = 600 # How often to update the router list in seconds
MONITOR_INTERVAL = 60 # How often to monitor each router in seconds
MAX_NOTIFICATIONS_PER_MONITOR_INTERVAL = 50 # Throttle the number of notifications sent to the remote API
# NOTE(review): the next three assignments re-bind HOST_LIST_URL,
# UPDATE_STATUS_URL and DEBUG after the definitions above, so as written the
# hard-coded routerfleet URLs take effect regardless of DEBUG. They look like
# the pre-change lines of the diff rendered without -/+ markers - confirm
# they are absent from the real file.
HOST_LIST_URL = "http://routerfleet:8001/monitoring/export_router_list/"
UPDATE_STATUS_URL = "http://routerfleet:8001/monitoring/update_router_status/"
DEBUG = False
# Global variables
# Router list most recently returned by a successful fetch_host_list() call.
host_list = []
# time.time() value recorded when the host list was last fetched successfully.
host_list_update_timestamp = 0
# Reset to 0 at the start of every monitoring loop iteration.
notification_count = 0
# Config timestamp that the currently loaded host list corresponds to.
current_router_config_timestamp = ''
# Config timestamp most recently reported by the remote API.
remote_router_config_timestamp = ''
def get_verbose_status(status):
    """Translate a truthy/falsy status flag into a human-readable label.

    Returns "online" when ``status`` is truthy and "offline" otherwise.
    """
    if status:
        return "online"
    return "offline"
def fetch_host_list():
global host_list_update_timestamp
def update_router_config_timestamp():
    """Refresh the cached remote router-config timestamp from the API.

    Queries CONFIG_TIMESTAMP_URL and stores the returned ``router_config``
    value in the module-level ``remote_router_config_timestamp``. The cached
    value is left untouched on any failure (non-200 response, network error,
    timeout, malformed payload); the problem is only logged, so the caller's
    monitoring loop keeps running.

    Returns:
        None.
    """
    global remote_router_config_timestamp
    try:
        # Bound the request: without a timeout an unresponsive API would
        # block this call, and therefore the whole monitoring loop, forever.
        response = requests.get(CONFIG_TIMESTAMP_URL, timeout=10)
        if response.status_code != 200:
            print(f"{datetime.now()} - Error updating router config timestamp: HTTP {response.status_code}")
            return
        latest_timestamp = response.json()['router_config']
        if latest_timestamp != remote_router_config_timestamp:
            remote_router_config_timestamp = latest_timestamp
            print(f"{datetime.now()} - Router config timestamp updated: {remote_router_config_timestamp}")
        else:
            print(f"{datetime.now()} - Router config timestamp unchanged: {remote_router_config_timestamp}")
    except Exception as e:
        # Deliberately broad: this is a best-effort poll inside a daemon loop,
        # so any failure (including a timeout) is logged rather than raised.
        print(f"{datetime.now()} - Exception updating router config timestamp: {e}")
def fetch_host_list():
global host_list_update_timestamp, current_router_config_timestamp, remote_router_config_timestamp
try:
print(f"{datetime.now()} - Fetching host list...")
response = requests.get(HOST_LIST_URL)
if response.status_code == 200:
host_list_update_timestamp = time.time()
remote_router_config_timestamp = response.json()['router_config']
current_router_config_timestamp = remote_router_config_timestamp
return response.json()['router_list'], True
else:
print(f"{datetime.now()} - Error fetching host list: HTTP {response.status_code}")
@ -66,18 +93,33 @@ def check_host_status(host_uuid):
def update_and_monitor():
global host_list, host_list_update_timestamp, notification_count
global host_list, host_list_update_timestamp, notification_count, current_router_config_timestamp, remote_router_config_timestamp
while True:
update_router_config_timestamp()
current_time = time.time()
notification_count = 0
update_required = False
if not current_router_config_timestamp:
update_required = True
if current_router_config_timestamp != remote_router_config_timestamp:
update_required = True
if current_time - host_list_update_timestamp > UPDATE_HOST_LIST_INTERVAL:
update_required = True
if update_required:
print(f"{datetime.now()} - Update required. Fetching host list...")
new_host_list, fetch_host_list_success = fetch_host_list()
if fetch_host_list_success:
host_list = new_host_list
print(f"{datetime.now()} - host list updated.")
if DEBUG:
print(host_list)
else:
print(f"{datetime.now()} - No update required. Skipping host list update.")
if DEBUG:
print(f"{datetime.now()} - Current router config timestamp: {current_router_config_timestamp}")
print(f"{datetime.now()} - Remote router config timestamp: {remote_router_config_timestamp}")
if host_list:
if DEBUG:
@ -94,6 +136,7 @@ def update_and_monitor():
if __name__ == "__main__":
    # Script entry point: announce start-up, give the routerfleet container
    # time to come up (skipped in DEBUG mode), then enter the monitoring loop.
    print(f"{datetime.now()} - Monitoring container started, waiting for routerfleet container to start...")
    startup_delay = 0 if DEBUG else 30  # seconds
    if startup_delay:
        time.sleep(startup_delay)  # Wait for the routerfleet container to start
    print(f"{datetime.now()} - Starting monitoring service...")
    update_and_monitor()

View file

@ -1,10 +1,28 @@
from django.shortcuts import render
from router_manager.models import Router
from django.http import JsonResponse
from django.utils import timezone
from routerfleet_tools.models import WebadminSettings
def view_router_config_timestamp(request):
    """Return the last router-configuration change time as JSON.

    Loads (or creates) the 'webadmin_settings' row, seeds
    ``router_config_last_updated`` with the current time when it is unset,
    records this call in ``monitoring_last_run`` and saves the row.

    Returns:
        JsonResponse of the form ``{'router_config': <ISO-8601 timestamp>}``.
    """
    # NOTE(review): the flattened diff lost its indentation; this assumes only
    # the seeding of router_config_last_updated is conditional, mirroring
    # view_export_router_list - confirm against the real file.
    settings_row, _created = WebadminSettings.objects.get_or_create(name='webadmin_settings')
    # Keep an existing timestamp; only fall back to "now" when none is stored.
    settings_row.router_config_last_updated = (
        settings_row.router_config_last_updated or timezone.now()
    )
    settings_row.monitoring_last_run = timezone.now()
    settings_row.save()
    payload = {'router_config': settings_row.router_config_last_updated.isoformat()}
    return JsonResponse(payload)
def view_export_router_list(request):
router_list = {}
webadmin_settings, _ = WebadminSettings.objects.get_or_create(name='webadmin_settings')
webadmin_settings.monitoring_last_run = timezone.now()
if not webadmin_settings.router_config_last_updated:
webadmin_settings.router_config_last_updated = timezone.now()
webadmin_settings.save()
for router in Router.objects.filter(enabled=True, monitoring=True):
router_list[str(router.uuid)] = {
'address': router.address,
@ -13,7 +31,8 @@ def view_export_router_list(request):
'uuid': str(router.uuid),
}
data = {
'router_list': router_list
'router_list': router_list,
'router_config': webadmin_settings.router_config_last_updated.isoformat()
}
return JsonResponse(data)
@ -28,4 +47,7 @@ def view_update_router_status(request):
else:
router.routerstatus.status_online = False
router.routerstatus.save()
return JsonResponse({'status': 'success'})
webadmin_settings, _ = WebadminSettings.objects.get_or_create(name='webadmin_settings')
webadmin_settings.monitoring_last_run = timezone.now()
webadmin_settings.save()
return JsonResponse({'status': 'success', 'router_config': webadmin_settings.router_config_last_updated.isoformat()})

View file

@ -5,6 +5,7 @@ from django.contrib.auth.decorators import login_required
from backup.models import BackupProfile
from backup_data.models import RouterBackup
from routerfleet_tools.models import WebadminSettings
from .models import Router, RouterGroup, RouterStatus, SSHKey, BackupSchedule
from .forms import RouterForm, RouterGroupForm, SSHKeyForm
from user_manager.models import UserAcl
@ -59,12 +60,16 @@ def view_router_details(request):
def view_manage_router(request):
if not UserAcl.objects.filter(user=request.user).filter(user_level__gte=30).exists():
return render(request, 'access_denied.html', {'page_title': 'Access Denied'})
webadmin_settings, _ = WebadminSettings.objects.get_or_create(name='webadmin_settings')
if request.GET.get('uuid'):
router = get_object_or_404(Router, uuid=request.GET.get('uuid'))
if request.GET.get('action') == 'delete':
if request.GET.get('confirmation') == 'delete':
router.delete()
messages.success(request, 'Router deleted successfully')
webadmin_settings.router_config_last_updated = timezone.now()
webadmin_settings.save()
return redirect('router_list')
else:
messages.warning(request, 'Router not deleted|Invalid confirmation')
@ -78,6 +83,8 @@ def view_manage_router(request):
messages.success(request, 'Router saved successfully|It may take a few minutes until monitoring starts for this router.')
router_status, _ = RouterStatus.objects.get_or_create(router=form.instance)
BackupSchedule.objects.filter(router=form.instance).delete()
webadmin_settings.router_config_last_updated = timezone.now()
webadmin_settings.save()
return redirect('router_list')
context = {

View file

@ -6,7 +6,7 @@ from user_manager.views import view_manage_user, view_user_list
from accounts.views import view_login, view_logout, view_create_first_user
from router_manager.views import view_router_list, view_manage_router, view_router_group_list, view_ssh_key_list, view_manage_router_group, view_manage_sshkey, view_router_details, view_create_instant_backup_task
from backup.views import view_backup_profile_list, view_manage_backup_profile, view_backup_list, view_backup_details, view_debug_run_backups, view_compare_backups, view_backup_download, view_backup_delete
from monitoring.views import view_export_router_list, view_update_router_status
from monitoring.views import view_export_router_list, view_update_router_status, view_router_config_timestamp
from backup_data.views import view_generate_backup_schedule, view_create_backup_tasks, view_perform_backup_tasks, view_housekeeping
from routerfleet_tools.views import cron_check_updates
@ -38,6 +38,7 @@ urlpatterns = [
path('backup/delete/', view_backup_delete, name='delete_backup'),
path('monitoring/export_router_list/', view_export_router_list, name='export_router_list'),
path('monitoring/update_router_status/', view_update_router_status, name='update_router_status'),
path('monitoring/router_config_timestamp/', view_router_config_timestamp, name='router_config_timestamp'),
path('cron/generate_backup_schedule/', view_generate_backup_schedule, name='generate_backup_schedule'),
path('cron/create_backup_tasks/', view_create_backup_tasks, name='create_backup_tasks'),
path('cron/perform_backup_tasks/', view_perform_backup_tasks, name='perform_backup_tasks'),

View file

@ -0,0 +1,23 @@
# Generated by Django 5.0.3 on 2024-04-11 14:23
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add two nullable timestamp columns to the ``webadminsettings`` model.

    ``monitoring_last_run`` and ``router_config_last_updated`` are both
    optional ``DateTimeField`` columns (``blank=True, null=True``).
    """

    # Must run after the migration that introduced cron_last_run.
    dependencies = [
        ('routerfleet_tools', '0002_webadminsettings_cron_last_run'),
    ]

    operations = [
        # Column: monitoring_last_run
        migrations.AddField(
            model_name='webadminsettings',
            name='monitoring_last_run',
            field=models.DateTimeField(blank=True, null=True),
        ),
        # Column: router_config_last_updated
        migrations.AddField(
            model_name='webadminsettings',
            name='router_config_last_updated',
            field=models.DateTimeField(blank=True, null=True),
        ),
    ]

View file

@ -8,7 +8,9 @@ class WebadminSettings(models.Model):
current_version = models.PositiveIntegerField(default=0)
latest_version = models.PositiveIntegerField(default=0)
last_checked = models.DateTimeField(blank=True, null=True)
router_config_last_updated = models.DateTimeField(blank=True, null=True)
cron_last_run = models.DateTimeField(blank=True, null=True)
monitoring_last_run = models.DateTimeField(blank=True, null=True)
updated = models.DateTimeField(auto_now=True)
created = models.DateTimeField(auto_now_add=True)