Update scripts/health_checker.py

stoney420 2025-06-28 02:16:03 +02:00
parent c582c80cc1
commit 96a5a7fb37


@@ -1,597 +1,248 @@
#!/usr/bin/env python3
"""
Repository Health Monitor - Keeps the repository clean and organized
"""
import os
import shutil
import logging
import json
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Tuple
import subprocess
import gzip

class RepoHealthMonitor:
"""Monitor and maintain repository cleanliness and organization."""

def __init__(self, config=None):
self.config = config
self.logger = logging.getLogger(__name__)
self.root_path = Path.cwd()

# Define cleanup rules
self.cleanup_rules = {
'temp_files': {
'patterns': ['*_temp*', '*.tmp', '*~', '*.backup.*'],
'max_age_days': 1,
'action': 'delete'
},
'old_logs': {
'patterns': ['*.log'],
'max_age_days': 7,
'action': 'archive',
'keep_recent': 5
},
'old_backups': {
'patterns': ['backups/*.txt'],
'max_age_days': 30,
'action': 'compress'
},
'large_files': {
'max_size_mb': 50,
'action': 'warn'
},
'python_cache': {
'patterns': ['__pycache__', '*.pyc', '*.pyo'],
'action': 'delete'
}
}
def run_health_check(self) -> Dict:
"""Run comprehensive repository health check."""
self.logger.info("🔍 Starting repository health check...")
health_report = {
'timestamp': datetime.now().isoformat(),
'repository_size': self._calculate_repo_size(),
'file_counts': self._count_files_by_type(),
'issues_found': [],
'cleanup_suggestions': [],
'space_analysis': self._analyze_disk_usage(),
'organization_score': 0
}
# Check various aspects
health_report.update({
'temp_files': self._check_temp_files(),
'log_files': self._check_log_files(),
'backup_files': self._check_backup_files(),
'large_files': self._check_large_files(),
'python_artifacts': self._check_python_artifacts(),
'git_status': self._check_git_status()
})
# Calculate organization score
health_report['organization_score'] = self._calculate_organization_score(health_report)
# Generate suggestions
health_report['cleanup_suggestions'] = self._generate_cleanup_suggestions(health_report)
self.logger.info(f"📊 Health check complete. Organization score: {health_report['organization_score']}/100")
return health_report
def auto_cleanup(self, dry_run: bool = False) -> Dict:
"""Automatically clean up repository based on rules."""
self.logger.info(f"🧹 Starting auto-cleanup (dry_run={dry_run})...")
cleanup_results = {
'files_deleted': [],
'files_archived': [],
'files_compressed': [],
'space_freed_mb': 0,
'errors': []
}
try:
# Clean temp files
cleanup_results.update(self._cleanup_temp_files(dry_run))
# Archive old logs
cleanup_results.update(self._archive_old_logs(dry_run))
# Compress old backups
cleanup_results.update(self._compress_old_backups(dry_run))
# Remove Python cache
cleanup_results.update(self._cleanup_python_cache(dry_run))
# Organize files
cleanup_results.update(self._organize_files(dry_run))
except Exception as e:
self.logger.error(f"Error during auto-cleanup: {e}")
cleanup_results['errors'].append(str(e))

self.logger.info(f"✅ Auto-cleanup complete. Space freed: {cleanup_results['space_freed_mb']:.2f} MB")
return cleanup_results
def _calculate_repo_size(self) -> Dict:
"""Calculate repository size breakdown."""
sizes = {
'total_mb': 0,
'by_directory': {},
'by_extension': {}
}
for root, dirs, files in os.walk(self.root_path):
# Skip .git directory
if '.git' in root:
continue
dir_size = 0
for file in files:
file_path = Path(root) / file
try:
file_size = file_path.stat().st_size
dir_size += file_size
# Track by extension
ext = file_path.suffix.lower()
if ext:
sizes['by_extension'][ext] = sizes['by_extension'].get(ext, 0) + file_size
except (OSError, FileNotFoundError):
continue
if dir_size > 0:
rel_dir = str(Path(root).relative_to(self.root_path))
sizes['by_directory'][rel_dir] = dir_size / (1024 * 1024) # Convert to MB
sizes['total_mb'] += dir_size / (1024 * 1024)
return sizes
def _count_files_by_type(self) -> Dict:
"""Count files by type and directory."""
counts = {
'total_files': 0,
'by_extension': {},
'by_directory': {}
}
for root, dirs, files in os.walk(self.root_path):
if '.git' in root:
continue
rel_dir = str(Path(root).relative_to(self.root_path))
counts['by_directory'][rel_dir] = len(files)
counts['total_files'] += len(files)
for file in files:
ext = Path(file).suffix.lower()
if ext:
counts['by_extension'][ext] = counts['by_extension'].get(ext, 0) + 1
return counts
def _check_temp_files(self) -> Dict:
"""Check for temporary files that should be cleaned."""
temp_files = []
for pattern in self.cleanup_rules['temp_files']['patterns']:
for file_path in self.root_path.rglob(pattern):
if file_path.is_file() and '.git' not in str(file_path):
age_days = (datetime.now() - datetime.fromtimestamp(file_path.stat().st_mtime)).days
temp_files.append({
'path': str(file_path.relative_to(self.root_path)),
'size_mb': file_path.stat().st_size / (1024 * 1024),
'age_days': age_days
})
return {
'count': len(temp_files),
'files': temp_files,
'total_size_mb': sum(f['size_mb'] for f in temp_files)
}
def _check_log_files(self) -> Dict:
"""Check log file status and organization."""
log_files = []
reports_dir = self.root_path / 'reports'
# Check root log files
for log_file in self.root_path.glob('*.log'):
age_days = (datetime.now() - datetime.fromtimestamp(log_file.stat().st_mtime)).days
log_files.append({
'path': str(log_file.relative_to(self.root_path)),
'size_mb': log_file.stat().st_size / (1024 * 1024),
'age_days': age_days,
'location': 'root',
'should_move': True
})
# Check reports directory
if reports_dir.exists():
for log_file in reports_dir.rglob('*.log'):
age_days = (datetime.now() - datetime.fromtimestamp(log_file.stat().st_mtime)).days
log_files.append({
'path': str(log_file.relative_to(self.root_path)),
'size_mb': log_file.stat().st_size / (1024 * 1024),
'age_days': age_days,
'location': 'reports',
'should_move': False
})
return {
'count': len(log_files),
'files': log_files,
'misplaced_count': sum(1 for f in log_files if f['should_move']),
'total_size_mb': sum(f['size_mb'] for f in log_files)
}
def _check_backup_files(self) -> Dict:
"""Check backup file organization and compression opportunities."""
backups = []
backup_dir = self.root_path / 'backups'
if backup_dir.exists():
for backup_file in backup_dir.rglob('*'):
if backup_file.is_file():
age_days = (datetime.now() - datetime.fromtimestamp(backup_file.stat().st_mtime)).days
is_compressed = backup_file.suffix in ['.gz', '.zip', '.tar.gz']
backups.append({
'path': str(backup_file.relative_to(self.root_path)),
'size_mb': backup_file.stat().st_size / (1024 * 1024),
'age_days': age_days,
'is_compressed': is_compressed,
'should_compress': age_days > 7 and not is_compressed
})
return {
'count': len(backups),
'files': backups,
'compression_candidates': sum(1 for b in backups if b['should_compress']),
'total_size_mb': sum(b['size_mb'] for b in backups)
}
def _check_large_files(self) -> Dict:
"""Check for unusually large files."""
large_files = []
max_size_bytes = self.cleanup_rules['large_files']['max_size_mb'] * 1024 * 1024
for root, dirs, files in os.walk(self.root_path):
if '.git' in root:
continue
for file in files:
file_path = Path(root) / file
try:
if file_path.stat().st_size > max_size_bytes:
large_files.append({
'path': str(file_path.relative_to(self.root_path)),
'size_mb': file_path.stat().st_size / (1024 * 1024),
'type': file_path.suffix.lower()
})
except (OSError, FileNotFoundError):
continue
return {
'count': len(large_files),
'files': large_files,
'total_size_mb': sum(f['size_mb'] for f in large_files)
}
def _check_python_artifacts(self) -> Dict:
"""Check for Python cache and compiled files."""
artifacts = []
# Find __pycache__ directories
for pycache_dir in self.root_path.rglob('__pycache__'):
if pycache_dir.is_dir():
size = sum(f.stat().st_size for f in pycache_dir.rglob('*') if f.is_file())
artifacts.append({
'path': str(pycache_dir.relative_to(self.root_path)),
'type': 'directory',
'size_mb': size / (1024 * 1024)
})
# Find .pyc and .pyo files
for pyc_file in self.root_path.rglob('*.py[co]'):
artifacts.append({
'path': str(pyc_file.relative_to(self.root_path)),
'type': 'file',
'size_mb': pyc_file.stat().st_size / (1024 * 1024)
})
return {
'count': len(artifacts),
'files': artifacts,
'total_size_mb': sum(a['size_mb'] for a in artifacts)
}
def _check_git_status(self) -> Dict:
"""Check git repository status."""
try:
# Check for untracked files
result = subprocess.run(['git', 'status', '--porcelain'],
capture_output=True, text=True, cwd=self.root_path)
untracked = []
modified = []
for line in result.stdout.strip().split('\n'):
if line:
status, filename = line[:2], line[3:]
if status.strip() == '??':
untracked.append(filename)
elif status.strip():
modified.append(filename)
return {
'untracked_files': untracked,
'modified_files': modified,
'is_clean': len(untracked) == 0 and len(modified) == 0
}
except subprocess.CalledProcessError:
return {'error': 'Not a git repository or git not available'}
def _calculate_organization_score(self, health_report: Dict) -> int:
"""Calculate a repository organization score (0-100)."""
score = 100
# Deduct points for issues
if health_report['temp_files']['count'] > 0:
score -= min(20, health_report['temp_files']['count'] * 2)
if health_report['log_files']['misplaced_count'] > 0:
score -= min(15, health_report['log_files']['misplaced_count'] * 5)
if health_report['backup_files']['compression_candidates'] > 0:
score -= min(10, health_report['backup_files']['compression_candidates'] * 3)
if health_report['python_artifacts']['count'] > 0:
score -= min(10, health_report['python_artifacts']['count'])
if health_report['large_files']['count'] > 0:
score -= min(15, health_report['large_files']['count'] * 5)
# Check git status
git_status = health_report.get('git_status', {})
if not git_status.get('is_clean', True):
score -= 10
return max(0, score)
def _generate_cleanup_suggestions(self, health_report: Dict) -> List[str]:
"""Generate specific cleanup suggestions based on health report."""
suggestions = []
if health_report['temp_files']['count'] > 0:
suggestions.append(f"🗑️ Remove {health_report['temp_files']['count']} temporary files ({health_report['temp_files']['total_size_mb']:.1f} MB)")
if health_report['log_files']['misplaced_count'] > 0:
suggestions.append(f"📁 Move {health_report['log_files']['misplaced_count']} log files to reports/ directory")
if health_report['backup_files']['compression_candidates'] > 0:
suggestions.append(f"🗜️ Compress {health_report['backup_files']['compression_candidates']} old backup files")
if health_report['python_artifacts']['count'] > 0:
suggestions.append(f"🐍 Remove Python cache artifacts ({health_report['python_artifacts']['total_size_mb']:.1f} MB)")
if health_report['large_files']['count'] > 0:
suggestions.append(f"📏 Review {health_report['large_files']['count']} large files for archival")
git_status = health_report.get('git_status', {})
if git_status.get('untracked_files'):
suggestions.append(f"📝 Add {len(git_status['untracked_files'])} untracked files to .gitignore or commit them")
return suggestions
def _analyze_disk_usage(self) -> Dict:
"""Analyze disk usage patterns."""
try:
total, used, free = shutil.disk_usage(self.root_path)
return {
'total_gb': total / (1024**3),
'used_gb': used / (1024**3),
'free_gb': free / (1024**3),
'usage_percent': (used / total) * 100
}
except Exception as e:
return {'error': str(e)}
def _cleanup_temp_files(self, dry_run: bool) -> Dict:
"""Clean up temporary files."""
results = {'temp_files_deleted': []}
for pattern in self.cleanup_rules['temp_files']['patterns']:
for file_path in self.root_path.rglob(pattern):
if file_path.is_file() and '.git' not in str(file_path):
if not dry_run:
try:
file_path.unlink()
results['temp_files_deleted'].append(str(file_path.relative_to(self.root_path)))
except Exception as e:
self.logger.warning(f"Could not delete {file_path}: {e}")
else:
results['temp_files_deleted'].append(str(file_path.relative_to(self.root_path)))
return results
def _archive_old_logs(self, dry_run: bool) -> Dict:
"""Archive old log files."""
results = {'logs_archived': []}
# Create reports/logs directory if it doesn't exist
logs_dir = self.root_path / 'reports' / 'logs'
if not dry_run:
logs_dir.mkdir(parents=True, exist_ok=True)
# Move log files from root to reports/logs
for log_file in self.root_path.glob('*.log'):
new_path = logs_dir / log_file.name
if not dry_run:
try:
shutil.move(str(log_file), str(new_path))
results['logs_archived'].append(str(log_file.relative_to(self.root_path)))
except Exception as e:
self.logger.warning(f"Could not move {log_file}: {e}")
else:
results['logs_archived'].append(str(log_file.relative_to(self.root_path)))
return results
def _compress_old_backups(self, dry_run: bool) -> Dict:
"""Compress old backup files."""
results = {'backups_compressed': []}
backup_dir = self.root_path / 'backups'
if backup_dir.exists():
cutoff_date = datetime.now() - timedelta(days=7)
for backup_file in backup_dir.glob('*.txt'):
file_date = datetime.fromtimestamp(backup_file.stat().st_mtime)
if file_date < cutoff_date:
if not dry_run:
try:
# Compress with gzip
with open(backup_file, 'rb') as f_in:
with gzip.open(f"{backup_file}.gz", 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
backup_file.unlink()
results['backups_compressed'].append(str(backup_file.relative_to(self.root_path)))
except Exception as e:
self.logger.warning(f"Could not compress {backup_file}: {e}")
else:
results['backups_compressed'].append(str(backup_file.relative_to(self.root_path)))
return results

def _cleanup_python_cache(self, dry_run: bool) -> Dict:
"""Remove Python cache files and directories."""
results = {'python_cache_removed': []}
# Remove __pycache__ directories
for pycache_dir in self.root_path.rglob('__pycache__'):
if pycache_dir.is_dir():
if not dry_run:
try:
shutil.rmtree(pycache_dir)
results['python_cache_removed'].append(str(pycache_dir.relative_to(self.root_path)))
except Exception as e:
self.logger.warning(f"Could not remove {pycache_dir}: {e}")
else:
results['python_cache_removed'].append(str(pycache_dir.relative_to(self.root_path)))
# Remove .pyc and .pyo files
for pyc_file in self.root_path.rglob('*.py[co]'):
if not dry_run:
try:
pyc_file.unlink()
results['python_cache_removed'].append(str(pyc_file.relative_to(self.root_path)))
except Exception as e:
self.logger.warning(f"Could not remove {pyc_file}: {e}")
else:
results['python_cache_removed'].append(str(pyc_file.relative_to(self.root_path)))
return results

def _organize_files(self, dry_run: bool) -> Dict:
"""Organize files into proper directories."""
results = {'files_organized': []}
# Create proper directory structure
directories = [
'reports/logs',
'reports/archive',
'backups/compressed',
'templates'
]
if not dry_run:
for directory in directories:
(self.root_path / directory).mkdir(parents=True, exist_ok=True)
return results

def save_health_report(self, health_report: Dict, filename: str = None) -> Path:
"""Save health report to file."""
if filename is None:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'repo_health_{timestamp}.json'
reports_dir = self.root_path / 'reports'
reports_dir.mkdir(exist_ok=True)
report_path = reports_dir / filename
try:
with open(report_path, 'w', encoding='utf-8') as f:
json.dump(health_report, f, indent=2, default=str)
self.logger.info(f"Health report saved to: {report_path}")
return report_path
except Exception as e:
self.logger.error(f"Could not save health report: {e}")
return None

def main():
"""Command line interface for repository health monitoring."""
import argparse
parser = argparse.ArgumentParser(description='IPTV Repository Health Monitor')
parser.add_argument('--check', action='store_true', help='Run health check')
parser.add_argument('--cleanup', action='store_true', help='Run auto cleanup')
parser.add_argument('--dry-run', action='store_true', help='Dry run (no actual changes)')
parser.add_argument('--save-report', action='store_true', help='Save health report to file')
args = parser.parse_args()

# Setup logging
logging.basicConfig(
level=logging.INFO,
format='[%(asctime)s] %(levelname)s: %(message)s'
)

monitor = RepoHealthMonitor()
if args.check or args.save_report:
health_report = monitor.run_health_check()
print(f"\n📊 Repository Health Report")
print(f"Organization Score: {health_report['organization_score']}/100")
print(f"Total Size: {health_report['repository_size']['total_mb']:.1f} MB")
print(f"Total Files: {health_report['file_counts']['total_files']}")
if health_report['cleanup_suggestions']:
print("\n🔧 Cleanup Suggestions:")
for suggestion in health_report['cleanup_suggestions']:
print(f" {suggestion}")
else:
print("\n✅ Repository is well organized!")
if args.save_report:
monitor.save_health_report(health_report)
if args.cleanup:
cleanup_results = monitor.auto_cleanup(dry_run=args.dry_run)
if args.dry_run:
print("\n🧪 Dry Run Results:")
else:
print("\n🧹 Cleanup Results:")
for key, items in cleanup_results.items():
if isinstance(items, list) and items:
print(f" {key}: {len(items)} items")
for item in items[:5]: # Show first 5
print(f" - {item}")
if len(items) > 5:
print(f" ... and {len(items) - 5} more")

if __name__ == "__main__":
main()


#!/usr/bin/env python3
"""
Health Checker - Simple URL health checking for IPTV channels
"""

import logging
import requests
import concurrent.futures
from typing import Dict, List, Optional
import time


class HealthChecker:
    """Simple health checker for IPTV channel URLs."""

    def __init__(self, config):
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.timeout = config.settings.get('health_check_timeout', 5)
        self.max_workers = config.settings.get('max_workers', 4)

    def check_single_url(self, url: str) -> Dict:
        """Check a single URL for accessibility."""
        start_time = time.time()

        try:
            response = requests.head(
                url,
                timeout=self.timeout,
                allow_redirects=True,
                headers={'User-Agent': 'IPTV-Health-Checker/1.0'}
            )

            response_time = time.time() - start_time

            return {
                'url': url,
                'status': 'healthy' if response.status_code < 400 else 'unhealthy',
                'status_code': response.status_code,
                'response_time': round(response_time, 2),
                'error': None
            }

        except requests.exceptions.Timeout:
            return {
                'url': url,
                'status': 'timeout',
                'status_code': None,
                'response_time': self.timeout,
                'error': 'Request timeout'
            }

        except requests.exceptions.ConnectionError:
            return {
                'url': url,
                'status': 'unreachable',
                'status_code': None,
                'response_time': time.time() - start_time,
                'error': 'Connection error'
            }

        except Exception as e:
            return {
                'url': url,
                'status': 'error',
                'status_code': None,
                'response_time': time.time() - start_time,
                'error': str(e)
            }

    def check_channel_health(self, channel: Dict) -> Dict:
        """Check health of a single channel."""
        url = channel.get('Stream URL', '')

        if not url:
            return {
                'channel_name': channel.get('Stream name', 'Unknown'),
                'url': '',
                'status': 'no_url',
                'status_code': None,
                'response_time': 0,
                'error': 'No URL provided'
            }

        result = self.check_single_url(url)
        result['channel_name'] = channel.get('Stream name', 'Unknown')

        return result

    def batch_health_check(self, channels: List[Dict]) -> Dict:
        """Perform batch health check on multiple channels."""
        if not self.config.settings.get('enable_health_check', False):
            self.logger.info("Health checking is disabled")
            return {'enabled': False, 'results': []}

        self.logger.info(f"Starting health check for {len(channels)} channels...")
        start_time = time.time()

        results = []

        # Use ThreadPoolExecutor for concurrent checks
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all health check tasks
            future_to_channel = {
                executor.submit(self.check_channel_health, channel): channel
                for channel in channels
            }

            # Collect results as they complete
            for future in concurrent.futures.as_completed(future_to_channel):
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    channel = future_to_channel[future]
                    self.logger.error(f"Health check failed for {channel.get('Stream name', 'Unknown')}: {e}")
                    results.append({
                        'channel_name': channel.get('Stream name', 'Unknown'),
                        'url': channel.get('Stream URL', ''),
                        'status': 'error',
                        'status_code': None,
                        'response_time': 0,
                        'error': str(e)
                    })

        total_time = time.time() - start_time

        # Generate summary statistics
        summary = self._generate_health_summary(results, total_time)

        self.logger.info(f"Health check completed in {total_time:.1f}s: "
                         f"{summary['healthy']}/{summary['total']} channels healthy")

        return {
            'enabled': True,
            'results': results,
            'summary': summary,
            'total_time': total_time
        }

    def _generate_health_summary(self, results: List[Dict], total_time: float) -> Dict:
        """Generate summary statistics from health check results."""
        total = len(results)
        healthy = sum(1 for r in results if r['status'] == 'healthy')
        unhealthy = sum(1 for r in results if r['status'] == 'unhealthy')
        timeout = sum(1 for r in results if r['status'] == 'timeout')
        unreachable = sum(1 for r in results if r['status'] == 'unreachable')
        errors = sum(1 for r in results if r['status'] == 'error')
        no_url = sum(1 for r in results if r['status'] == 'no_url')

        # Calculate average response time for successful checks
        successful_times = [r['response_time'] for r in results if r['status'] == 'healthy']
        avg_response_time = sum(successful_times) / len(successful_times) if successful_times else 0

        return {
            'total': total,
            'healthy': healthy,
            'unhealthy': unhealthy,
            'timeout': timeout,
            'unreachable': unreachable,
            'errors': errors,
            'no_url': no_url,
            'health_percentage': round((healthy / total * 100) if total > 0 else 0, 1),
            'avg_response_time': round(avg_response_time, 2),
            'total_check_time': round(total_time, 1)
        }

    def get_unhealthy_channels(self, health_results: Dict) -> List[Dict]:
        """Get list of unhealthy channels for reporting."""
        if not health_results.get('enabled', False):
            return []

        unhealthy = []
        for result in health_results.get('results', []):
            if result['status'] != 'healthy':
                unhealthy.append({
                    'name': result['channel_name'],
                    'url': result['url'],
                    'status': result['status'],
                    'error': result.get('error', 'Unknown error')
                })

        return unhealthy

    def save_health_report(self, health_results: Dict, filename: str = None) -> Optional[str]:
        """Save health check results to a file."""
        if not health_results.get('enabled', False):
            return None

        import json
        from datetime import datetime
        from pathlib import Path

        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f'health_check_{timestamp}.json'

        reports_dir = Path('reports')
        reports_dir.mkdir(exist_ok=True)

        report_path = reports_dir / filename

        try:
            # Prepare report data
            report_data = {
                'timestamp': datetime.now().isoformat(),
                'summary': health_results['summary'],
                'unhealthy_channels': self.get_unhealthy_channels(health_results),
                'total_time': health_results['total_time']
            }

            with open(report_path, 'w', encoding='utf-8') as f:
                json.dump(report_data, f, indent=2)

            self.logger.info(f"Health report saved to: {report_path}")
            return str(report_path)

        except Exception as e:
            self.logger.error(f"Could not save health report: {e}")
            return None


# Simple fallback for when requests is not available
class SimpleHealthChecker:
    """Fallback health checker that doesn't require external dependencies."""

    def __init__(self, config):
        self.config = config
        self.logger = logging.getLogger(__name__)

    def batch_health_check(self, channels: List[Dict]) -> Dict:
        """Fallback that skips health checking."""
        self.logger.info("Health checking disabled (requests library not available)")
        return {
            'enabled': False,
            'results': [],
            'summary': {'total': len(channels), 'healthy': 0, 'health_percentage': 0},
            'total_time': 0
        }


# Try to use the full health checker, fall back to simple one if requests isn't available
try:
    import requests
    # If requests is available, use the full HealthChecker
except ImportError:
    # If requests is not available, use the fallback
    class HealthChecker(SimpleHealthChecker):
        pass
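
A minimal usage sketch of the new HealthChecker, not part of the commit: it assumes a config object that exposes a `settings` dict with the keys the constructor and batch_health_check() read, and channel dicts keyed by 'Stream name' and 'Stream URL' as in the code above. The import path and the SimpleNamespace stand-in for the config object are assumptions for illustration only.

import logging
from types import SimpleNamespace

from health_checker import HealthChecker  # assumed import path for scripts/health_checker.py

logging.basicConfig(level=logging.INFO)

# Hypothetical config: only the `settings` dict is required by HealthChecker
config = SimpleNamespace(settings={
    'enable_health_check': True,   # batch_health_check() returns early when False
    'health_check_timeout': 5,     # seconds, passed to requests.head()
    'max_workers': 4,              # ThreadPoolExecutor pool size
})

channels = [
    {'Stream name': 'Example Channel', 'Stream URL': 'http://example.com/stream.m3u8'},
    {'Stream name': 'Broken Channel', 'Stream URL': ''},   # reported as 'no_url'
]

checker = HealthChecker(config)
results = checker.batch_health_check(channels)

if results['enabled']:
    print(results['summary'])                        # counts, health_percentage, timings
    print(checker.get_unhealthy_channels(results))   # name/url/status/error per bad channel
    checker.save_health_report(results)              # writes reports/health_check_<timestamp>.json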