Update scripts/generate_playlist.py
All checks were successful
📺 Generate M3U Playlist / build (push) Successful in 1m41s

This commit is contained in:
stoney420 2025-06-27 18:36:13 +02:00
parent 7359b0c99f
commit 998a2dbcfa

View file

@ -3,173 +3,16 @@ import os
import json
from datetime import datetime
# --- Configuration ---
# --- Simple Configuration ---
# Paths are relative to the working directory the script is run from.
# Channel database file (presumably the block-per-channel source that
# generate_playlist() parses — confirm against the part of the file not shown).
CHANNELS_FILE = 'channels.txt'
# Generated M3U playlist written by generate_playlist().
PLAYLIST_FILE = 'playlist.m3u'
# Optional bulk-import M3U; process_import() skips the import when it is absent.
IMPORT_FILE = 'bulk_import.m3u'
# Log file; generate_playlist() truncates it at the start of each run.
LOG_FILE = 'playlist_update.log'
# Config files (optional)
# load_settings() falls back to built-in defaults when this file is missing.
SETTINGS_FILE = 'config/settings.json'
# load_group_overrides() returns an empty mapping when this file is missing.
GROUP_OVERRIDES_FILE = 'config/group_overrides.json'
# Country detection patterns
# Maps a display group name (flag emoji + country) to the lowercase keywords
# that count as evidence for that country. detect_country_from_text() looks
# these keywords up in lowercased channel text and scores each country by the
# keywords it finds.
# NOTE(review): several keywords are listed under more than one country, so a
# hit on them scores for both: "tv4" (Poland, Sweden), "tbs" (United States,
# Japan), "mbc" (Arabic, South Korea), "sbs" (Australia, South Korea),
# "tvn" (Poland, South Korea), "global" (Canada, International) and
# "discovery" / "history" / "nickelodeon" (United States, International).
COUNTRY_PATTERNS = {
# United Kingdom
"🇬🇧 United Kingdom": [
"uk", "united kingdom", "britain", "british", "england", "scotland", "wales",
"bbc", "itv", "sky", "channel 4", "channel5", "dave", "really", "yesterday",
"drama", "pick", "alibi", "eden", "gold", "w+1", "more4", "e4", "film4",
"quest", "discovery uk", "eurosport uk", "bt sport"
],
# United States
"🇺🇸 United States": [
"usa", "us", "united states", "america", "american",
"cnn", "fox news", "msnbc", "abc", "nbc", "cbs", "espn", "fox sports",
"disney", "nickelodeon", "cartoon network", "tnt", "tbs", "usa network",
"fx", "amc", "discovery", "history", "nat geo", "hgtv", "food network"
],
# Canada
"🇨🇦 Canada": [
"canada", "canadian", "cbc", "ctv", "global", "city tv", "tvo", "ici",
"sportsnet", "tsn", "rds", "aptn", "ztele", "canal d", "tele quebec"
],
# Australia
"🇦🇺 Australia": [
"australia", "australian", "aussie", "abc au", "sbs", "nine", "ten",
"seven", "foxtel", "optus sport", "kayo"
],
# Germany
"🇩🇪 Germany": [
"germany", "german", "deutschland", "ard", "zdf", "rtl", "sat.1", "pro7",
"vox", "kabel", "sport1", "eurosport de", "sky de"
],
# France
"🇫🇷 France": [
"france", "french", "tf1", "france 2", "france 3", "france 5", "m6",
"canal+", "bfm", "cnews", "rmc", "eurosport fr"
],
# Spain
"🇪🇸 Spain": [
"spain", "spanish", "españa", "tve", "antena 3", "cuatro", "telecinco",
"la sexta", "canal sur", "telemadrid", "movistar"
],
# Italy
"🇮🇹 Italy": [
"italy", "italian", "italia", "rai", "mediaset", "canale 5", "italia 1",
"rete 4", "la7", "sky italia"
],
# Netherlands
"🇳🇱 Netherlands": [
"netherlands", "dutch", "nederland", "npo", "rtl nl", "sbs nl", "veronica",
"net5", "rtl 4", "rtl 5", "rtl 7"
],
# Belgium
"🇧🇪 Belgium": [
"belgium", "belgian", "vtm", "een", "canvas", "ketnet", "rtbf", "la une"
],
# Portugal
"🇵🇹 Portugal": [
"portugal", "portuguese", "rtp", "sic", "tvi", "porto canal", "benfica tv"
],
# India
"🇮🇳 India": [
"india", "indian", "hindi", "bollywood", "zee", "star plus", "colors",
"sony", "dd national", "aaj tak", "ndtv", "times now"
],
# Brazil
"🇧🇷 Brazil": [
"brazil", "brazilian", "brasil", "globo", "sbt", "record", "band",
"rede tv", "cultura", "sportv"
],
# Mexico
"🇲🇽 Mexico": [
"mexico", "mexican", "televisa", "tv azteca", "canal 5", "las estrellas",
"canal once", "imagen"
],
# Arabic/Middle East
"🇸🇦 Arabic": [
"arabic", "arab", "al jazeera", "mbc", "dubai", "abu dhabi", "qatar",
"saudi", "kuwait", "lebanon", "syria", "iraq", "jordan"
],
# Turkey
"🇹🇷 Turkey": [
"turkey", "turkish", "trt", "atv", "kanal d", "star tv", "fox tr",
"show tv", "ntv"
],
# Russia
"🇷🇺 Russia": [
"russia", "russian", "rt", "channel one", "россия", "нтв", "тнт"
],
# Poland
"🇵🇱 Poland": [
"poland", "polish", "tvp", "polsat", "tvn", "tv4", "canal+ pl"
],
# Sweden
"🇸🇪 Sweden": [
"sweden", "swedish", "svt", "tv4", "kanal 5", "tv6", "tv8"
],
# Norway
"🇳🇴 Norway": [
"norway", "norwegian", "nrk", "tv2", "tvnorge", "max"
],
# Denmark
"🇩🇰 Denmark": [
"denmark", "danish", "dr", "tv2 dk", "kanal 5 dk", "6eren"
],
# Finland
"🇫🇮 Finland": [
"finland", "finnish", "yle", "mtv3", "nelonen", "sub"
],
# Greece
"🇬🇷 Greece": [
"greece", "greek", "ert", "mega", "ant1", "alpha", "skai"
],
# China
"🇨🇳 China": [
"china", "chinese", "cctv", "cgtn", "phoenix", "tvb", "中国", "中央"
],
# Japan
"🇯🇵 Japan": [
"japan", "japanese", "nhk", "fuji tv", "tbs", "tv asahi", "nippon tv"
],
# South Korea
"🇰🇷 South Korea": [
"korea", "korean", "kbs", "mbc", "sbs", "jtbc", "tvn"
],
# International/Global
"🌍 International": [
"international", "global", "world", "euro", "euronews", "dw",
"france 24", "cnn international", "bbc world", "sky news",
"bloomberg", "cnbc", "discovery", "national geographic",
"animal planet", "history", "travel", "mtv", "vh1", "nickelodeon"
]
}
def log_message(message, level="INFO"):
"""Logs messages to file and prints them."""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@ -190,8 +33,7 @@ def load_settings():
"sort_channels": True,
"backup_before_import": True,
"auto_cleanup_import": True,
"auto_detect_country": True,
"normalize_country_names": True
"auto_detect_country": True
}
if os.path.exists(SETTINGS_FILE):
@ -204,62 +46,8 @@ def load_settings():
return default_settings
def _keyword_in_text(keyword, text_lower):
    """Return True if ``keyword`` occurs in ``text_lower`` as a standalone token.

    A plain substring test reports "us" inside "music" or "rt" inside "sport".
    An occurrence is therefore rejected when it is glued to a preceding ASCII
    letter/digit or to a following ASCII letter. A trailing digit is allowed
    so that names such as "bbc1" still match "bbc". Edges of the keyword that
    are not ASCII letters/digits (e.g. "canal+", CJK or Cyrillic keywords) are
    not guarded, so those keep plain substring behaviour.
    """
    if not keyword:
        return False
    letters = "abcdefghijklmnopqrstuvwxyz"
    alnum = letters + "0123456789"
    guard_start = keyword[0] in alnum
    guard_end = keyword[-1] in letters

    pos = text_lower.find(keyword)
    while pos != -1:
        end = pos + len(keyword)
        glued_before = guard_start and pos > 0 and text_lower[pos - 1] in alnum
        glued_after = guard_end and end < len(text_lower) and text_lower[end] in letters
        if not (glued_before or glued_after):
            return True
        pos = text_lower.find(keyword, pos + 1)
    return False


def detect_country_from_text(text, patterns=None):
    """Detect the most likely country for a channel name, group or other text.

    Each country is scored by summing the lengths of its keywords found in the
    lowercased text; a keyword equal to the whole text counts double.

    Args:
        text: Text to inspect. ``None`` or an empty string yields no match.
        patterns: Optional mapping of country name -> list of lowercase
            keywords. Defaults to the module-level ``COUNTRY_PATTERNS``.

    Returns:
        A ``(country, score)`` tuple for the best-scoring country, or
        ``(None, 0)`` when no keyword matches. On equal scores the country
        listed first in ``patterns`` wins.
    """
    if not text:
        return None, 0
    if patterns is None:
        patterns = COUNTRY_PATTERNS

    text_lower = text.lower()

    # Score each country based on keyword matches
    country_scores = {}
    for country, keywords in patterns.items():
        score = sum(
            # Longer keywords are stronger evidence; exact matches count double
            len(keyword) * (2 if keyword == text_lower else 1)
            for keyword in keywords
            if _keyword_in_text(keyword, text_lower)
        )
        if score > 0:
            country_scores[country] = score

    if not country_scores:
        return None, 0

    best_country = max(country_scores, key=country_scores.get)
    return best_country, country_scores[best_country]
def smart_country_detection(channel):
    """Detect a channel's country from several of its fields.

    The stream name, group, EPG id and logo are each passed to
    ``detect_country_from_text``; the field with the highest score decides.
    On equal scores the field checked first wins.

    Args:
        channel: Channel dict; the keys 'Stream name', 'Group', 'EPG id' and
            'Logo' are read, and missing or empty ones are skipped.

    Returns:
        The detected country group name, or "🌍 International" when no field
        produced a match.
    """
    # Sources to check (earlier entries win ties)
    sources = [
        ("Stream name", channel.get('Stream name', '')),
        ("Group", channel.get('Group', '')),
        ("EPG id", channel.get('EPG id', '')),
        ("Logo", channel.get('Logo', ''))
    ]

    best_country = None
    best_score = 0
    detection_source = None

    for source_name, text in sources:
        if not text:
            continue
        country, score = detect_country_from_text(text)
        if country and score > best_score:
            best_country = country
            best_score = score
            detection_source = source_name

    # Log detection for debugging. The success message previously printed the
    # stream name and country with nothing between them; separate them.
    stream_name = channel.get('Stream name', 'Unknown')
    if best_country:
        log_message(f"Country detection: '{stream_name}' → {best_country} (from {detection_source}, score: {best_score})", "DEBUG")
    else:
        log_message(f"Country detection: Could not detect country for '{stream_name}'", "DEBUG")

    return best_country or "🌍 International"
def load_group_overrides():
"""Load manual group overrides."""
"""Load group overrides."""
if os.path.exists(GROUP_OVERRIDES_FILE):
try:
with open(GROUP_OVERRIDES_FILE, 'r', encoding='utf-8') as f:
@ -269,37 +57,211 @@ def load_group_overrides():
return {}
def apply_country_detection(channel, settings):
"""Apply country detection and overrides."""
original_group = channel.get('Group', 'Uncategorized')
# Broadcaster keyword -> country group (most reliable signal).
# NOTE(review): "mbc" used to be listed twice (Arabic and South Korea). In a
# dict literal the last duplicate wins, so the effective mapping was always
# South Korea; only that entry is kept. Confirm which broadcaster is intended.
_BROADCASTER_COUNTRY = {
    # US Major Networks
    "cbs": "🇺🇸 United States", "nbc": "🇺🇸 United States",
    "abc": "🇺🇸 United States", "fox": "🇺🇸 United States",
    "espn": "🇺🇸 United States", "cnn": "🇺🇸 United States",
    "hbo": "🇺🇸 United States", "mtv": "🇺🇸 United States",
    "discovery": "🇺🇸 United States", "cartoon network": "🇺🇸 United States",
    "showtime": "🇺🇸 United States", "starz": "🇺🇸 United States",
    "tnt": "🇺🇸 United States", "tbs": "🇺🇸 United States",
    # UK Networks
    "bbc": "🇬🇧 United Kingdom", "itv": "🇬🇧 United Kingdom",
    "channel 4": "🇬🇧 United Kingdom", "sky": "🇬🇧 United Kingdom",
    "e4": "🇬🇧 United Kingdom", "film4": "🇬🇧 United Kingdom",
    "more4": "🇬🇧 United Kingdom", "dave": "🇬🇧 United Kingdom",
    # Canadian Networks
    "cbc": "🇨🇦 Canada", "ctv": "🇨🇦 Canada", "global": "🇨🇦 Canada",
    "tvo": "🇨🇦 Canada", "aptn": "🇨🇦 Canada",
    # German Networks
    "ard": "🇩🇪 Germany", "zdf": "🇩🇪 Germany", "rtl": "🇩🇪 Germany",
    "sat.1": "🇩🇪 Germany", "pro7": "🇩🇪 Germany", "vox": "🇩🇪 Germany",
    "kabel": "🇩🇪 Germany", "n24": "🇩🇪 Germany",
    # French Networks
    "tf1": "🇫🇷 France", "france 2": "🇫🇷 France", "m6": "🇫🇷 France",
    "canal+": "🇫🇷 France", "arte": "🇫🇷 France",
    # Spanish Networks
    "tve": "🇪🇸 Spain", "antena 3": "🇪🇸 Spain", "telecinco": "🇪🇸 Spain",
    "cuatro": "🇪🇸 Spain", "la sexta": "🇪🇸 Spain",
    # Italian Networks
    "rai": "🇮🇹 Italy", "mediaset": "🇮🇹 Italy", "canale 5": "🇮🇹 Italy",
    "italia 1": "🇮🇹 Italy", "rete 4": "🇮🇹 Italy",
    # Other Countries
    "globo": "🇧🇷 Brazil", "band": "🇧🇷 Brazil", "sbt": "🇧🇷 Brazil",
    "televisa": "🇲🇽 Mexico", "tv azteca": "🇲🇽 Mexico",
    "al jazeera": "🇸🇦 Arabic", "lbc": "🇸🇦 Arabic",
    "rt": "🇷🇺 Russia", "channel one": "🇷🇺 Russia",
    "cctv": "🇨🇳 China", "phoenix": "🇨🇳 China",
    "nhk": "🇯🇵 Japan", "fuji": "🇯🇵 Japan",
    "kbs": "🇰🇷 South Korea", "sbs": "🇰🇷 South Korea", "mbc": "🇰🇷 South Korea",
    "abc au": "🇦🇺 Australia", "seven": "🇦🇺 Australia", "nine": "🇦🇺 Australia",
    "npo": "🇳🇱 Netherlands", "rtl nl": "🇳🇱 Netherlands",
}

# Longest keys first so "abc au" / "rtl nl" are tried before "abc" / "rtl".
# sorted() is stable, so equally long keys keep their listed order.
_BROADCASTERS_BY_SPECIFICITY = sorted(
    _BROADCASTER_COUNTRY.items(), key=lambda item: len(item[0]), reverse=True
)

# Country names, demonyms and delimited two-letter codes found in channel names.
_COUNTRY_NAME_PATTERNS = {
    "🇺🇸 United States": [
        "usa", "united states", "america", "american", " us ", "us:", "(us)", "us hd"
    ],
    "🇬🇧 United Kingdom": [
        " uk ", "uk:", "(uk)", "united kingdom", "britain", "british", "england", "english"
    ],
    "🇨🇦 Canada": [
        "canada", "canadian", " ca ", "ca:", "(ca)"
    ],
    "🇩🇪 Germany": [
        "germany", "german", "deutschland", "deutsch", " de ", "de:", "(de)"
    ],
    "🇫🇷 France": [
        "france", "french", "français", " fr ", "fr:", "(fr)"
    ],
    "🇪🇸 Spain": [
        "spain", "spanish", "españa", "español", " es ", "es:", "(es)"
    ],
    "🇮🇹 Italy": [
        "italy", "italian", "italia", "italiano", " it ", "it:", "(it)"
    ],
    "🇧🇷 Brazil": [
        "brazil", "brazilian", "brasil", "português", " br ", "br:", "(br)"
    ],
    "🇲🇽 Mexico": [
        "mexico", "mexican", "méxico", " mx ", "mx:", "(mx)"
    ],
    "🇦🇺 Australia": [
        "australia", "australian", "aussie", " au ", "au:", "(au)"
    ],
    "🇳🇱 Netherlands": [
        "netherlands", "dutch", "holland", "nederland", " nl ", "nl:", "(nl)"
    ],
    "🇷🇺 Russia": [
        "russia", "russian", "россия", " ru ", "ru:", "(ru)"
    ],
    "🇨🇳 China": [
        "china", "chinese", "中国", " cn ", "cn:", "(cn)"
    ],
    "🇯🇵 Japan": [
        "japan", "japanese", "日本", " jp ", "jp:", "(jp)"
    ],
    "🇰🇷 South Korea": [
        "korea", "korean", "한국", " kr ", "kr:", "(kr)", "south korea"
    ],
    "🇸🇦 Arabic": [
        "arabic", "arab", "middle east", "العربية", "al ", "aljazeera"
    ],
    "🇮🇳 India": [
        "india", "indian", "hindi", "bollywood", "zee", "star plus"
    ],
    "🇵🇹 Portugal": [
        "portugal", "portuguese", " pt ", "pt:", "(pt)"
    ],
    "🇹🇷 Turkey": [
        "turkey", "turkish", "türkiye", " tr ", "tr:", "(tr)"
    ],
}

# Domain-style hints searched in the EPG id and, failing that, the logo URL.
# NOTE(review): ".com" maps to United States and is checked first, so any logo
# hosted on a .com domain resolves to the US — confirm this is intended.
_DOMAIN_COUNTRY_HINTS = {
    "🇺🇸 United States": [".us", "usa.", ".com"],
    "🇬🇧 United Kingdom": [".uk", ".gb", "british"],
    "🇨🇦 Canada": [".ca", "canada."],
    "🇩🇪 Germany": [".de", "german."],
    "🇫🇷 France": [".fr", "france."],
    "🇪🇸 Spain": [".es", "spain."],
    "🇮🇹 Italy": [".it", "italy."],
    "🇧🇷 Brazil": [".br", "brazil."],
    "🇲🇽 Mexico": [".mx", "mexico."],
    "🇦🇺 Australia": [".au", "australia."],
    "🇳🇱 Netherlands": [".nl", "netherlands."],
    "🇷🇺 Russia": [".ru", "russia."],
    "🇨🇳 China": [".cn", "china."],
    "🇯🇵 Japan": [".jp", "japan."],
    "🇰🇷 South Korea": [".kr", "korea."],
    "🇮🇳 India": [".in", "india."],
    "🇵🇹 Portugal": [".pt", "portugal."],
    "🇹🇷 Turkey": [".tr", "turkey."],
}


def _contains_term(text, term, whole_word=False):
    """Return True if ``term`` occurs in ``text`` without being part of a longer word.

    An occurrence is rejected when ``term`` starts with an ASCII letter/digit
    and is directly preceded by one ("rt" inside "sport", "al " inside
    "animal "). With ``whole_word=True`` it is also rejected when ``term``
    ends with an ASCII letter and is directly followed by one ("fox" inside
    "foxtel"); a following digit is allowed so "bbc1" still matches "bbc".
    Both arguments are expected to be lowercase.
    """
    if not term:
        return False
    letters = "abcdefghijklmnopqrstuvwxyz"
    alnum = letters + "0123456789"
    check_start = term[0] in alnum
    check_end = whole_word and term[-1] in letters

    index = text.find(term)
    while index != -1:
        end = index + len(term)
        glued_before = check_start and index > 0 and text[index - 1] in alnum
        glued_after = check_end and end < len(text) and text[end] in letters
        if not glued_before and not glued_after:
            return True
        index = text.find(term, index + 1)
    return False


def detect_country_from_channel(channel_name, epg_id="", logo_url=""):
    """Auto-detect a channel's country group using a cascade of heuristics.

    The checks run in order and the first hit wins:
      1. broadcaster names in the channel name (whole-word, most specific first),
      2. country names / delimited country codes in the channel name,
      3. domain-style hints in the EPG id,
      4. the same hints in the logo URL.

    Args:
        channel_name: Channel display name; ``None`` is treated as empty.
        epg_id: Optional EPG identifier; ``None`` is treated as empty.
        logo_url: Optional logo URL; ``None`` is treated as empty.

    Returns:
        The detected country group name, or 'Uncategorized' if nothing matched.
    """
    # Normalize inputs for better matching (tolerating None values)
    channel_lower = (channel_name or "").lower().strip()
    epg_lower = (epg_id or "").lower().strip()
    logo_lower = (logo_url or "").lower().strip()

    # 1. BROADCASTER NAMES (Most reliable)
    for broadcaster, country in _BROADCASTERS_BY_SPECIFICITY:
        if _contains_term(channel_lower, broadcaster, whole_word=True):
            return country

    # 2. COUNTRY NAME PATTERNS (Very reliable)
    # Only the leading edge is guarded so stems keep matching longer forms
    # ("arab" in "arabia") while "al " no longer fires inside "animal ".
    for country, patterns in _COUNTRY_NAME_PATTERNS.items():
        if any(_contains_term(channel_lower, pattern) for pattern in patterns):
            return country

    # 3. EPG ID ANALYSIS, then 4. LOGO URL ANALYSIS (plain substring hints)
    for haystack in (epg_lower, logo_lower):
        if not haystack:
            continue
        for country, hints in _DOMAIN_COUNTRY_HINTS.items():
            if any(hint in haystack for hint in hints):
                return country

    # If no match found, return Uncategorized
    return "Uncategorized"
def apply_auto_country_detection(channel, group_overrides, settings):
    """Assign a channel's 'Group' from manual overrides or auto-detection.

    Priority order:
      1. A manual override whose key occurs (case-insensitively) in the
         stream name.
      2. Auto-detection via ``detect_country_from_channel`` when the
         'auto_detect_country' setting is enabled (the default) and it finds
         something more specific than "Uncategorized".
      3. Otherwise the existing group is kept, except that an empty,
         'Unknown' or 'Other' group is normalized to "Uncategorized".

    Args:
        channel: Channel dict; reads 'Stream name', 'EPG id', 'Logo' and
            'Group', and is modified in place.
        group_overrides: Mapping of stream-name substring -> group name.
        settings: Settings dict; only 'auto_detect_country' is read.

    Returns:
        The same ``channel`` dict, for call chaining.
    """
    # Fields may be present but None; treat that like a missing value.
    stream_name = channel.get('Stream name') or ''
    epg_id = channel.get('EPG id') or ''
    logo_url = channel.get('Logo') or ''
    current_group = channel.get('Group', 'Uncategorized')

    # First try manual overrides (highest priority)
    stream_name_lower = stream_name.lower()
    for key, new_group in group_overrides.items():
        # An empty key is a substring of every name; ignore it rather than
        # letting it override every channel.
        if key and key.lower() in stream_name_lower:
            channel['Group'] = new_group
            log_message(f"Manual override: '{stream_name}' → {new_group}", "DEBUG")
            return channel

    # If auto-detection is enabled, try it
    if settings.get('auto_detect_country', True):
        detected_country = detect_country_from_channel(stream_name, epg_id, logo_url)
        # Only override if we detected something specific (not "Uncategorized")
        if detected_country != "Uncategorized":
            channel['Group'] = detected_country
            log_message(f"Auto-detected: '{stream_name}' → {detected_country}", "INFO")
            return channel

    # Nothing matched (or detection is disabled): keep the existing group,
    # normalizing placeholder values to "Uncategorized".
    if current_group in ('', None, 'Unknown', 'Other'):
        channel['Group'] = "Uncategorized"

    return channel
@ -378,7 +340,6 @@ def remove_duplicates(channels, settings):
unique_channels.append(channel)
else:
duplicate_count += 1
log_message(f"Duplicate removed: {channel.get('Stream name', 'Unknown')}", "DEBUG")
if duplicate_count > 0:
log_message(f"Removed {duplicate_count} duplicate channels", "INFO")
@ -390,6 +351,7 @@ def remove_duplicates(channels, settings):
def process_import():
"""Process bulk import file."""
settings = load_settings()
group_overrides = load_group_overrides()
if not os.path.exists(IMPORT_FILE):
log_message(f"No {IMPORT_FILE} found, skipping import", "INFO")
@ -418,7 +380,7 @@ def process_import():
continue
channel_data = parse_m3u_entry(extinf_line, url_line)
channel_data = apply_country_detection(channel_data, settings)
channel_data = apply_auto_country_detection(channel_data, group_overrides, settings)
if channel_data.get('Stream name') and channel_data.get('Stream URL'):
imported_channels.append(channel_data)
@ -494,9 +456,10 @@ def generate_playlist():
if os.path.exists(LOG_FILE):
open(LOG_FILE, 'w').close()
log_message("Starting playlist generation with smart country detection...", "INFO")
log_message("Starting playlist generation...", "INFO")
settings = load_settings()
group_overrides = load_group_overrides()
# Process import
imported_channels = process_import()
@ -518,8 +481,7 @@ def generate_playlist():
if block.strip():
channel = parse_channel_block(block)
if channel:
# Apply country detection to existing channels too
channel = apply_country_detection(channel, settings)
channel = apply_auto_country_detection(channel, group_overrides, settings)
parsed_channels.append(channel)
log_message(f"Parsed {len(parsed_channels)} channels", "INFO")
@ -527,25 +489,18 @@ def generate_playlist():
# Remove duplicates
parsed_channels = remove_duplicates(parsed_channels, settings)
# Sort channels by country then name
# Sort channels
if settings.get('sort_channels', True):
parsed_channels.sort(key=lambda x: (x.get('Group', '').lower(), x.get('Stream name', '').lower()))
log_message("Channels sorted by country and name", "INFO")
# Log country distribution
country_counts = {}
for channel in parsed_channels:
country = channel.get('Group', 'Unknown')
country_counts[country] = country_counts.get(country, 0) + 1
log_message("Country distribution:", "INFO")
for country, count in sorted(country_counts.items()):
log_message(f" {country}: {count} channels", "INFO")
# Build M3U
m3u_lines = ["#EXTM3U"]
valid_channels = 0
# Count channels by country for stats
country_stats = {}
for channel in parsed_channels:
stream_name = channel.get('Stream name', '')
group_name = channel.get('Group', 'Uncategorized')
@ -568,29 +523,22 @@ def generate_playlist():
m3u_lines.append(stream_url)
valid_channels += 1
# Count by country
country_stats[group_name] = country_stats.get(group_name, 0) + 1
# Write M3U
try:
with open(PLAYLIST_FILE, 'w', encoding='utf-8') as f:
for line in m3u_lines:
f.write(line + '\n')
log_message(f"Generated {PLAYLIST_FILE} with {valid_channels} channels", "INFO")
# Log country statistics
log_message(f"Channels by country: {dict(sorted(country_stats.items(), key=lambda x: x[1], reverse=True))}", "INFO")
except Exception as e:
log_message(f"Error writing playlist: {e}", "ERROR")
# Update channels.txt with new country assignments
if settings.get('normalize_country_names', True):
try:
with open(CHANNELS_FILE, 'w', encoding='utf-8') as f:
for i, channel in enumerate(parsed_channels):
if i > 0:
f.write("\n\n")
block_content = convert_to_channels_txt_block(channel)
f.write(block_content)
log_message("Updated channels.txt with normalized country names", "INFO")
except Exception as e:
log_message(f"Error updating channels.txt: {e}", "ERROR")
log_message("Playlist generation complete", "INFO")
if __name__ == "__main__":