import os
import json
import re
from datetime import datetime
from functools import lru_cache

# --- Configuration ---
CHANNELS_FILE = 'channels.txt'
PLAYLIST_FILE = 'playlist.m3u'
IMPORT_FILE = 'bulk_import.m3u'
LOG_FILE = 'playlist_update.log'
SETTINGS_FILE = 'config/settings.json'
GROUP_OVERRIDES_FILE = 'config/group_overrides.json'

# Country detection patterns: group name -> lower-case keywords whose presence
# in a channel's text votes for that group.
# NOTE(review): the emoji prefixes and the non-ASCII keywords below look
# mis-decoded in this copy (UTF-8 read through a legacy code page). They are
# runtime strings, so they are reproduced exactly as found -- confirm against
# the repository copy before relying on them.
COUNTRY_PATTERNS = {
    # United Kingdom
    "๐Ÿ‡ฌ๐Ÿ‡ง United Kingdom": [
        "uk", "united kingdom", "britain", "british", "england", "scotland", "wales",
        "bbc", "itv", "sky", "channel 4", "channel5", "dave", "really", "yesterday",
        "drama", "pick", "alibi", "eden", "gold", "w+1", "more4", "e4", "film4",
        "quest", "discovery uk", "eurosport uk", "bt sport"
    ],

    # United States
    "๐Ÿ‡บ๐Ÿ‡ธ United States": [
        "usa", "us", "united states", "america", "american",
        "cnn", "fox news", "msnbc", "abc", "nbc", "cbs", "espn", "fox sports",
        "disney", "nickelodeon", "cartoon network", "tnt", "tbs", "usa network",
        "fx", "amc", "discovery", "history", "nat geo", "hgtv", "food network"
    ],

    # Canada
    "๐Ÿ‡จ๐Ÿ‡ฆ Canada": [
        "canada", "canadian", "cbc", "ctv", "global", "city tv", "tvo", "ici",
        "sportsnet", "tsn", "rds", "aptn", "ztele", "canal d", "tele quebec"
    ],

    # Australia
    "๐Ÿ‡ฆ๐Ÿ‡บ Australia": [
        "australia", "australian", "aussie", "abc au", "sbs", "nine", "ten",
        "seven", "foxtel", "optus sport", "kayo"
    ],

    # Germany
    "๐Ÿ‡ฉ๐Ÿ‡ช Germany": [
        "germany", "german", "deutschland", "ard", "zdf", "rtl", "sat.1", "pro7",
        "vox", "kabel", "sport1", "eurosport de", "sky de"
    ],

    # France
    "๐Ÿ‡ซ๐Ÿ‡ท France": [
        "france", "french", "tf1", "france 2", "france 3", "france 5", "m6",
        "canal+", "bfm", "cnews", "rmc", "eurosport fr"
    ],

    # Spain
    "๐Ÿ‡ช๐Ÿ‡ธ Spain": [
        "spain", "spanish", "espaรฑa", "tve", "antena 3", "cuatro", "telecinco",
        "la sexta", "canal sur", "telemadrid", "movistar"
    ],

    # Italy
    "๐Ÿ‡ฎ๐Ÿ‡น Italy": [
        "italy", "italian", "italia", "rai", "mediaset", "canale 5", "italia 1",
        "rete 4", "la7", "sky italia"
    ],

    # Netherlands
    "๐Ÿ‡ณ๐Ÿ‡ฑ Netherlands": [
        "netherlands", "dutch", "nederland", "npo", "rtl nl", "sbs nl", "veronica",
        "net5", "rtl 4", "rtl 5", "rtl 7"
    ],

    # Belgium
    "๐Ÿ‡ง๐Ÿ‡ช Belgium": [
        "belgium", "belgian", "vtm", "een", "canvas", "ketnet", "rtbf", "la une"
    ],

    # Portugal
    "๐Ÿ‡ต๐Ÿ‡น Portugal": [
        "portugal", "portuguese", "rtp", "sic", "tvi", "porto canal", "benfica tv"
    ],

    # India
    "๐Ÿ‡ฎ๐Ÿ‡ณ India": [
        "india", "indian", "hindi", "bollywood", "zee", "star plus", "colors",
        "sony", "dd national", "aaj tak", "ndtv", "times now"
    ],

    # Brazil
    "๐Ÿ‡ง๐Ÿ‡ท Brazil": [
        "brazil", "brazilian", "brasil", "globo", "sbt", "record", "band",
        "rede tv", "cultura", "sportv"
    ],

    # Mexico
    "๐Ÿ‡ฒ๐Ÿ‡ฝ Mexico": [
        "mexico", "mexican", "televisa", "tv azteca", "canal 5", "las estrellas",
        "canal once", "imagen"
    ],

    # Arabic/Middle East
    "๐Ÿ‡ธ๐Ÿ‡ฆ Arabic": [
        "arabic", "arab", "al jazeera", "mbc", "dubai", "abu dhabi", "qatar",
        "saudi", "kuwait", "lebanon", "syria", "iraq", "jordan"
    ],

    # Turkey
    "๐Ÿ‡น๐Ÿ‡ท Turkey": [
        "turkey", "turkish", "trt", "atv", "kanal d", "star tv", "fox tr",
        "show tv", "ntv"
    ],

    # Russia
    "๐Ÿ‡ท๐Ÿ‡บ Russia": [
        "russia", "russian", "rt", "channel one", "ั€ะพััะธั", "ะฝั‚ะฒ", "ั‚ะฝั‚"
    ],

    # Poland
    "๐Ÿ‡ต๐Ÿ‡ฑ Poland": [
        "poland", "polish", "tvp", "polsat", "tvn", "tv4", "canal+ pl"
    ],

    # Sweden
    "๐Ÿ‡ธ๐Ÿ‡ช Sweden": [
        "sweden", "swedish", "svt", "tv4", "kanal 5", "tv6", "tv8"
    ],

    # Norway
    "๐Ÿ‡ณ๐Ÿ‡ด Norway": [
        "norway", "norwegian", "nrk", "tv2", "tvnorge", "max"
    ],

    # Denmark
    "๐Ÿ‡ฉ๐Ÿ‡ฐ Denmark": [
        "denmark", "danish", "dr", "tv2 dk", "kanal 5 dk", "6eren"
    ],

    # Finland
    "๐Ÿ‡ซ๐Ÿ‡ฎ Finland": [
        "finland", "finnish", "yle", "mtv3", "nelonen", "sub"
    ],

    # Greece
    "๐Ÿ‡ฌ๐Ÿ‡ท Greece": [
        "greece", "greek", "ert", "mega", "ant1", "alpha", "skai"
    ],

    # China
    "๐Ÿ‡จ๐Ÿ‡ณ China": [
        "china", "chinese", "cctv", "cgtn", "phoenix", "tvb", "ไธญๅ›ฝ", "ไธญๅคฎ"
    ],

    # Japan
    "๐Ÿ‡ฏ๐Ÿ‡ต Japan": [
        "japan", "japanese", "nhk", "fuji tv", "tbs", "tv asahi", "nippon tv"
    ],

    # South Korea
    "๐Ÿ‡ฐ๐Ÿ‡ท South Korea": [
        "korea", "korean", "kbs", "mbc", "sbs", "jtbc", "tvn"
    ],

    # International/Global
    "๐ŸŒ International": [
        "international", "global", "world", "euro", "euronews", "dw",
        "france 24", "cnn international", "bbc world", "sky news",
        "bloomberg", "cnbc", "discovery", "national geographic",
        "animal planet", "history", "travel", "mtv", "vh1", "nickelodeon"
    ]
}

# Overrides are read from disk once per run instead of once per channel.
_group_overrides_cache = None


@lru_cache(maxsize=None)
def _keyword_regex(keyword):
    """Compile the search pattern for one non-empty, lower-cased keyword.

    The keyword must not be glued to an ASCII letter on any side where the
    keyword itself ends in an ASCII letter, so "us" no longer matches inside
    "music" and "rt" no longer matches inside "sport". Digits and punctuation
    next to the keyword are still allowed ("bbc" matches "bbc1", "canal+"
    matches "canal+ sport"). Keywords whose edge is not an ASCII letter keep
    plain substring behaviour on that side.
    """
    pattern = re.escape(keyword)
    if 'a' <= keyword[0] <= 'z':
        pattern = r'(?<![a-z])' + pattern
    if 'a' <= keyword[-1] <= 'z':
        pattern += r'(?![a-z])'
    return re.compile(pattern)


def _get_group_overrides():
    """Return the manual group overrides, loading them on first use only."""
    global _group_overrides_cache
    if _group_overrides_cache is None:
        _group_overrides_cache = load_group_overrides()
    return _group_overrides_cache


def detect_country_from_text(text, patterns=None):
    """Detect country from channel name, group, or other text.

    Args:
        text: Text to inspect; matching is case-insensitive. Empty or None
            yields no detection.
        patterns: Optional mapping of group name -> keyword list. Defaults to
            COUNTRY_PATTERNS.

    Returns:
        A ``(country, score)`` tuple. ``country`` is the key with the highest
        score, or ``None`` (with score 0) when no keyword matched. Each
        matching keyword adds its length to the score, doubled when the
        keyword equals the whole (stripped) text. On a tie the key that comes
        first in ``patterns`` wins.
    """
    if not text:
        return None, 0
    if patterns is None:
        patterns = COUNTRY_PATTERNS

    text_lower = str(text).lower()
    whole_text = text_lower.strip()

    best_country = None
    best_score = 0
    for country, keywords in patterns.items():
        score = 0
        for keyword in keywords:
            needle = keyword.lower()
            if not needle:
                continue  # an empty keyword would match everything and score 0
            if _keyword_regex(needle).search(text_lower):
                # Longer keywords are more specific; exact matches count double.
                score += len(needle) * (2 if needle == whole_text else 1)

        # Strict comparison keeps the first key on ties.
        if score > best_score:
            best_country = country
            best_score = score

    return best_country, best_score


def smart_country_detection(channel):
    """Smart country detection using multiple sources.

    Checks the channel's 'Stream name', 'Group', 'EPG id' and 'Logo' fields,
    in that order, and keeps the highest-scoring detection; an earlier field
    wins a tie. The outcome is logged at DEBUG level.

    Args:
        channel: Dict describing one channel.

    Returns:
        The detected group name, or the International group when nothing
        matched.
    """
    best_country = None
    best_score = 0
    detection_source = None

    for source_name in ("Stream name", "Group", "EPG id", "Logo"):
        text = channel.get(source_name, '')
        if not text:
            continue
        country, score = detect_country_from_text(text)
        if country and score > best_score:
            best_country = country
            best_score = score
            detection_source = source_name

    # Log detection for debugging
    if best_country:
        log_message(f"Country detection: '{channel.get('Stream name', 'Unknown')}' โ†’ {best_country} (from {detection_source}, score: {best_score})", "DEBUG")
    else:
        log_message(f"Country detection: Could not detect country for '{channel.get('Stream name', 'Unknown')}'", "DEBUG")

    return best_country or "๐ŸŒ International"


def apply_country_detection(channel, settings, group_overrides=None):
    """Apply manual group overrides, then automatic country detection.

    Args:
        channel: Channel dict; its 'Group' entry is updated in place.
        settings: Settings dict. 'auto_detect_country' enables detection and
            'normalize_country_names' makes the detected country always
            replace the group; both default to True when absent.
        group_overrides: Optional mapping of name fragment -> group. When
            omitted, the overrides file is loaded once and reused.

    Returns:
        The same ``channel`` dict.
    """
    original_group = channel.get('Group', 'Uncategorized')

    if group_overrides is None:
        group_overrides = _get_group_overrides()

    # Manual overrides win over any automatic detection.
    stream_name = (channel.get('Stream name') or '').lower()
    for key, new_group in group_overrides.items():
        # Skip empty keys: '' is a substring of every name.
        if key and key.lower() in stream_name:
            channel['Group'] = new_group
            log_message(f"Manual override: '{channel.get('Stream name')}' โ†’ {new_group}", "DEBUG")
            return channel

    if not settings.get('auto_detect_country', True):
        return channel

    detected_country = smart_country_detection(channel)

    if settings.get('normalize_country_names', True):
        channel['Group'] = detected_country
        return channel

    # Without normalization, only replace groups that do not already name a
    # country; uses the same matching rules as the detector itself.
    already_country, _ = detect_country_from_text(original_group)
    if already_country is None:
        channel['Group'] = detected_country

    return channel
group_overrides) + channel_data = apply_country_detection(channel_data, settings) if channel_data.get('Stream name') and channel_data.get('Stream URL'): imported_channels.append(channel_data) @@ -258,10 +494,9 @@ def generate_playlist(): if os.path.exists(LOG_FILE): open(LOG_FILE, 'w').close() - log_message("Starting playlist generation...", "INFO") + log_message("Starting playlist generation with smart country detection...", "INFO") settings = load_settings() - group_overrides = load_group_overrides() # Process import imported_channels = process_import() @@ -283,7 +518,8 @@ def generate_playlist(): if block.strip(): channel = parse_channel_block(block) if channel: - channel = apply_group_overrides(channel, group_overrides) + # Apply country detection to existing channels too + channel = apply_country_detection(channel, settings) parsed_channels.append(channel) log_message(f"Parsed {len(parsed_channels)} channels", "INFO") @@ -291,9 +527,20 @@ def generate_playlist(): # Remove duplicates parsed_channels = remove_duplicates(parsed_channels, settings) - # Sort channels + # Sort channels by country then name if settings.get('sort_channels', True): parsed_channels.sort(key=lambda x: (x.get('Group', '').lower(), x.get('Stream name', '').lower())) + log_message("Channels sorted by country and name", "INFO") + + # Log country distribution + country_counts = {} + for channel in parsed_channels: + country = channel.get('Group', 'Unknown') + country_counts[country] = country_counts.get(country, 0) + 1 + + log_message("Country distribution:", "INFO") + for country, count in sorted(country_counts.items()): + log_message(f" {country}: {count} channels", "INFO") # Build M3U m3u_lines = ["#EXTM3U"] @@ -330,6 +577,20 @@ def generate_playlist(): except Exception as e: log_message(f"Error writing playlist: {e}", "ERROR") + # Update channels.txt with new country assignments + if settings.get('normalize_country_names', True): + try: + with open(CHANNELS_FILE, 'w', encoding='utf-8') as 
f: + for i, channel in enumerate(parsed_channels): + if i > 0: + f.write("\n\n") + + block_content = convert_to_channels_txt_block(channel) + f.write(block_content) + log_message("Updated channels.txt with normalized country names", "INFO") + except Exception as e: + log_message(f"Error updating channels.txt: {e}", "ERROR") + log_message("Playlist generation complete", "INFO") if __name__ == "__main__":