#!/usr/bin/env python3
"""
IPTV Enhanced Country Detection - Complete Working Version

Uses 3-point analysis: Channel Name + EPG ID + Logo URL
Then filters to keep only legitimate countries.

The script reads ``channels.txt`` (blank-line separated ``key = value``
blocks), repairs malformed stream names, assigns each channel a country
group, rewrites ``channels.txt`` (after taking a timestamped backup),
generates ``playlist.m3u`` and resets ``bulk_import.m3u``.
"""

import os
import re
import shutil
import sys
from datetime import datetime
from pathlib import Path

# Ensure correct directory: all file names below are relative to the
# parent of the directory that contains this script.
script_dir = Path(__file__).parent
root_dir = script_dir.parent
os.chdir(root_dir)


# ---------------------------------------------------------------------------
# Group labels
# ---------------------------------------------------------------------------
_UNCATEGORIZED = "Uncategorized"
_UNKNOWN_CHANNEL = "Unknown Channel"

_CANADA = "🇨🇦 Canada"
_UNITED_STATES = "🇺🇸 United States"
_UNITED_KINGDOM = "🇬🇧 United Kingdom"
_GERMANY = "🇩🇪 Germany"
_FRANCE = "🇫🇷 France"
_ITALY = "🇮🇹 Italy"
_SPAIN = "🇪🇸 Spain"
_NETHERLANDS = "🇳🇱 Netherlands"
_AUSTRALIA = "🇦🇺 Australia"
_JAPAN = "🇯🇵 Japan"
_SOUTH_KOREA = "🇰🇷 South Korea"
_INDIA = "🇮🇳 India"
_BRAZIL = "🇧🇷 Brazil"
_MEXICO = "🇲🇽 Mexico"
_ARGENTINA = "🇦🇷 Argentina"
_PHILIPPINES = "🇵🇭 Philippines"
_MALAYSIA = "🇲🇾 Malaysia"

# Every group name accepted as a "real" country by is_valid_country_group().
_VALID_COUNTRIES = frozenset([
    _UNITED_STATES, _CANADA, _UNITED_KINGDOM, _GERMANY, _FRANCE,
    _ITALY, _SPAIN, _NETHERLANDS, "🇧🇪 Belgium", "🇦🇹 Austria",
    "🇨🇭 Switzerland", "🇸🇪 Sweden", "🇳🇴 Norway", "🇩🇰 Denmark",
    "🇫🇮 Finland", "🇵🇱 Poland", "🇨🇿 Czech Republic", "🇭🇺 Hungary",
    "🇵🇹 Portugal", "🇬🇷 Greece", "🇷🇴 Romania", "🇧🇬 Bulgaria",
    "🇭🇷 Croatia", "🇷🇸 Serbia", _AUSTRALIA, _JAPAN, _SOUTH_KOREA,
    _INDIA, "🇨🇳 China", _BRAZIL, _MEXICO, _ARGENTINA, "🇨🇱 Chile",
    "🇨🇴 Colombia", "🇷🇺 Russia", "🇹🇷 Turkey", "🇸🇦 Saudi Arabia",
    "🇦🇪 UAE", "🇪🇬 Egypt", "🇿🇦 South Africa", "🇳🇬 Nigeria",
    "🇰🇪 Kenya", "🇮🇱 Israel", "🇹🇭 Thailand", "🇻🇳 Vietnam",
    _PHILIPPINES, "🇮🇩 Indonesia", _MALAYSIA, "🇸🇬 Singapore",
])

# ---------------------------------------------------------------------------
# Detection tables (built once instead of on every call)
# ---------------------------------------------------------------------------

# STEP 1: aggregators / platforms; any hit sends the channel to Uncategorized.
_STREAMING_SERVICES = (
    "plex", "pluto", "tubi", "samsung", "xumo", "stirr", "crackle",
    "imdb tv", "daddylive", "drew247", "aixmedia", "moveonjoy",
    "drewlive24", "udptv", "a1xs.vip", "zekonew", "forcedtoplay",
    "cdn1host", "tvpass.org", "jmp2.uk/plu-",
    "provider-static.plex.tv", "images.pluto.tv",
)

# STEP 2: country TLDs looked for in the EPG id, in priority order.
_EPG_TLD_COUNTRIES = (
    (".ca", _CANADA),
    (".us", _UNITED_STATES),
    (".uk", _UNITED_KINGDOM),
    (".ph", _PHILIPPINES),
    (".au", _AUSTRALIA),
    (".jp", _JAPAN),
    (".my", _MALAYSIA),
    (".de", _GERMANY),
    (".fr", _FRANCE),
    (".es", _SPAIN),
    (".it", _ITALY),
    (".br", _BRAZIL),
    (".nl", _NETHERLANDS),
)
# The TLD must not be followed by another letter, so ".ca" no longer matches
# inside ".cat" or ".canal"; digits and "@HD"-style suffixes still match.
_EPG_TLD_REGEXES = tuple(
    (re.compile(re.escape(tld) + r"(?![a-z])"), country)
    for tld, country in _EPG_TLD_COUNTRIES
)

_CANADIAN_EPG_PATTERNS = (
    "cbc.", "ctv.", "global.", "tsn.", "sportsnet.", "citytv.", "aptn.",
    ".ab.ca", ".bc.ca", ".mb.ca", ".nb.ca", ".nl.ca", ".ns.ca", ".nt.ca",
    ".nu.ca", ".on.ca", ".pe.ca", ".qc.ca", ".sk.ca", ".yt.ca",
    "cfcn", "cky", "ctfo", "cjoh", "ckws",
)

# STEP 3: channel-name specific rules.
_CANADIAN_INDICATORS = (
    # TSN variations
    "tsn 1", "tsn 2", "tsn 3", "tsn 4", "tsn 5",
    "tsn1", "tsn2", "tsn3", "tsn4", "tsn5",
    # CBC variations
    "cbc news", "cbc toronto", "cbc calgary", "cbc vancouver",
    "cbc winnipeg", "cbc montreal",
    # CTV variations
    "ctv calgary", "ctv vancouver", "ctv toronto", "ctv winnipeg",
    "ctv ottawa", "ctv montreal", "ctv atlantic", "ctv edmonton",
    "ctv saskatoon", "ctv regina", "ctv kitchener",
    # Regional station calls
    "cfcn", "cky", "ctfo", "cjoh", "ckws", "cfrn", "cfqc", "ckck", "chch",
    # Other Canadian broadcasters
    "sportsnet", "global tv", "citytv", "aptn", "omni", "tvo",
    "télé-québec",
)

_BBC_UK_CHANNELS = (
    "bbc one", "bbc two", "bbc three", "bbc four", "bbc news",
    "bbc iplayer", "bbc scotland", "bbc wales", "bbc comedy",
    "bbc drama", "bbc earth",
)
_BBC_US_HINTS = ("america", ".us", "usa")

_US_MISCLASSIFIED = (
    "tv land", "tvland", "we tv", "wetv", "all weddings we tv",
    "cheaters", "cheers", "christmas 365",
)

_US_NETWORKS = (
    "cbs", "nbc", "abc", "fox", "cnn", "espn", "hbo", "showtime", "starz",
    "cinemax", "mtv", "vh1", "comedy central", "cartoon network",
    "nickelodeon", "disney channel", "discovery", "history", "tlc", "hgtv",
    "food network", "travel channel", "lifetime", "hallmark", "e!", "bravo",
    "oxygen", "syfy", "usa network", "tnt", "tbs", "fx", "fxx", "amc",
    "ifc", "tcm", "turner classic",
)
_US_NETWORK_BLOCKERS = ("canada", ".ca", "uk", ".uk")

# STEP 4: fragments looked for in the logo URL, in priority order.
_LOGO_PATTERNS = (
    (_CANADA, ("/canada/", "/ca/", "canada.", "canadian")),
    (_UNITED_STATES, ("/usa/", "/us/", "united-states", "american")),
    (_UNITED_KINGDOM, ("/uk/", "/united-kingdom/", "british", "england")),
    (_GERMANY, ("/germany/", "/de/", "german", "deutschland")),
    (_FRANCE, ("/france/", "/fr/", "french", "français")),
    (_ITALY, ("/italy/", "/it/", "italian", "italiano")),
    (_SPAIN, ("/spain/", "/es/", "spanish", "español")),
    (_NETHERLANDS, ("/netherlands/", "/nl/", "dutch", "nederland")),
    (_AUSTRALIA, ("/australia/", "/au/", "australian", "aussie")),
    (_JAPAN, ("/japan/", "/jp/", "japanese", "日本")),
    (_SOUTH_KOREA, ("/korea/", "/kr/", "korean", "한국")),
    (_INDIA, ("/india/", "/in/", "indian", "भारत")),
    (_BRAZIL, ("/brazil/", "/br/", "brazilian", "brasil")),
    (_MEXICO, ("/mexico/", "/mx/", "mexican", "méxico")),
    (_ARGENTINA, ("/argentina/", "/ar/", "argentinian", "argentina")),
    (_PHILIPPINES, ("/philippines/", "/ph/", "filipino", "pilipinas")),
)

# STEP 5: broadcaster keywords searched in the combined text, in priority
# order (the first country with any matching keyword wins).
_BROADCASTER_PATTERNS = (
    (_CANADA, (
        # Major networks
        "cbc", "ctv", "global", "citytv", "aptn", "omni", "tvo",
        # Sports
        "tsn", "sportsnet", "rds", "rds info",
        # Specialty
        "teletoon", "ytv", "treehouse", "family channel", "oln",
        "cottage life", "discovery canada", "history canada", "slice",
        "w network", "hgtv canada", "food network canada", "showcase",
        "crave", "super channel", "hollywood suite",
        # French Canadian
        "ici", "télé-québec", "tva", "noovo", "canal d", "canal vie",
        # Regional identifiers
        "calgary", "vancouver", "toronto", "winnipeg", "montreal",
        "ottawa", "halifax", "edmonton", "saskatoon", "regina",
        "victoria", "quebec city",
    )),
    (_UNITED_STATES, (
        # Major networks
        "cbs", "nbc", "abc", "fox", "cw", "pbs", "ion", "mynetworktv",
        # News
        "cnn", "fox news", "msnbc", "cnbc", "bloomberg", "newsmax", "oann",
        # Sports
        "espn", "fox sports", "nfl network", "mlb network", "nba tv",
        "nhl network",
        # Premium
        "hbo", "showtime", "starz", "cinemax", "epix",
        # Cable networks
        "mtv", "vh1", "comedy central", "cartoon network", "nickelodeon",
        "disney channel", "discovery", "history", "tlc", "hgtv",
        "food network", "travel channel", "lifetime", "hallmark", "e!",
        "bravo", "oxygen", "syfy", "usa network", "tnt", "tbs", "fx",
        "fxx", "amc", "ifc", "tcm", "turner classic",
        # Generic US terms
        "usa", "america", "american", "united states",
    )),
    (_UNITED_KINGDOM, (
        # BBC (but not BBC America)
        "bbc one", "bbc two", "bbc three", "bbc four", "bbc news",
        "bbc iplayer", "bbc scotland", "bbc wales", "bbc northern ireland",
        "bbc parliament", "bbc comedy", "bbc drama", "bbc earth",
        "bbc world news",
        # ITV
        "itv", "itv2", "itv3", "itv4", "itv be", "itvx",
        # Channel 4 / 5
        "channel 4", "channel 5", "e4", "more4", "film4",
        # Sky
        "sky", "sky news", "sky sports", "sky one", "sky two",
        "sky atlantic",
        # Other UK
        "dave", "really", "yesterday", "drama", "alibi", "gold", "living",
        "discovery uk", "history uk", "tlc uk", "quest", "dmax uk",
        # UK specific terms
        "british", "england", "scotland", "wales", "northern ireland", "uk",
    )),
    (_GERMANY, (
        "ard", "zdf", "rtl", "pro7", "sat.1", "vox", "kabel eins",
        "super rtl", "rtl2", "discovery germany", "history germany",
        "tlc germany", "dmax", "sixx", "tele 5",
    )),
    (_FRANCE, (
        "tf1", "france 2", "france 3", "france 5", "m6", "canal+", "arte",
        "w9", "tmc", "discovery france", "history france", "tlc france",
        "planete+", "ushuaia tv",
    )),
    (_ITALY, (
        "rai", "canale 5", "italia 1", "rete 4", "la7", "tv8", "nove",
        "20 mediaset", "discovery italia", "history italia", "dmax italia",
        "real time", "giallo",
    )),
    (_SPAIN, (
        "tve", "la 1", "la 2", "antena 3", "cuatro", "telecinco",
        "la sexta", "nova", "discovery spain", "history spain",
        "dmax spain", "mega", "neox", "clan",
    )),
    (_NETHERLANDS, (
        "npo", "rtl 4", "rtl 5", "rtl 7", "sbs6", "veronica", "net5",
        "rtl z", "discovery netherlands", "history netherlands",
        "tlc netherlands",
    )),
    (_AUSTRALIA, (
        "abc australia", "nine network", "seven network", "ten", "foxtel",
        "discovery australia", "history australia", "lifestyle",
    )),
    (_JAPAN, (
        "nhk", "fuji tv", "tbs", "tv asahi", "tv tokyo", "nippon tv",
        "animax",
    )),
    (_SOUTH_KOREA, ("kbs", "mbc", "sbs", "jtbc", "tvn", "ocn")),
    (_INDIA, (
        "zee", "star plus", "colors", "sony tv", "& tv", "discovery india",
    )),
    (_BRAZIL, ("globo", "sbt", "record", "band", "discovery brasil")),
    (_MEXICO, ("televisa", "tv azteca", "once tv", "discovery mexico")),
    (_ARGENTINA, (
        "telefe", "canal 13", "america tv", "discovery argentina",
    )),
    (_PHILIPPINES, ("abs-cbn", "gma", "anc", "tv5", "pba rush")),
)
_CANADIAN_CONFLICT_TERMS = (".ca", "canada", "canadian")

# Fragments that mark a comma-separated piece as EXTINF residue, not a name.
_NAME_NOISE = ('http', 'tvg-', 'group-title', '.com', '.ca', '.us')


def clean_malformed_channel_name(raw_name):
    """Extract a clean channel name from malformed EXTINF data.

    Args:
        raw_name: The raw "Stream name" value, possibly polluted with
            EXTINF attributes (``tvg-logo=``, ``group-title=``), stray
            quotes or leading ``.XX.ca`` style domains.

    Returns:
        The cleaned name, or ``"Unknown Channel"`` when nothing usable
        (at least two characters) can be recovered.
    """
    if not raw_name or len(raw_name) < 2:
        return _UNKNOWN_CHANNEL

    # Handle completely malformed entries like:
    # ".AB.ca",.AB.ca" tvg-logo="..." group-title="DaddyLive CA",CTV Canada [HD]"
    # The real name is whatever follows the last comma.
    if raw_name.startswith('".') and 'tvg-logo=' in raw_name:
        parts = raw_name.split(',')
        if len(parts) > 1:
            clean_name = parts[-1].strip().strip('"').strip()
            if clean_name:
                return clean_name

    # If it contains EXTINF data, the name follows the group-title attribute.
    if 'group-title=' in raw_name and ',' in raw_name:
        extinf_match = re.search(r'group-title="[^"]*",(.+)$', raw_name)
        if extinf_match:
            return extinf_match.group(1).strip().strip('"')

    # Extra quotes and domains: take the right-most piece that looks like
    # a name rather than a domain or an attribute.
    if raw_name.startswith('.') and raw_name.count('"') > 2:
        for part in reversed(raw_name.split(',')):
            candidate = part.strip().strip('"').strip()
            if not candidate or candidate.startswith('.') or len(candidate) <= 2:
                continue
            lowered = candidate.lower()
            if not any(noise in lowered for noise in _NAME_NOISE):
                return candidate

    # Basic cleaning
    cleaned = raw_name.strip().strip('"').strip()

    # Remove leading dots and domains
    if cleaned.startswith('.'):
        cleaned = re.sub(r'^\.[\w.]+["\']*,?\s*', '', cleaned)

    # Remove trailing EXTINF attributes
    cleaned = re.sub(r'\s+tvg-.*$', '', cleaned)
    cleaned = re.sub(r'\s+group-title.*$', '', cleaned)

    return cleaned if cleaned and len(cleaned) > 1 else _UNKNOWN_CHANNEL


def extract_epg_from_malformed(raw_name):
    """Extract an EPG id from malformed stream-name data.

    Looks for an upper-case two-letter province/state code followed by
    ``.ca`` or ``.us`` (e.g. ``.AB.ca``); ``.ca`` is checked first.

    Returns:
        ``"generic.<CODE>.ca"`` / ``"generic.<CODE>.us"``, or ``""`` when
        no such pattern is present.
    """
    for tld in ("ca", "us"):
        domain_match = re.search(r'\.([A-Z]{2})\.' + tld, raw_name)
        if domain_match:
            return f"generic.{domain_match.group(1)}.{tld}"
    return ""


def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", stream_url=""):
    """Detect a channel's country group from its metadata.

    Checks run in this order and the first hit wins:

    1. streaming-service markers anywhere in the text -> "Uncategorized"
    2. country TLD in the EPG id, then Canadian EPG patterns
    3. channel-name rules (Canadian stations, BBC, US networks, ...)
    4. logo URL fragments
    5. broadcaster keywords in the combined text

    Args:
        channel_name: Display name of the channel.
        epg_id: EPG / tvg-id value, may be empty.
        logo_url: Logo URL, may be empty.
        stream_url: Stream URL, may be empty.

    Returns:
        A flag-prefixed country label, or ``"Uncategorized"``.
    """
    channel_lower = channel_name.lower()
    epg_lower = epg_id.lower()
    logo_lower = logo_url.lower()

    # Combine all text for analysis
    all_text = f"{channel_lower} {epg_lower} {logo_lower} {stream_url.lower()}"

    # STEP 1: streaming services first (these go to Uncategorized)
    if any(service in all_text for service in _STREAMING_SERVICES):
        return _UNCATEGORIZED

    # STEP 2: EPG ID detection (most reliable)
    for tld_regex, country in _EPG_TLD_REGEXES:
        if tld_regex.search(epg_lower):
            return country

    # all_text already contains the EPG id, so one membership test suffices.
    if any(pattern in all_text for pattern in _CANADIAN_EPG_PATTERNS):
        return _CANADA

    # STEP 3: specific channel-name fixes
    if any(indicator in channel_lower for indicator in _CANADIAN_INDICATORS):
        return _CANADA

    # BBC handling (distinguish US vs UK)
    if "bbc" in channel_lower:
        if "bbc america" in channel_lower:
            return _UNITED_STATES
        if any(x in channel_lower for x in _BBC_UK_CHANNELS):
            if not any(x in all_text for x in _BBC_US_HINTS):
                return _UNITED_KINGDOM

    # US channels that were misclassified
    if any(x in channel_lower for x in _US_MISCLASSIFIED):
        return _UNITED_STATES

    # US network detection, unless the text hints at Canada or the UK
    if not any(x in all_text for x in _US_NETWORK_BLOCKERS):
        if any(network in channel_lower for network in _US_NETWORKS):
            return _UNITED_STATES

    # UK channels (but not BBC America)
    if "come dine with me" in channel_lower or "itv" in channel_lower:
        return _UNITED_KINGDOM

    # Philippines news channels
    if any(x in channel_lower for x in ("anc global", "anc ph")):
        return _PHILIPPINES

    # Japan anime channels
    if "animax" in channel_lower:
        return _JAPAN

    # STEP 4: Logo URL analysis
    for country, patterns in _LOGO_PATTERNS:
        if any(pattern in logo_lower for pattern in patterns):
            return country

    # STEP 5: broadcaster patterns with conflict resolution
    has_canadian_term = any(term in all_text for term in _CANADIAN_CONFLICT_TERMS)
    has_america = "america" in all_text
    for country, keywords in _BROADCASTER_PATTERNS:
        # Skip US assignment if Canadian indicators are present
        if country == _UNITED_STATES and has_canadian_term:
            continue
        # Skip UK assignment if "america" is present
        if country == _UNITED_KINGDOM and has_america:
            continue
        if any(keyword in all_text for keyword in keywords):
            return country

    return _UNCATEGORIZED


def is_valid_country_group(group_name):
    """Return True if *group_name* is a known country label (not a streaming service)."""
    return group_name in _VALID_COUNTRIES


def _is_malformed_stream_name(value):
    """Return True when a "Stream name" value carries EXTINF residue."""
    return (value.startswith('".')
            or 'tvg-logo=' in value
            or 'group-title=' in value
            or value.count('"') > 2)


def parse_channel_block(block):
    """Parse one ``key = value`` block of channels.txt.

    A malformed "Stream name" is cleaned, and when the block ends up with
    no EPG id, one is recovered from the malformed name if possible. The
    recovery happens after the whole block is read so that a later empty
    ``EPG id =`` line cannot wipe the recovered value.

    Args:
        block: Text of a single channel entry (lines separated by ``\\n``).

    Returns:
        ``(channel_data, malformed_name)`` where ``channel_data`` is the
        parsed dict and ``malformed_name`` is the original raw stream name
        if it had to be cleaned, else ``None``.
    """
    channel_data = {}
    malformed_name = None

    for line in block.strip().split('\n'):
        if '=' not in line:
            continue
        key, value = line.split('=', 1)
        key = key.strip()
        value = value.strip()

        if key == "Stream name" and _is_malformed_stream_name(value):
            channel_data["Stream name"] = clean_malformed_channel_name(value)
            malformed_name = value
        else:
            channel_data[key] = value

    if malformed_name is not None and not channel_data.get("EPG id"):
        extracted_epg = extract_epg_from_malformed(malformed_name)
        if extracted_epg:
            channel_data["EPG id"] = extracted_epg

    return channel_data, malformed_name


def load_channels():
    """Load channels from channels.txt with integrated data cleanup.

    Returns:
        A list of channel dicts. Entries without a usable stream name are
        dropped. Returns ``[]`` if the file is missing or cannot be read.
    """
    if not os.path.exists('channels.txt'):
        print("❌ No channels.txt found")
        return []

    try:
        with open('channels.txt', 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"❌ Error loading channels: {e}")
        return []

    channels = []
    cleaned_count = 0

    print("🧹 Step 1: Data Cleanup (fixing malformed entries)")
    print("-" * 50)

    for block in content.split('\n\n'):
        if not block.strip():
            continue

        channel_data, malformed_name = parse_channel_block(block)

        if malformed_name is not None:
            cleaned_count += 1
            if cleaned_count <= 10:  # Show first 10 examples
                clean_name = channel_data["Stream name"]
                print(f"🔧 Fixed: '{malformed_name[:40]}...' → '{clean_name}'")

        # Only add channels with valid names
        stream_name = channel_data.get('Stream name')
        if stream_name and len(stream_name) > 1 and stream_name != _UNKNOWN_CHANNEL:
            channels.append(channel_data)

    print(f"✅ Data cleanup complete: {cleaned_count} entries fixed")
    print(f"📊 Loaded {len(channels)} channels (after cleanup)")

    return channels


def reorganize_channels(channels):
    """Assign every channel its final group and print a summary.

    A channel keeps its existing group when that group is a valid country
    and detection did not yield "Uncategorized"; otherwise it receives the
    detected country, or "Uncategorized" when nothing was detected.

    Args:
        channels: List of channel dicts; each dict's ``'Group'`` key is
            updated in place.

    Returns:
        The same list object that was passed in.
    """
    print("\n🔍 Step 2: Enhanced Country Detection with 3-Point Analysis")
    print("📊 Analyzing: Channel Name + EPG ID + Logo URL")
    print("-" * 60)

    changes = 0
    stats = {
        'country_detected': 0,
        'sent_to_uncategorized': 0,
        'kept_existing_country': 0,
        'streaming_filtered': 0,
    }
    country_counts = {}

    for index, channel in enumerate(channels):
        old_group = channel.get('Group', _UNCATEGORIZED)
        stream_name = channel.get('Stream name', '')
        epg_id = channel.get('EPG id', '')
        logo = channel.get('Logo', '')
        stream_url = channel.get('Stream URL', '')

        detected_country = detect_country_from_channel_content(
            stream_name, epg_id, logo, stream_url)

        # Debug output for the first few channels only
        if index < 5:
            print(f"🔍 Debug: '{stream_name}' | EPG: '{epg_id}' | Detected: {detected_country}")

        # Decide final group
        if is_valid_country_group(old_group) and detected_country != _UNCATEGORIZED:
            # Keep existing valid country
            final_group = old_group
            stats['kept_existing_country'] += 1
        elif detected_country != _UNCATEGORIZED:
            # Use detected country
            final_group = detected_country
            stats['country_detected'] += 1
            if old_group != detected_country:
                print(f"🔍 Fixed: '{stream_name}' {old_group} → {detected_country}")
                changes += 1
        else:
            # Send to Uncategorized
            final_group = _UNCATEGORIZED
            stats['sent_to_uncategorized'] += 1
            if old_group != _UNCATEGORIZED:
                # Check if it's a streaming service
                name_lower = stream_name.lower()
                if any(service in name_lower for service in ('samsung', 'pluto', 'plex', 'tubi')):
                    stats['streaming_filtered'] += 1
                    print(f"📱 Platform: '{stream_name}' → Uncategorized")
                else:
                    print(f"❓ Undetected: '{stream_name}' → Uncategorized")
                changes += 1

        channel['Group'] = final_group
        country_counts[final_group] = country_counts.get(final_group, 0) + 1

    print("\n📊 PROCESSING RESULTS:")
    print(f"✅ Changes made: {changes}")
    print(f"🔍 Country detected: {stats['country_detected']}")
    print(f"✅ Kept existing countries: {stats['kept_existing_country']}")
    print(f"📱 Streaming services filtered: {stats['streaming_filtered']}")
    print(f"❓ Sent to Uncategorized: {stats['sent_to_uncategorized']}")

    print("\n🌍 FINAL GROUP DISTRIBUTION:")
    # Largest groups first, Uncategorized always last.
    sorted_countries = sorted(
        country_counts.items(),
        key=lambda item: (item[0] == _UNCATEGORIZED, -item[1]))
    total = len(channels)
    for country, count in sorted_countries:
        percentage = (count / total * 100) if total > 0 else 0
        print(f" {country}: {count} channels ({percentage:.1f}%)")

    return channels


def save_channels(channels):
    """Write *channels* back to channels.txt, backing up any existing file.

    Returns:
        True on success, False if writing failed.
    """
    # Backup
    if os.path.exists('channels.txt'):
        backup = f"channels_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        shutil.copy2('channels.txt', backup)
        print(f"📋 Backup: {backup}")

    try:
        with open('channels.txt', 'w', encoding='utf-8') as f:
            for i, channel in enumerate(channels):
                if i > 0:
                    f.write("\n\n")
                f.write(f"Group = {channel.get('Group', 'Uncategorized')}\n")
                f.write(f"Stream name = {channel.get('Stream name', 'Unknown')}\n")
                f.write(f"Logo = {channel.get('Logo', '')}\n")
                f.write(f"EPG id = {channel.get('EPG id', '')}\n")
                f.write(f"Stream URL = {channel.get('Stream URL', '')}\n")

        print(f"✅ Saved {len(channels)} channels")
        return True
    except Exception as e:
        print(f"❌ Save error: {e}")
        return False


def generate_m3u(channels):
    """Generate playlist.m3u from *channels*.

    Channels lacking a name or a stream URL are skipped.

    Returns:
        True on success, False if writing failed.
    """
    try:
        with open('playlist.m3u', 'w', encoding='utf-8') as f:
            f.write('#EXTM3U\n')

            for channel in channels:
                name = channel.get('Stream name', '')
                group = channel.get('Group', 'Uncategorized')
                logo = channel.get('Logo', '')
                epg_id = channel.get('EPG id', '')
                url = channel.get('Stream URL', '')

                if name and url:
                    f.write(f'#EXTINF:-1 group-title="{group}"')
                    if logo:
                        f.write(f' tvg-logo="{logo}"')
                    if epg_id:
                        f.write(f' tvg-id="{epg_id}"')
                    f.write(f',{name}\n{url}\n')

        print("✅ Generated playlist.m3u")
        return True
    except Exception as e:
        print(f"❌ M3U error: {e}")
        return False


def channel_sort_key(channel):
    """Sort key: country groups first (by label), Uncategorized last.

    A boolean leads the tuple because group labels start with emoji, whose
    code points sort after any ASCII sentinel such as ``"zzz"``.
    """
    group = channel.get('Group', '')
    return (group == _UNCATEGORIZED, group, channel.get('Stream name', ''))


def main():
    """Run data cleanup, country detection and playlist generation.

    Returns:
        True when all steps succeeded, False otherwise.
    """
    print("🎯 Enhanced IPTV Processing - Data Cleanup + Country Detection")
    print("=" * 80)
    print("🧹 Step 1: Fix malformed channel data")
    print("🔍 Step 2: 3-point country analysis (Channel Name + EPG ID + Logo URL)")
    print("🎯 Step 3: Filter streaming services to Uncategorized")
    print("=" * 80)

    channels = load_channels()
    if not channels:
        return False

    # Enhanced reorganization with cleanup
    channels = reorganize_channels(channels)

    # Sort: countries first (by label), then Uncategorized last
    channels.sort(key=channel_sort_key)

    # Save and generate
    if not save_channels(channels):
        return False

    if not generate_m3u(channels):
        return False

    # Clear import (best effort: a failure here must not fail the run)
    try:
        with open('bulk_import.m3u', 'w', encoding='utf-8') as f:
            f.write('#EXTM3U\n')
        print("🧹 Cleared import file")
    except OSError as e:
        print(f"⚠️ Could not clear import file: {e}")

    print("\n🎉 ENHANCED PROCESSING COMPLETE!")
    print("✅ Malformed data cleaned and fixed")
    print("✅ 3-point analysis applied to all channels")
    print("✅ Countries detected from EPG ID, Logo URL, and Channel Names")
    print("✅ Streaming services filtered to Uncategorized")
    print("✅ Clean country-organized playlist generated")

    # Final statistics
    total = len(channels)
    uncategorized_count = sum(1 for ch in channels if ch.get('Group') == _UNCATEGORIZED)
    success_rate = ((total - uncategorized_count) / total * 100) if total > 0 else 0

    print("\n📊 FINAL STATISTICS:")
    print(f" Total channels: {total}")
    print(f" Properly categorized: {total - uncategorized_count} ({success_rate:.1f}%)")
    print(f" In Uncategorized: {uncategorized_count} ({100 - success_rate:.1f}%)")

    return True


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)