Update scripts/generate_playlist.py
Some checks are pending
Generate M3U Playlist with Auto-Organization / build-and-organize (push) Waiting to run

This commit is contained in:
stoney420 2025-06-29 05:46:24 +02:00
parent f7079b8779
commit e23f5a05dc

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
IPTV Enhanced Country Detection - Complete Working Version
IPTV Enhanced Country Detection - Updated Version
Uses 3-point analysis: Channel Name + EPG ID + Logo URL
Then filters to keep only legitimate countries
"""
@ -14,7 +14,239 @@ from pathlib import Path
# Ensure correct directory
script_dir = Path(__file__).parent
root_dir = script_dir.parent
os.chdir(root_dir)
# The following line is removed to ensure stable execution within the action
# os.chdir(root_dir)
def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", stream_url=""):
    """Detect the country group for a channel using 3-point analysis.

    Evaluation order (first match wins):
      1. Streaming-platform keywords  -> "Uncategorized"
      2. EPG ID ccTLD / Canadian call-sign patterns
      3. Channel-name heuristics (Canadian, BBC US/UK split, US networks, ...)
      4. Logo URL path patterns
      5. Broadcaster keyword lists

    Args:
        channel_name: Display name of the channel.
        epg_id: tvg-id / EPG identifier (often carries a ccTLD suffix).
        logo_url: Channel logo URL (path segments often name a country).
        stream_url: Stream URL; folded into the combined search text.

    Returns:
        A flag-emoji country group string (e.g. "🇨🇦 Canada"), or
        "Uncategorized" for streaming platforms and undetectable channels.
    """
    # Combine all text for analysis
    all_text = f"{channel_name.lower()} {epg_id.lower()} {logo_url.lower()} {stream_url.lower()}"

    # STEP 1: Check for streaming services first (these go to Uncategorized)
    streaming_services = [
        "plex", "pluto", "tubi", "samsung", "xumo", "stirr", "crackle", "imdb tv",
        "daddylive", "drew247", "aixmedia", "moveonjoy", "drewlive24", "udptv",
        "a1xs.vip", "zekonew", "forcedtoplay", "cdn1host", "tvpass.org",
        "jmp2.uk/plu-", "provider-static.plex.tv", "images.pluto.tv"
    ]

    for service in streaming_services:
        if service in all_text:
            return "Uncategorized"

    # STEP 2: EPG ID detection (most reliable) - ccTLD suffix lookup
    epg_patterns = {
        ".ca": "🇨🇦 Canada",
        ".us": "🇺🇸 United States",
        ".uk": "🇬🇧 United Kingdom",
        ".ph": "🇵🇭 Philippines",
        ".au": "🇦🇺 Australia",
        ".jp": "🇯🇵 Japan",
        ".my": "🇲🇾 Malaysia",
        ".de": "🇩🇪 Germany",
        ".fr": "🇫🇷 France",
        ".es": "🇪🇸 Spain",
        ".it": "🇮🇹 Italy",
        ".br": "🇧🇷 Brazil",
        ".nl": "🇳🇱 Netherlands"
    }

    for domain, country in epg_patterns.items():
        if domain in epg_id.lower():
            return country

    # Enhanced Canadian EPG detection (network prefixes, province domains,
    # regional station call signs)
    canadian_epg_patterns = [
        "cbc.", "ctv.", "global.", "tsn.", "sportsnet.", "citytv.", "aptn.",
        ".ab.ca", ".bc.ca", ".mb.ca", ".nb.ca", ".nl.ca", ".ns.ca", ".nt.ca",
        ".nu.ca", ".on.ca", ".pe.ca", ".qc.ca", ".sk.ca", ".yt.ca",
        "cfcn", "cky", "ctfo", "cjoh", "ckws"
    ]

    for pattern in canadian_epg_patterns:
        if pattern in epg_id.lower() or pattern in all_text:
            return "🇨🇦 Canada"

    # STEP 3: Enhanced specific channel fixes
    channel_lower = channel_name.lower()

    # Enhanced Canadian channels detection
    canadian_indicators = [
        # TSN variations
        "tsn 1", "tsn 2", "tsn 3", "tsn 4", "tsn 5", "tsn1", "tsn2", "tsn3", "tsn4", "tsn5",
        # CBC variations
        "cbc news", "cbc toronto", "cbc calgary", "cbc vancouver", "cbc winnipeg", "cbc montreal",
        # CTV variations
        "ctv calgary", "ctv vancouver", "ctv toronto", "ctv winnipeg", "ctv ottawa", "ctv montreal",
        "ctv atlantic", "ctv edmonton", "ctv saskatoon", "ctv regina", "ctv kitchener",
        # Regional station calls
        "cfcn", "cky", "ctfo", "cjoh", "ckws", "cfrn", "cfqc", "ckck", "chch",
        # Other Canadian broadcasters
        "sportsnet", "global tv", "citytv", "aptn", "omni", "tvo", "télé-québec"
    ]

    for indicator in canadian_indicators:
        if indicator in channel_lower:
            return "🇨🇦 Canada"

    # Enhanced BBC handling (distinguish US vs UK)
    if "bbc" in channel_lower:
        # BBC America is US
        if "bbc america" in channel_lower:
            return "🇺🇸 United States"
        # Most other BBC channels are UK
        elif any(x in channel_lower for x in ["bbc one", "bbc two", "bbc three", "bbc four",
                                              "bbc news", "bbc iplayer", "bbc scotland", "bbc wales",
                                              "bbc comedy", "bbc drama", "bbc earth"]):
            # Check if it's specifically UK version
            if not any(x in all_text for x in ["america", ".us", "usa"]):
                return "🇬🇧 United Kingdom"

    # US channels that were misclassified
    if any(x in channel_lower for x in ["tv land", "tvland", "we tv", "wetv", "all weddings we tv", "cheaters", "cheers", "christmas 365"]):
        return "🇺🇸 United States"

    # Enhanced US network detection
    us_networks = [
        "cbs", "nbc", "abc", "fox", "cnn", "espn", "hbo", "showtime", "starz", "cinemax",
        "mtv", "vh1", "comedy central", "cartoon network", "nickelodeon", "disney channel",
        "discovery", "history", "tlc", "hgtv", "food network", "travel channel",
        "lifetime", "hallmark", "e!", "bravo", "oxygen", "syfy", "usa network",
        "tnt", "tbs", "fx", "fxx", "amc", "ifc", "tcm", "turner classic"
    ]

    for network in us_networks:
        # Only assign US when no Canadian/UK indicator contradicts it
        if network in channel_lower and not any(x in all_text for x in ["canada", ".ca", "uk", ".uk"]):
            return "🇺🇸 United States"

    # UK channels (but not BBC America)
    if "come dine with me" in channel_lower or "itv" in channel_lower:
        return "🇬🇧 United Kingdom"

    # Philippines news channels
    if any(x in channel_lower for x in ["anc global", "anc ph"]):
        return "🇵🇭 Philippines"

    # Japan anime channels
    if "animax" in channel_lower:
        return "🇯🇵 Japan"

    # STEP 4: Logo URL analysis
    logo_patterns = {
        "🇨🇦 Canada": ["/canada/", "/ca/", "canada.", "canadian"],
        "🇺🇸 United States": ["/usa/", "/us/", "united-states", "american"],
        "🇬🇧 United Kingdom": ["/uk/", "/united-kingdom/", "british", "england"],
        "🇩🇪 Germany": ["/germany/", "/de/", "german", "deutschland"],
        "🇫🇷 France": ["/france/", "/fr/", "french", "français"],
        "🇮🇹 Italy": ["/italy/", "/it/", "italian", "italiano"],
        "🇪🇸 Spain": ["/spain/", "/es/", "spanish", "español"],
        "🇳🇱 Netherlands": ["/netherlands/", "/nl/", "dutch", "nederland"],
        "🇦🇺 Australia": ["/australia/", "/au/", "australian", "aussie"],
        "🇯🇵 Japan": ["/japan/", "/jp/", "japanese", "日本"],
        "🇰🇷 South Korea": ["/korea/", "/kr/", "korean", "한국"],
        "🇮🇳 India": ["/india/", "/in/", "indian", "भारत"],
        "🇧🇷 Brazil": ["/brazil/", "/br/", "brazilian", "brasil"],
        "🇲🇽 Mexico": ["/mexico/", "/mx/", "mexican", "méxico"],
        "🇦🇷 Argentina": ["/argentina/", "/ar/", "argentinian", "argentina"],
        "🇵🇭 Philippines": ["/philippines/", "/ph/", "filipino", "pilipinas"]
    }

    for country, patterns in logo_patterns.items():
        for pattern in patterns:
            if pattern in logo_url.lower():
                return country

    # STEP 5: Enhanced broadcaster patterns
    broadcaster_patterns = {
        "🇨🇦 Canada": [
            "cbc", "tsn", "ctv", "global", "sportsnet", "citytv", "aptn", "teletoon", "ytv",
            "discovery canada", "history canada", "slice", "w network", "oln", "hgtv canada",
            "food network canada", "showcase", "crave", "super channel", "hollywood suite"
        ],
        "🇺🇸 United States": [
            "cbs", "nbc", "abc", "fox", "cnn", "espn", "amc", "mtv", "comedy central",
            "discovery usa", "history usa", "tlc usa", "hgtv usa", "food network usa", "paramount",
            "nickelodeon usa", "cartoon network usa", "disney usa", "lifetime", "e!", "bravo usa"
        ],
        "🇬🇧 United Kingdom": [
            "bbc", "itv", "channel 4", "channel 5", "sky", "dave", "really", "yesterday",
            "discovery uk", "history uk", "tlc uk", "living", "alibi", "gold", "drama"
        ],
        "🇩🇪 Germany": [
            "ard", "zdf", "rtl", "pro7", "sat.1", "vox", "kabel eins", "super rtl", "rtl2",
            "discovery germany", "history germany", "tlc germany", "dmax", "sixx", "tele 5"
        ],
        "🇫🇷 France": [
            "tf1", "france 2", "france 3", "france 5", "m6", "canal+", "arte", "w9", "tmc",
            "discovery france", "history france", "tlc france", "planete+", "ushuaia tv"
        ],
        "🇮🇹 Italy": [
            "rai", "canale 5", "italia 1", "rete 4", "la7", "tv8", "nove", "20 mediaset",
            "discovery italia", "history italia", "dmax italia", "real time", "giallo"
        ],
        "🇪🇸 Spain": [
            "tve", "la 1", "la 2", "antena 3", "cuatro", "telecinco", "la sexta", "nova",
            "discovery spain", "history spain", "dmax spain", "mega", "neox", "clan"
        ],
        "🇳🇱 Netherlands": [
            "npo", "rtl 4", "rtl 5", "rtl 7", "sbs6", "veronica", "net5", "rtl z",
            "discovery netherlands", "history netherlands", "tlc netherlands"
        ],
        "🇦🇺 Australia": [
            "abc australia", "nine network", "seven network", "ten", "foxtel",
            "discovery australia", "history australia", "lifestyle"
        ],
        "🇯🇵 Japan": [
            "nhk", "fuji tv", "tbs", "tv asahi", "tv tokyo", "nippon tv", "animax"
        ],
        "🇰🇷 South Korea": [
            "kbs", "mbc", "sbs", "jtbc", "tvn", "ocn"
        ],
        "🇮🇳 India": [
            "zee", "star plus", "colors", "sony tv", "& tv", "discovery india"
        ],
        "🇧🇷 Brazil": [
            "globo", "sbt", "record", "band", "discovery brasil"
        ],
        "🇲🇽 Mexico": [
            "televisa", "tv azteca", "once tv", "discovery mexico"
        ],
        "🇦🇷 Argentina": [
            "telefe", "canal 13", "america tv", "discovery argentina"
        ],
        "🇵🇭 Philippines": [
            "abs-cbn", "gma", "anc", "tv5", "pba rush"
        ]
    }

    for country, keywords in broadcaster_patterns.items():
        for keyword in keywords:
            if keyword in all_text:
                return country

    return "Uncategorized"
def is_valid_country_group(group_name):
    """Check if group name is a valid country (not a streaming service).

    Args:
        group_name: Group label to test, e.g. "🇨🇦 Canada".

    Returns:
        True when the label exactly matches one of the recognized
        flag-emoji country groups, False otherwise (including
        "Uncategorized" and provider/platform group names).
    """
    valid_countries = [
        "🇺🇸 United States", "🇨🇦 Canada", "🇬🇧 United Kingdom", "🇩🇪 Germany",
        "🇫🇷 France", "🇮🇹 Italy", "🇪🇸 Spain", "🇳🇱 Netherlands", "🇧🇪 Belgium",
        "🇦🇹 Austria", "🇨🇭 Switzerland", "🇸🇪 Sweden", "🇳🇴 Norway", "🇩🇰 Denmark",
        "🇫🇮 Finland", "🇵🇱 Poland", "🇨🇿 Czech Republic", "🇭🇺 Hungary", "🇵🇹 Portugal",
        "🇬🇷 Greece", "🇷🇴 Romania", "🇧🇬 Bulgaria", "🇭🇷 Croatia", "🇷🇸 Serbia",
        "🇦🇺 Australia", "🇯🇵 Japan", "🇰🇷 South Korea", "🇮🇳 India", "🇨🇳 China",
        "🇧🇷 Brazil", "🇲🇽 Mexico", "🇦🇷 Argentina", "🇨🇱 Chile", "🇨🇴 Colombia",
        "🇷🇺 Russia", "🇹🇷 Turkey", "🇸🇦 Saudi Arabia", "🇦🇪 UAE", "🇪🇬 Egypt",
        "🇿🇦 South Africa", "🇳🇬 Nigeria", "🇰🇪 Kenya", "🇮🇱 Israel", "🇹🇭 Thailand",
        "🇻🇳 Vietnam", "🇵🇭 Philippines", "🇮🇩 Indonesia", "🇲🇾 Malaysia", "🇸🇬 Singapore"
    ]
    return group_name in valid_countries
def clean_malformed_channel_name(raw_name):
"""Extract clean channel name from malformed EXTINF data."""
@ -23,7 +255,7 @@ def clean_malformed_channel_name(raw_name):
return "Unknown Channel"
# Handle completely malformed entries like:
# ".AB.ca",.AB.ca" tvg-logo="..." group-title="DaddyLive CA",CTV Canada [HD]"
# [cite_start]".AB.ca",.AB.ca" tvg-logo="..." group-title="DaddyLive CA",CTV Canada [HD]" [cite: 177, 178]
if raw_name.startswith('".') and 'tvg-logo=' in raw_name:
# Extract the actual channel name after the last comma
@ -34,19 +266,19 @@ def clean_malformed_channel_name(raw_name):
return clean_name
# If it contains EXTINF data, extract the name
if 'group-title=' in raw_name and ',' in raw_name:
extinf_match = re.search(r'group-title="[^"]*",(.+)$', raw_name)
[cite_start]if 'group-title=' in raw_name and ',' in raw_name: [cite: 179]
extinf_match = re.search(r'group-title="[^"]*",(.+)')
if extinf_match:
return extinf_match.group(1).strip().strip('"')
# If it has extra quotes and domains, clean them
if raw_name.startswith('.') and raw_name.count('"') > 2:
# [cite_start]If it has extra quotes and domains, clean them [cite: 199]
[cite_start]if raw_name.startswith('.') and raw_name.count('"') > 2: [cite: 199]
parts = raw_name.split(',')
for part in reversed(parts):
cleaned = part.strip().strip('"').strip()
if cleaned and not cleaned.startswith('.') and len(cleaned) > 2:
if not any(x in cleaned.lower() for x in ['http', 'tvg-', 'group-title', '.com', '.ca', '.us']):
return cleaned
[cite_start]return cleaned [cite: 200]
# Basic cleaning
cleaned = raw_name.strip().strip('"').strip()
@ -56,10 +288,9 @@ def clean_malformed_channel_name(raw_name):
cleaned = re.sub(r'^\.[\w.]+["\']*,?\s*', '', cleaned)
# Remove trailing EXTINF attributes
cleaned = re.sub(r'\s+tvg-.*$', '', cleaned)
cleaned = re.sub(r'\s+group-title.*$', '', cleaned)
cleaned = re.sub(r'\s+tvg-.*', '', cleaned)
return cleaned if cleaned and len(cleaned) > 1 else "Unknown Channel"
[cite_start]return cleaned if cleaned and len(cleaned) > 1 else "Unknown Channel" [cite: 233]
def extract_epg_from_malformed(raw_name):
@ -74,284 +305,12 @@ def extract_epg_from_malformed(raw_name):
# Look for .us domains
domain_match = re.search(r'\.([A-Z]{2})\.us', raw_name)
if domain_match:
state = domain_match.group(1)
return f"generic.{state}.us"
[cite_start]state = domain_match.group(1) [cite: 234]
[cite_start]return f"generic.{state}.us" [cite: 234]
return ""
def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", stream_url=""):
    """Detect the country group for a channel using 3-point analysis.

    Same pipeline as the earlier definition, but with wider STEP 5
    broadcaster coverage and explicit US/UK conflict resolution.
    NOTE(review): this re-definition shadows the earlier one at import
    time — the file should keep only one copy.

    Args:
        channel_name: Display name of the channel.
        epg_id: tvg-id / EPG identifier (often carries a ccTLD suffix).
        logo_url: Channel logo URL (path segments often name a country).
        stream_url: Stream URL; folded into the combined search text.

    Returns:
        A flag-emoji country group string, or "Uncategorized".
    """
    # Combine all text for analysis
    all_text = f"{channel_name.lower()} {epg_id.lower()} {logo_url.lower()} {stream_url.lower()}"

    # STEP 1: Check for streaming services first (these go to Uncategorized)
    streaming_services = [
        "plex", "pluto", "tubi", "samsung", "xumo", "stirr", "crackle", "imdb tv",
        "daddylive", "drew247", "aixmedia", "moveonjoy", "drewlive24", "udptv",
        "a1xs.vip", "zekonew", "forcedtoplay", "cdn1host", "tvpass.org",
        "jmp2.uk/plu-", "provider-static.plex.tv", "images.pluto.tv"
    ]

    for service in streaming_services:
        if service in all_text:
            return "Uncategorized"

    # STEP 2: EPG ID detection (most reliable) - Enhanced
    epg_patterns = {
        ".ca": "🇨🇦 Canada",
        ".us": "🇺🇸 United States",
        ".uk": "🇬🇧 United Kingdom",
        ".ph": "🇵🇭 Philippines",
        ".au": "🇦🇺 Australia",
        ".jp": "🇯🇵 Japan",
        ".my": "🇲🇾 Malaysia",
        ".de": "🇩🇪 Germany",
        ".fr": "🇫🇷 France",
        ".es": "🇪🇸 Spain",
        ".it": "🇮🇹 Italy",
        ".br": "🇧🇷 Brazil",
        ".nl": "🇳🇱 Netherlands"
    }

    for domain, country in epg_patterns.items():
        if domain in epg_id.lower():
            return country

    # Enhanced Canadian EPG detection
    canadian_epg_patterns = [
        "cbc.", "ctv.", "global.", "tsn.", "sportsnet.", "citytv.", "aptn.",
        ".ab.ca", ".bc.ca", ".mb.ca", ".nb.ca", ".nl.ca", ".ns.ca", ".nt.ca",
        ".nu.ca", ".on.ca", ".pe.ca", ".qc.ca", ".sk.ca", ".yt.ca",
        "cfcn", "cky", "ctfo", "cjoh", "ckws"
    ]

    for pattern in canadian_epg_patterns:
        if pattern in epg_id.lower() or pattern in all_text:
            return "🇨🇦 Canada"

    # STEP 3: Enhanced specific channel fixes
    channel_lower = channel_name.lower()

    # Enhanced Canadian channels detection
    canadian_indicators = [
        # TSN variations
        "tsn 1", "tsn 2", "tsn 3", "tsn 4", "tsn 5", "tsn1", "tsn2", "tsn3", "tsn4", "tsn5",
        # CBC variations
        "cbc news", "cbc toronto", "cbc calgary", "cbc vancouver", "cbc winnipeg", "cbc montreal",
        # CTV variations
        "ctv calgary", "ctv vancouver", "ctv toronto", "ctv winnipeg", "ctv ottawa", "ctv montreal",
        "ctv atlantic", "ctv edmonton", "ctv saskatoon", "ctv regina", "ctv kitchener",
        # Regional station calls
        "cfcn", "cky", "ctfo", "cjoh", "ckws", "cfrn", "cfqc", "ckck", "chch",
        # Other Canadian broadcasters
        "sportsnet", "global tv", "citytv", "aptn", "omni", "tvo", "télé-québec"
    ]

    for indicator in canadian_indicators:
        if indicator in channel_lower:
            return "🇨🇦 Canada"

    # Enhanced BBC handling (distinguish US vs UK)
    if "bbc" in channel_lower:
        # BBC America is US
        if "bbc america" in channel_lower:
            return "🇺🇸 United States"
        # Most other BBC channels are UK
        elif any(x in channel_lower for x in ["bbc one", "bbc two", "bbc three", "bbc four",
                                              "bbc news", "bbc iplayer", "bbc scotland", "bbc wales",
                                              "bbc comedy", "bbc drama", "bbc earth"]):
            # Check if it's specifically UK version
            if not any(x in all_text for x in ["america", ".us", "usa"]):
                return "🇬🇧 United Kingdom"

    # US channels that were misclassified
    if any(x in channel_lower for x in ["tv land", "tvland", "we tv", "wetv", "all weddings we tv", "cheaters", "cheers", "christmas 365"]):
        return "🇺🇸 United States"

    # Enhanced US network detection
    us_networks = [
        "cbs", "nbc", "abc", "fox", "cnn", "espn", "hbo", "showtime", "starz", "cinemax",
        "mtv", "vh1", "comedy central", "cartoon network", "nickelodeon", "disney channel",
        "discovery", "history", "tlc", "hgtv", "food network", "travel channel",
        "lifetime", "hallmark", "e!", "bravo", "oxygen", "syfy", "usa network",
        "tnt", "tbs", "fx", "fxx", "amc", "ifc", "tcm", "turner classic"
    ]

    for network in us_networks:
        if network in channel_lower and not any(x in all_text for x in ["canada", ".ca", "uk", ".uk"]):
            return "🇺🇸 United States"

    # UK channels (but not BBC America)
    if "come dine with me" in channel_lower or "itv" in channel_lower:
        return "🇬🇧 United Kingdom"

    # Philippines news channels
    if any(x in channel_lower for x in ["anc global", "anc ph"]):
        return "🇵🇭 Philippines"

    # Japan anime channels
    if "animax" in channel_lower:
        return "🇯🇵 Japan"

    # STEP 4: Logo URL analysis
    logo_patterns = {
        "🇨🇦 Canada": ["/canada/", "/ca/", "canada.", "canadian"],
        "🇺🇸 United States": ["/usa/", "/us/", "united-states", "american"],
        "🇬🇧 United Kingdom": ["/uk/", "/united-kingdom/", "british", "england"],
        "🇩🇪 Germany": ["/germany/", "/de/", "german", "deutschland"],
        "🇫🇷 France": ["/france/", "/fr/", "french", "français"],
        "🇮🇹 Italy": ["/italy/", "/it/", "italian", "italiano"],
        "🇪🇸 Spain": ["/spain/", "/es/", "spanish", "español"],
        "🇳🇱 Netherlands": ["/netherlands/", "/nl/", "dutch", "nederland"],
        "🇦🇺 Australia": ["/australia/", "/au/", "australian", "aussie"],
        "🇯🇵 Japan": ["/japan/", "/jp/", "japanese", "日本"],
        "🇰🇷 South Korea": ["/korea/", "/kr/", "korean", "한국"],
        "🇮🇳 India": ["/india/", "/in/", "indian", "भारत"],
        "🇧🇷 Brazil": ["/brazil/", "/br/", "brazilian", "brasil"],
        "🇲🇽 Mexico": ["/mexico/", "/mx/", "mexican", "méxico"],
        "🇦🇷 Argentina": ["/argentina/", "/ar/", "argentinian", "argentina"],
        "🇵🇭 Philippines": ["/philippines/", "/ph/", "filipino", "pilipinas"]
    }

    for country, patterns in logo_patterns.items():
        for pattern in patterns:
            if pattern in logo_url.lower():
                return country

    # STEP 5: Enhanced broadcaster patterns with more comprehensive coverage
    broadcaster_patterns = {
        "🇨🇦 Canada": [
            # Major networks
            "cbc", "ctv", "global", "citytv", "aptn", "omni", "tvo",
            # Sports
            "tsn", "sportsnet", "rds", "rds info",
            # Specialty
            "teletoon", "ytv", "treehouse", "family channel", "oln", "cottage life",
            "discovery canada", "history canada", "slice", "w network", "hgtv canada",
            "food network canada", "showcase", "crave", "super channel", "hollywood suite",
            # French Canadian
            "ici", "télé-québec", "tva", "noovo", "canal d", "canal vie",
            # Regional identifiers
            "calgary", "vancouver", "toronto", "winnipeg", "montreal", "ottawa", "halifax",
            "edmonton", "saskatoon", "regina", "victoria", "quebec city"
        ],
        "🇺🇸 United States": [
            # Major networks
            "cbs", "nbc", "abc", "fox", "cw", "pbs", "ion", "mynetworktv",
            # News
            "cnn", "fox news", "msnbc", "cnbc", "bloomberg", "newsmax", "oann",
            # Sports
            "espn", "fox sports", "nfl network", "mlb network", "nba tv", "nhl network",
            # Premium
            "hbo", "showtime", "starz", "cinemax", "epix",
            # Cable networks
            "mtv", "vh1", "comedy central", "cartoon network", "nickelodeon", "disney channel",
            "discovery", "history", "tlc", "hgtv", "food network", "travel channel",
            "lifetime", "hallmark", "e!", "bravo", "oxygen", "syfy", "usa network",
            "tnt", "tbs", "fx", "fxx", "amc", "ifc", "tcm", "turner classic",
            # But exclude specifically Canadian versions
            "usa", "america", "american", "united states"
        ],
        "🇬🇧 United Kingdom": [
            # BBC (but not BBC America)
            "bbc one", "bbc two", "bbc three", "bbc four", "bbc news", "bbc iplayer",
            "bbc scotland", "bbc wales", "bbc northern ireland", "bbc parliament",
            "bbc comedy", "bbc drama", "bbc earth", "bbc world news",
            # ITV
            "itv", "itv2", "itv3", "itv4", "itv be", "itvx",
            # Channel 4
            "channel 4", "channel 5", "e4", "more4", "film4",
            # Sky
            "sky", "sky news", "sky sports", "sky one", "sky two", "sky atlantic",
            # Other UK
            "dave", "really", "yesterday", "drama", "alibi", "gold", "living",
            "discovery uk", "history uk", "tlc uk", "quest", "dmax uk",
            # UK specific terms
            "british", "england", "scotland", "wales", "northern ireland", "uk"
        ],
        "🇩🇪 Germany": [
            "ard", "zdf", "rtl", "pro7", "sat.1", "vox", "kabel eins", "super rtl", "rtl2",
            "discovery germany", "history germany", "tlc germany", "dmax", "sixx", "tele 5"
        ],
        "🇫🇷 France": [
            "tf1", "france 2", "france 3", "france 5", "m6", "canal+", "arte", "w9", "tmc",
            "discovery france", "history france", "tlc france", "planete+", "ushuaia tv"
        ],
        "🇮🇹 Italy": [
            "rai", "canale 5", "italia 1", "rete 4", "la7", "tv8", "nove", "20 mediaset",
            "discovery italia", "history italia", "dmax italia", "real time", "giallo"
        ],
        "🇪🇸 Spain": [
            "tve", "la 1", "la 2", "antena 3", "cuatro", "telecinco", "la sexta", "nova",
            "discovery spain", "history spain", "dmax spain", "mega", "neox", "clan"
        ],
        "🇳🇱 Netherlands": [
            "npo", "rtl 4", "rtl 5", "rtl 7", "sbs6", "veronica", "net5", "rtl z",
            "discovery netherlands", "history netherlands", "tlc netherlands"
        ],
        "🇦🇺 Australia": [
            "abc australia", "nine network", "seven network", "ten", "foxtel",
            "discovery australia", "history australia", "lifestyle"
        ],
        "🇯🇵 Japan": [
            "nhk", "fuji tv", "tbs", "tv asahi", "tv tokyo", "nippon tv", "animax"
        ],
        "🇰🇷 South Korea": [
            "kbs", "mbc", "sbs", "jtbc", "tvn", "ocn"
        ],
        "🇮🇳 India": [
            "zee", "star plus", "colors", "sony tv", "& tv", "discovery india"
        ],
        "🇧🇷 Brazil": [
            "globo", "sbt", "record", "band", "discovery brasil"
        ],
        "🇲🇽 Mexico": [
            "televisa", "tv azteca", "once tv", "discovery mexico"
        ],
        "🇦🇷 Argentina": [
            "telefe", "canal 13", "america tv", "discovery argentina"
        ],
        "🇵🇭 Philippines": [
            "abs-cbn", "gma", "anc", "tv5", "pba rush"
        ]
    }

    # Enhanced pattern matching with conflict resolution
    for country, keywords in broadcaster_patterns.items():
        for keyword in keywords:
            if keyword in all_text:
                # Special handling for conflicting patterns
                if country == "🇺🇸 United States" and any(ca_term in all_text for ca_term in [".ca", "canada", "canadian"]):
                    continue  # Skip US assignment if Canadian indicators present
                if country == "🇬🇧 United Kingdom" and "america" in all_text:
                    continue  # Skip UK assignment if "america" is present
                return country

    return "Uncategorized"
def is_valid_country_group(group_name):
    """Check if group name is a valid country (not a streaming service).

    NOTE(review): duplicate of the earlier definition — the file should
    keep only one copy.

    Args:
        group_name: Group label to test.

    Returns:
        True only for an exact match against the recognized flag-emoji
        country group labels.
    """
    valid_countries = [
        "🇺🇸 United States", "🇨🇦 Canada", "🇬🇧 United Kingdom", "🇩🇪 Germany",
        "🇫🇷 France", "🇮🇹 Italy", "🇪🇸 Spain", "🇳🇱 Netherlands", "🇧🇪 Belgium",
        "🇦🇹 Austria", "🇨🇭 Switzerland", "🇸🇪 Sweden", "🇳🇴 Norway", "🇩🇰 Denmark",
        "🇫🇮 Finland", "🇵🇱 Poland", "🇨🇿 Czech Republic", "🇭🇺 Hungary", "🇵🇹 Portugal",
        "🇬🇷 Greece", "🇷🇴 Romania", "🇧🇬 Bulgaria", "🇭🇷 Croatia", "🇷🇸 Serbia",
        "🇦🇺 Australia", "🇯🇵 Japan", "🇰🇷 South Korea", "🇮🇳 India", "🇨🇳 China",
        "🇧🇷 Brazil", "🇲🇽 Mexico", "🇦🇷 Argentina", "🇨🇱 Chile", "🇨🇴 Colombia",
        "🇷🇺 Russia", "🇹🇷 Turkey", "🇸🇦 Saudi Arabia", "🇦🇪 UAE", "🇪🇬 Egypt",
        "🇿🇦 South Africa", "🇳🇬 Nigeria", "🇰🇪 Kenya", "🇮🇱 Israel", "🇹🇭 Thailand",
        "🇻🇳 Vietnam", "🇵🇭 Philippines", "🇮🇩 Indonesia", "🇲🇾 Malaysia", "🇸🇬 Singapore"
    ]
    return group_name in valid_countries
def load_channels():
"""Load channels from channels.txt with integrated data cleanup."""
if not os.path.exists('channels.txt'):
@ -362,51 +321,51 @@ def load_channels():
with open('channels.txt', 'r', encoding='utf-8') as f:
content = f.read()
channels = []
cleaned_count = 0
[cite_start]channels = [] [cite: 235]
[cite_start]cleaned_count = 0 [cite: 235]
print("🧹 Step 1: Data Cleanup (fixing malformed entries)")
print("-" * 50)
[cite_start]print("🧹 Step 1: Data Cleanup (fixing malformed entries)") [cite: 235]
[cite_start]print("-" * 50) [cite: 235]
for block in content.split('\n\n'):
if not block.strip():
continue
[cite_start]continue [cite: 236]
channel_data = {}
for line in block.strip().split('\n'):
if '=' in line:
key, value = line.split('=', 1)
key = key.strip()
value = value.strip()
[cite_start]key = key.strip() [cite: 237]
[cite_start]value = value.strip() [cite: 237]
if key == "Stream name":
# Check if this is malformed
if (value.startswith('".') or 'tvg-logo=' in value or
'group-title=' in value or value.count('"') > 2):
[cite_start]if (value.startswith('".') or 'tvg-logo=' in value or [cite: 238]
[cite_start]'group-title=' in value or value.count('"') > 2): [cite: 238]
# Clean the malformed name
clean_name = clean_malformed_channel_name(value)
channel_data["Stream name"] = clean_name
[cite_start]clean_name = clean_malformed_channel_name(value) [cite: 239]
[cite_start]channel_data["Stream name"] = clean_name [cite: 239, 240]
# Extract EPG ID if missing
if not channel_data.get("EPG id"):
extracted_epg = extract_epg_from_malformed(value)
[cite_start]if not channel_data.get("EPG id"): [cite: 240]
[cite_start]extracted_epg = extract_epg_from_malformed(value) [cite: 241]
if extracted_epg:
channel_data["EPG id"] = extracted_epg
cleaned_count += 1
[cite_start]cleaned_count += 1 [cite: 242]
if cleaned_count <= 10: # Show first 10 examples
print(f"🔧 Fixed: '{value[:40]}...''{clean_name}'")
[cite_start]print(f"🔧 Fixed: '{value[:40]}...''{clean_name}'") [cite: 243]
else:
channel_data[key] = value
else:
[cite_start]else: [cite: 244]
channel_data[key] = value
# Only add channels with valid names
if (channel_data.get('Stream name') and
len(channel_data.get('Stream name', '')) > 1 and
channel_data.get('Stream name') != "Unknown Channel"):
[cite_start]channel_data.get('Stream name') != "Unknown Channel"): [cite: 245]
channels.append(channel_data)
print(f"✅ Data cleanup complete: {cleaned_count} entries fixed")
@ -414,22 +373,22 @@ def load_channels():
return channels
except Exception as e:
print(f"❌ Error loading channels: {e}")
[cite_start]print(f"❌ Error loading channels: {e}") [cite: 246]
return []
def reorganize_channels(channels):
"""Enhanced reorganization with integrated cleanup + 3-point analysis."""
print("\n🔍 Step 2: Enhanced Country Detection with 3-Point Analysis")
print("📊 Analyzing: Channel Name + EPG ID + Logo URL")
print("-" * 60)
"""Enhanced reorganization with 3-point analysis."""
[cite_start]print("\n🔍 Step 2: Enhanced Country Detection with 3-Point Analysis") [cite: 179]
[cite_start]print("📊 Analyzing: Channel Name + EPG ID + Logo URL") [cite: 179]
[cite_start]print("-" * 60) [cite: 179]
changes = 0
stats = {
'country_detected': 0,
'sent_to_uncategorized': 0,
'kept_existing_country': 0,
'streaming_filtered': 0
[cite_start]'country_detected': 0, [cite: 180]
[cite_start]'sent_to_uncategorized': 0, [cite: 180]
[cite_start]'kept_existing_country': 0, [cite: 180]
[cite_start]'streaming_filtered': 0 [cite: 180]
}
country_counts = {}
@ -438,14 +397,14 @@ def reorganize_channels(channels):
stream_name = channel.get('Stream name', '')
epg_id = channel.get('EPG id', '')
logo = channel.get('Logo', '')
stream_url = channel.get('Stream URL', '')
[cite_start]stream_url = channel.get('Stream URL', '') [cite: 181]
# Detect country using enhanced 3-point analysis
detected_country = detect_country_from_channel_content(stream_name, epg_id, logo, stream_url)
# Debug output for first few channels to see what's happening
if changes < 5:
print(f"🔍 Debug: '{stream_name}' | EPG: '{epg_id}' | Detected: {detected_country}")
[cite_start]print(f"🔍 Debug: '{stream_name}' | EPG: '{epg_id}' | Detected: {detected_country}") [cite: 181, 182]
# Decide final group
if is_valid_country_group(old_group) and detected_country != "Uncategorized":
@ -454,23 +413,23 @@ def reorganize_channels(channels):
stats['kept_existing_country'] += 1
elif detected_country != "Uncategorized":
# Use detected country
final_group = detected_country
stats['country_detected'] += 1
[cite_start]final_group = detected_country [cite: 183]
[cite_start]stats['country_detected'] += 1 [cite: 183]
if old_group != detected_country:
print(f"🔍 Fixed: '{stream_name}' {old_group}{detected_country}")
changes += 1
else:
# Send to Uncategorized
final_group = "Uncategorized"
stats['sent_to_uncategorized'] += 1
[cite_start]final_group = "Uncategorized" [cite: 184]
[cite_start]stats['sent_to_uncategorized'] += 1 [cite: 184]
if old_group != "Uncategorized":
# Check if it's a streaming service
if any(service in stream_name.lower() for service in ['samsung', 'pluto', 'plex', 'tubi']):
stats['streaming_filtered'] += 1
print(f"📱 Platform: '{stream_name}' → Uncategorized")
[cite_start]if any(service in stream_name.lower() for service in ['samsung', 'pluto', 'plex', 'tubi']): [cite: 184, 185]
[cite_start]stats['streaming_filtered'] += 1 [cite: 185]
[cite_start]print(f"📱 Platform: '{stream_name}' → Uncategorized") [cite: 185]
else:
print(f"❓ Undetected: '{stream_name}' → Uncategorized")
changes += 1
[cite_start]changes += 1 [cite: 186]
channel['Group'] = final_group
country_counts[final_group] = country_counts.get(final_group, 0) + 1
@ -483,7 +442,8 @@ def reorganize_channels(channels):
print(f"❓ Sent to Uncategorized: {stats['sent_to_uncategorized']}")
print(f"\n🌍 FINAL GROUP DISTRIBUTION:")
sorted_countries = sorted(country_counts.items(), key=lambda x: (x[0] == "Uncategorized", -x[1]))
[cite_start]sorted_countries = sorted(country_counts.items(), key=lambda x: (x[0] == "Uncategorized", -x[1])) [cite: 187]
for country, count in sorted_countries:
percentage = (count / len(channels) * 100) if len(channels) > 0 else 0
print(f" {country}: {count} channels ({percentage:.1f}%)")
@ -497,23 +457,23 @@ def save_channels(channels):
if os.path.exists('channels.txt'):
backup = f"channels_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
shutil.copy2('channels.txt', backup)
print(f"📋 Backup: {backup}")
[cite_start]print(f"📋 Backup: {backup}") [cite: 188]
try:
with open('channels.txt', 'w', encoding='utf-8') as f:
for i, channel in enumerate(channels):
if i > 0:
f.write("\n\n")
f.write(f"Group = {channel.get('Group', 'Uncategorized')}\n")
f.write(f"Stream name = {channel.get('Stream name', 'Unknown')}\n")
f.write(f"Logo = {channel.get('Logo', '')}\n")
f.write(f"EPG id = {channel.get('EPG id', '')}\n")
f.write(f"Stream URL = {channel.get('Stream URL', '')}\n")
[cite_start]f.write(f"Group = {channel.get('Group', 'Uncategorized')}\n") [cite: 188, 189]
[cite_start]f.write(f"Stream name = {channel.get('Stream name', 'Unknown')}\n") [cite: 189]
[cite_start]f.write(f"Logo = {channel.get('Logo', '')}\n") [cite: 189]
[cite_start]f.write(f"EPG id = {channel.get('EPG id', '')}\n") [cite: 189]
[cite_start]f.write(f"Stream URL = {channel.get('Stream URL', '')}\n") [cite: 189]
print(f"✅ Saved {len(channels)} channels")
[cite_start]print(f"✅ Saved {len(channels)} channels") [cite: 190]
return True
except Exception as e:
print(f"❌ Save error: {e}")
[cite_start]print(f"❌ Save error: {e}") [cite: 190]
return False
@ -523,25 +483,25 @@ def generate_m3u(channels):
with open('playlist.m3u', 'w', encoding='utf-8') as f:
f.write('#EXTM3U\n')
for channel in channels:
name = channel.get('Stream name', '')
group = channel.get('Group', 'Uncategorized')
logo = channel.get('Logo', '')
epg_id = channel.get('EPG id', '')
url = channel.get('Stream URL', '')
[cite_start]for channel in channels: [cite: 191]
[cite_start]name = channel.get('Stream name', '') [cite: 191]
[cite_start]group = channel.get('Group', 'Uncategorized') [cite: 191]
[cite_start]logo = channel.get('Logo', '') [cite: 191]
[cite_start]epg_id = channel.get('EPG id', '') [cite: 191]
[cite_start]url = channel.get('Stream URL', '') [cite: 191]
if name and url:
f.write(f'#EXTINF:-1 group-title="{group}"')
[cite_start]if name and url: [cite: 192]
[cite_start]f.write(f'#EXTINF:-1 group-title="{group}"') [cite: 192]
if logo:
f.write(f' tvg-logo="{logo}"')
[cite_start]f.write(f' tvg-logo="{logo}"') [cite: 193]
if epg_id:
f.write(f' tvg-id="{epg_id}"')
f.write(f',{name}\n{url}\n')
print("✅ Generated playlist.m3u")
return True
except Exception as e:
print(f"❌ M3U error: {e}")
[cite_start]except Exception as e: [cite: 194]
[cite_start]print(f"❌ M3U error: {e}") [cite: 194]
return False
@ -554,7 +514,7 @@ def main():
print("🎯 Step 3: Filter streaming services to Uncategorized")
print("=" * 80)
channels = load_channels()
[cite_start]channels = load_channels() [cite: 195]
if not channels:
return False
@ -568,7 +528,7 @@ def main():
))
# Save and generate
if not save_channels(channels):
[cite_start]if not save_channels(channels): [cite: 196]
return False
if not generate_m3u(channels):
@ -582,20 +542,20 @@ def main():
except:
pass
print("\n🎉 ENHANCED PROCESSING COMPLETE!")
print("✅ Malformed data cleaned and fixed")
print("✅ 3-point analysis applied to all channels")
print("✅ Countries detected from EPG ID, Logo URL, and Channel Names")
print("✅ Streaming services filtered to Uncategorized")
print("✅ Clean country-organized playlist generated")
[cite_start]print("\n🎉 ENHANCED PROCESSING COMPLETE!") [cite: 197]
[cite_start]print("✅ Malformed data cleaned and fixed") [cite: 197]
[cite_start]print("✅ 3-point analysis applied to all channels") [cite: 197]
[cite_start]print("✅ Countries detected from EPG ID, Logo URL, and Channel Names") [cite: 197]
[cite_start]print("✅ Streaming services filtered to Uncategorized") [cite: 197]
[cite_start]print("✅ Clean country-organized playlist generated") [cite: 197]
# Final statistics
uncategorized_count = sum(1 for ch in channels if ch.get('Group') == 'Uncategorized')
success_rate = ((len(channels) - uncategorized_count) / len(channels) * 100) if len(channels) > 0 else 0
print(f"\n📊 FINAL STATISTICS:")
print(f" Total channels: {len(channels)}")
print(f" Properly categorized: {len(channels) - uncategorized_count} ({success_rate:.1f}%)")
print(f" In Uncategorized: {uncategorized_count} ({100 - success_rate:.1f}%)")
[cite_start]success_rate = ((len(channels) - uncategorized_count) / len(channels) * 100) if len(channels) > 0 else 0 [cite: 198]
[cite_start]print(f"\n📊 FINAL STATISTICS:") [cite: 198]
[cite_start]print(f" Total channels: {len(channels)}") [cite: 198]
[cite_start]print(f" Properly categorized: {len(channels) - uncategorized_count} ({success_rate:.1f}%)") [cite: 198]
[cite_start]print(f" In Uncategorized: {uncategorized_count} ({100 - success_rate:.1f}%)") [cite: 198]
return True