#!/usr/bin/env python3
"""
IPTV Enhanced Country Detection - Updated Version
Uses 3-point analysis: Channel Name + EPG ID + Logo URL
Then filters to keep only legitimate countries
"""
import os
import shutil
from collections import Counter
from datetime import datetime
from pathlib import Path

# Ensure correct directory: every file this script touches (channels.txt,
# playlist.m3u, bulk_import.m3u) is addressed relative to the repository root,
# i.e. the parent of the directory holding this script.
# resolve() matters: when __file__ is a relative path (e.g. the script is
# started as "python generate_playlist.py" on interpreters that do not
# absolutize it), Path(__file__).parent is "." and its parent is "." too,
# so the chdir would silently stay in the scripts directory.
script_dir = Path(__file__).resolve().parent
root_dir = script_dir.parent
os.chdir(root_dir)


# Platforms / aggregators: a hit on any of these sends the channel to
# "Uncategorized" regardless of every other signal.
_STREAMING_SERVICES = (
    "plex", "pluto", "tubi", "samsung", "xumo", "stirr", "crackle", "imdb tv",
    "daddylive", "drew247", "aixmedia", "moveonjoy", "drewlive24", "udptv",
    "a1xs.vip", "zekonew", "forcedtoplay", "cdn1host", "tvpass.org",
    "jmp2.uk/plu-", "provider-static.plex.tv", "images.pluto.tv",
)

# Country suffixes looked for in the EPG id, checked in this order.
_EPG_TLD_COUNTRIES = {
    ".ca": "🇨🇦 Canada",
    ".us": "🇺🇸 United States",
    ".uk": "🇬🇧 United Kingdom",
    ".ph": "🇵🇭 Philippines",
    ".au": "🇦🇺 Australia",
    ".jp": "🇯🇵 Japan",
    ".my": "🇲🇾 Malaysia",
    ".de": "🇩🇪 Germany",
    ".fr": "🇫🇷 France",
    ".es": "🇪🇸 Spain",
    ".it": "🇮🇹 Italy",
    ".br": "🇧🇷 Brazil",
    ".nl": "🇳🇱 Netherlands",
}

_CANADIAN_EPG_PATTERNS = (
    "cbc.", "ctv.", "global.", "tsn.", "sportsnet.", "citytv.", "aptn.",
    ".ab.ca", ".bc.ca", ".mb.ca", ".nb.ca", ".nl.ca", ".ns.ca", ".nt.ca",
    ".nu.ca", ".on.ca", ".pe.ca", ".qc.ca", ".sk.ca", ".yt.ca",
    "cfcn", "cky", "ctfo", "cjoh", "ckws",
)

_CANADIAN_NAME_INDICATORS = (
    # TSN variations
    "tsn 1", "tsn 2", "tsn 3", "tsn 4", "tsn 5", "tsn1", "tsn2", "tsn3", "tsn4", "tsn5",
    # CBC variations
    "cbc news", "cbc toronto", "cbc calgary", "cbc vancouver", "cbc winnipeg", "cbc montreal",
    # CTV variations
    "ctv calgary", "ctv vancouver", "ctv toronto", "ctv winnipeg", "ctv ottawa", "ctv montreal",
    "ctv atlantic", "ctv edmonton", "ctv saskatoon", "ctv regina", "ctv kitchener",
    # Regional station calls
    "cfcn", "cky", "ctfo", "cjoh", "ckws", "cfrn", "cfqc", "ckck", "chch",
    # Other Canadian broadcasters
    "sportsnet", "global tv", "citytv", "aptn", "omni", "tvo", "télé-québec",
)

_UK_BBC_NAMES = (
    "bbc one", "bbc two", "bbc three", "bbc four",
    "bbc news", "bbc iplayer", "bbc scotland", "bbc wales",
    "bbc comedy", "bbc drama", "bbc earth",
)

_US_NAME_FIXES = (
    "tv land", "tvland", "we tv", "wetv", "all weddings we tv",
    "cheaters", "cheers", "christmas 365",
)

_US_NETWORKS = (
    "cbs", "nbc", "abc", "fox", "cnn", "espn", "hbo", "showtime", "starz", "cinemax",
    "mtv", "vh1", "comedy central", "cartoon network", "nickelodeon", "disney channel",
    "discovery", "history", "tlc", "hgtv", "food network", "travel channel",
    "lifetime", "hallmark", "e!", "bravo", "oxygen", "syfy", "usa network",
    "tnt", "tbs", "fx", "fxx", "amc", "ifc", "tcm", "turner classic",
)

# Markers that veto the generic "US network" rule.
_NON_US_MARKERS = ("canada", ".ca", "uk", ".uk")

_LOGO_PATTERNS = {
    "🇨🇦 Canada": ["/canada/", "/ca/", "canada.", "canadian"],
    "🇺🇸 United States": ["/usa/", "/us/", "united-states", "american"],
    "🇬🇧 United Kingdom": ["/uk/", "/united-kingdom/", "british", "england"],
    "🇩🇪 Germany": ["/germany/", "/de/", "german", "deutschland"],
    "🇫🇷 France": ["/france/", "/fr/", "french", "français"],
    "🇮🇹 Italy": ["/italy/", "/it/", "italian", "italiano"],
    "🇪🇸 Spain": ["/spain/", "/es/", "spanish", "español"],
    "🇳🇱 Netherlands": ["/netherlands/", "/nl/", "dutch", "nederland"],
    "🇦🇺 Australia": ["/australia/", "/au/", "australian", "aussie"],
    "🇯🇵 Japan": ["/japan/", "/jp/", "japanese", "日本"],
    "🇰🇷 South Korea": ["/korea/", "/kr/", "korean", "한국"],
    "🇮🇳 India": ["/india/", "/in/", "indian", "भारत"],
    "🇧🇷 Brazil": ["/brazil/", "/br/", "brazilian", "brasil"],
    "🇲🇽 Mexico": ["/mexico/", "/mx/", "mexican", "méxico"],
    "🇦🇷 Argentina": ["/argentina/", "/ar/", "argentinian", "argentina"],
    "🇵🇭 Philippines": ["/philippines/", "/ph/", "filipino", "pilipinas"],
}

_BROADCASTER_PATTERNS = {
    "🇨🇦 Canada": [
        "cbc", "tsn", "ctv", "global", "sportsnet", "citytv", "aptn", "teletoon", "ytv",
        "discovery canada", "history canada", "slice", "w network", "oln", "hgtv canada",
        "food network canada", "showcase", "crave", "super channel", "hollywood suite",
    ],
    "🇺🇸 United States": [
        "cbs", "nbc", "abc", "fox", "cnn", "espn", "amc", "mtv", "comedy central",
        "discovery usa", "history usa", "tlc usa", "hgtv usa", "food network usa", "paramount",
        "nickelodeon usa", "cartoon network usa", "disney usa", "lifetime", "e!", "bravo usa",
    ],
    "🇬🇧 United Kingdom": [
        "bbc", "itv", "channel 4", "channel 5", "sky", "dave", "really", "yesterday",
        "discovery uk", "history uk", "tlc uk", "living", "alibi", "gold", "drama",
    ],
    "🇩🇪 Germany": [
        "ard", "zdf", "rtl", "pro7", "sat.1", "vox", "kabel eins", "super rtl", "rtl2",
        "discovery germany", "history germany", "tlc germany", "dmax", "sixx", "tele 5",
    ],
    "🇫🇷 France": [
        "tf1", "france 2", "france 3", "france 5", "m6", "canal+", "arte", "w9", "tmc",
        "discovery france", "history france", "tlc france", "planete+", "ushuaia tv",
    ],
    "🇮🇹 Italy": [
        "rai", "canale 5", "italia 1", "rete 4", "la7", "tv8", "nove", "20 mediaset",
        "discovery italia", "history italia", "dmax italia", "real time", "giallo",
    ],
    "🇪🇸 Spain": [
        "tve", "la 1", "la 2", "antena 3", "cuatro", "telecinco", "la sexta", "nova",
        "discovery spain", "history spain", "dmax spain", "mega", "neox", "clan",
    ],
    "🇳🇱 Netherlands": [
        "npo", "rtl 4", "rtl 5", "rtl 7", "sbs6", "veronica", "net5", "rtl z",
        "discovery netherlands", "history netherlands", "tlc netherlands",
    ],
    "🇦🇺 Australia": [
        "abc australia", "nine network", "seven network", "ten", "foxtel",
        "discovery australia", "history australia", "lifestyle",
    ],
    "🇯🇵 Japan": [
        "nhk", "fuji tv", "tbs", "tv asahi", "tv tokyo", "nippon tv", "animax",
    ],
    "🇰🇷 South Korea": [
        "kbs", "mbc", "sbs", "jtbc", "tvn", "ocn",
    ],
    "🇮🇳 India": [
        "zee", "star plus", "colors", "sony tv", "& tv", "discovery india",
    ],
    "🇧🇷 Brazil": [
        "globo", "sbt", "record", "band", "discovery brasil",
    ],
    "🇲🇽 Mexico": [
        "televisa", "tv azteca", "once tv", "discovery mexico",
    ],
    "🇦🇷 Argentina": [
        "telefe", "canal 13", "america tv", "discovery argentina",
    ],
    "🇵🇭 Philippines": [
        "abs-cbn", "gma", "anc", "tv5", "pba rush",
    ],
}


def _has_keyword(text, keyword, whole=False):
    """Return True if *keyword* occurs in *text* without starting mid-word.

    A keyword that begins with a letter or digit must not be directly
    preceded by one, so "ten" no longer matches inside "content" and "cky"
    no longer matches inside "lucky".  Suffixes glued onto a brand are still
    accepted ("zdfneo", "fox5") unless *whole* is true, in which case the
    keyword must not be directly followed by a letter or digit either
    (used for domain suffixes such as ".ca", which must not match
    ".caribbean").
    """
    pos = text.find(keyword)
    while pos != -1:
        end = pos + len(keyword)
        glued_left = keyword[0].isalnum() and pos > 0 and text[pos - 1].isalnum()
        glued_right = (
            whole
            and keyword[-1].isalnum()
            and end < len(text)
            and text[end].isalnum()
        )
        if not (glued_left or glued_right):
            return True
        pos = text.find(keyword, pos + 1)
    return False


def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", stream_url=""):
    """
    Guess the country group of a channel from its name, EPG id, logo URL and
    stream URL.

    Checks run in this order and the first hit wins:
      1. streaming-service markers anywhere        -> "Uncategorized"
      2. country suffix in the EPG id, then Canadian EPG/call-sign patterns
      3. channel-name rules (Canadian indicators, BBC, US fixes, US networks,
         ITV, ANC, Animax)
      4. country markers in the logo URL
      5. broadcaster keywords anywhere in the combined text

    Args:
        channel_name: Display name of the channel.
        epg_id: EPG / tvg-id value, may be empty.
        logo_url: Logo URL, may be empty.
        stream_url: Stream URL, may be empty.
        A None value for any of them is treated as an empty string.

    Returns:
        A group label such as "🇨🇦 Canada", or "Uncategorized" when nothing
        matched or the channel belongs to a streaming service.
    """
    name = (channel_name or "").lower()
    epg = (epg_id or "").lower()
    logo = (logo_url or "").lower()
    stream = (stream_url or "").lower()
    # Fields are joined with spaces so a keyword can never span two fields.
    all_text = f"{name} {epg} {logo} {stream}"

    # STEP 1: streaming services first (these go to Uncategorized).
    # Plain substring on purpose: these markers usually sit glued inside
    # host names ("samsungtvplus", "images.pluto.tv").
    if any(service in all_text for service in _STREAMING_SERVICES):
        return "Uncategorized"

    # STEP 2: EPG id suffix (most reliable signal).
    for tld, country in _EPG_TLD_COUNTRIES.items():
        if _has_keyword(epg, tld, whole=True):
            return country

    # Canadian EPG prefixes, provincial suffixes and station call signs.
    for pattern in _CANADIAN_EPG_PATTERNS:
        # Provincial suffixes (".on.ca") must end the token; prefixes and
        # call signs may carry a suffix ("cfcn-dt").
        if _has_keyword(all_text, pattern, whole=pattern.startswith(".")):
            return "🇨🇦 Canada"

    # STEP 3: channel-name specific rules.
    if any(_has_keyword(name, indicator) for indicator in _CANADIAN_NAME_INDICATORS):
        return "🇨🇦 Canada"

    # BBC handling (distinguish US vs UK).
    if "bbc" in name:
        if "bbc america" in name:
            return "🇺🇸 United States"
        if any(x in name for x in _UK_BBC_NAMES):
            # Only claim UK when nothing hints at a US feed.
            if not any(x in all_text for x in ("america", ".us", "usa")):
                return "🇬🇧 United Kingdom"

    # US channels that were misclassified.
    if any(x in name for x in _US_NAME_FIXES):
        return "🇺🇸 United States"

    # Generic US networks, unless another field points at Canada or the UK.
    if any(_has_keyword(name, network) for network in _US_NETWORKS):
        if not any(_has_keyword(all_text, marker, whole=True) for marker in _NON_US_MARKERS):
            return "🇺🇸 United States"

    # UK channels (but not BBC America).
    if "come dine with me" in name or _has_keyword(name, "itv"):
        return "🇬🇧 United Kingdom"

    # Philippines news channels.
    if any(x in name for x in ("anc global", "anc ph")):
        return "🇵🇭 Philippines"

    # Japan anime channels.
    if "animax" in name:
        return "🇯🇵 Japan"

    # STEP 4: logo URL analysis.
    for country, patterns in _LOGO_PATTERNS.items():
        if any(pattern in logo for pattern in patterns):
            return country

    # STEP 5: broadcaster keywords anywhere in the combined text.
    for country, keywords in _BROADCASTER_PATTERNS.items():
        if any(_has_keyword(all_text, keyword) for keyword in keywords):
            return country

    return "Uncategorized"


# Group labels that count as real countries; anything else (streaming
# platforms, "Uncategorized", free-form names) is not a country group.
_VALID_COUNTRY_GROUPS = frozenset({
    "🇺🇸 United States", "🇨🇦 Canada", "🇬🇧 United Kingdom", "🇩🇪 Germany",
    "🇫🇷 France", "🇮🇹 Italy", "🇪🇸 Spain", "🇳🇱 Netherlands", "🇧🇪 Belgium",
    "🇦🇹 Austria", "🇨🇭 Switzerland", "🇸🇪 Sweden", "🇳🇴 Norway", "🇩🇰 Denmark",
    "🇫🇮 Finland", "🇵🇱 Poland", "🇨🇿 Czech Republic", "🇭🇺 Hungary", "🇵🇹 Portugal",
    "🇬🇷 Greece", "🇷🇴 Romania", "🇧🇬 Bulgaria", "🇭🇷 Croatia", "🇷🇸 Serbia",
    "🇦🇺 Australia", "🇯🇵 Japan", "🇰🇷 South Korea", "🇮🇳 India", "🇨🇳 China",
    "🇧🇷 Brazil", "🇲🇽 Mexico", "🇦🇷 Argentina", "🇨🇱 Chile", "🇨🇴 Colombia",
    "🇷🇺 Russia", "🇹🇷 Turkey", "🇸🇦 Saudi Arabia", "🇦🇪 UAE", "🇪🇬 Egypt",
    "🇿🇦 South Africa", "🇳🇬 Nigeria", "🇰🇪 Kenya", "🇮🇱 Israel", "🇹🇭 Thailand",
    "🇻🇳 Vietnam", "🇵🇭 Philippines", "🇮🇩 Indonesia", "🇲🇾 Malaysia", "🇸🇬 Singapore",
})


def is_valid_country_group(group_name):
    """Check if group name is a valid country (not a streaming service).

    The comparison is exact: the label must match one of the known
    "<flag> <name>" strings character for character.

    Args:
        group_name: Group label to test.

    Returns:
        True when the label is one of the known country groups, else False
        (including for non-string input).
    """
    # The isinstance guard keeps unhashable input (e.g. a list) a plain
    # False instead of a TypeError from the set lookup.
    return isinstance(group_name, str) and group_name in _VALID_COUNTRY_GROUPS


def load_channels(path='channels.txt'):
    """Load channels from channels.txt with enhanced parsing for malformed entries.

    The file is a sequence of "key = value" lines; channels are separated by
    one or more blank lines.  A separator line that contains only whitespace
    counts as blank, so two channels are never merged into one record.

    Args:
        path: File to read.  Defaults to 'channels.txt' in the current
            working directory.

    Returns:
        A list of dicts, one per channel, keyed by the text left of '='.
        Records without a "Stream name" longer than one character are
        dropped.  Returns [] when the file is missing or cannot be read.
    """
    if not os.path.exists(path):
        print(f"❌ No {path} found")
        return []

    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ Error loading channels: {e}")
        return []

    channels = []
    record = {}
    # The trailing empty string acts as a sentinel that flushes the last record.
    for line in content.split('\n') + ['']:
        if not line.strip():
            # Blank line: close the current record.
            # Only add channels with valid stream names.
            if len(record.get('Stream name', '')) > 1:
                channels.append(record)
            record = {}
            continue

        if '=' not in line:
            continue

        # Split on the first '=' only; values (URLs) may contain '='.
        key, value = line.split('=', 1)
        key = key.strip()
        value = value.strip()

        # Clean up malformed values such as: ".AB.ca",.AB.ca" tvg-logo=...,Name
        # The real channel name is whatever follows the last comma.
        if key == "Stream name" and value.startswith('"') and value.count('"') > 2:
            parts = value.split(',')
            if len(parts) > 1:
                value = parts[-1].strip().strip('"')

        record[key] = value

    print(f"✅ Loaded {len(channels)} channels (with enhanced parsing)")
    return channels


def reorganize_channels(channels):
    """Enhanced reorganization with 3-point analysis.

    For every channel the country is re-detected from its name, EPG id, logo
    and stream URL, and the 'Group' key is rewritten in place:

      * detection succeeded and the old group is already a valid country
        -> the old group is kept;
      * detection succeeded otherwise -> the detected country is used;
      * detection returned "Uncategorized" -> the channel is moved to
        "Uncategorized", even if its old group was a valid country.

    A summary and the final group distribution are printed.

    Args:
        channels: List of channel dicts (mutated in place).

    Returns:
        The same list object, for call chaining.
    """
    print("🔍 Enhanced Country Detection with 3-Point Analysis")
    print("📊 Analyzing: Channel Name + EPG ID + Logo URL")
    print("-" * 60)

    changes = 0
    stats = {
        'country_detected': 0,
        'sent_to_uncategorized': 0,
        'kept_existing_country': 0
    }
    country_counts = Counter()

    for channel in channels:
        old_group = channel.get('Group', 'Uncategorized')
        stream_name = channel.get('Stream name', '')
        epg_id = channel.get('EPG id', '')
        logo = channel.get('Logo', '')
        stream_url = channel.get('Stream URL', '')

        # Detect country using enhanced 3-point analysis
        detected_country = detect_country_from_channel_content(stream_name, epg_id, logo, stream_url)

        if detected_country == "Uncategorized":
            # NOTE(review): this also demotes channels whose old group was a
            # valid country but whose content matched nothing - confirm that
            # is intended and not only meant for streaming services.
            final_group = "Uncategorized"
            stats['sent_to_uncategorized'] += 1
            if old_group != "Uncategorized":
                print(f"📱 Platform: '{stream_name}' → Uncategorized")
                changes += 1
        elif is_valid_country_group(old_group):
            # Keep existing valid country
            final_group = old_group
            stats['kept_existing_country'] += 1
        else:
            # Use detected country
            final_group = detected_country
            stats['country_detected'] += 1
            if old_group != detected_country:
                # The arrow separates old and new group; without it the two
                # labels ran together in the log line.
                print(f"🔍 Fixed: '{stream_name}' {old_group} → {detected_country}")
                changes += 1

        channel['Group'] = final_group
        country_counts[final_group] += 1

    print("\n📊 PROCESSING RESULTS:")
    print(f"✅ Changes made: {changes}")
    print(f"🔍 Country detected: {stats['country_detected']}")
    print(f"✅ Kept existing countries: {stats['kept_existing_country']}")
    print(f"📱 Sent to Uncategorized: {stats['sent_to_uncategorized']}")

    print("\n🌍 FINAL GROUP DISTRIBUTION:")
    # Largest groups first, with Uncategorized always listed last.
    sorted_countries = sorted(country_counts.items(), key=lambda x: (x[0] == "Uncategorized", -x[1]))
    for country, count in sorted_countries:
        print(f" {country}: {count} channels")

    return channels


def save_channels(channels, path='channels.txt'):
    """Save channels to file.

    If the target already exists it is first copied to a timestamped backup
    next to it ("<stem>_backup_<YYYYmmdd_HHMMSS><suffix>").  The new content
    is written to a temporary sibling file and then moved over the target,
    so a failed write never leaves a truncated channels file behind.

    Args:
        channels: List of channel dicts; missing keys are written as
            defaults ('Uncategorized' group, 'Unknown' name, empty strings).
        path: File to write.  Defaults to 'channels.txt' in the current
            working directory.

    Returns:
        True on success, False when the backup or the write failed.
    """
    target = Path(path)
    tmp_path = target.with_name(target.name + ".tmp")

    # Each record ends with a newline and records are joined by "\n\n",
    # i.e. two blank lines between channels.
    records = [
        f"Group = {channel.get('Group', 'Uncategorized')}\n"
        f"Stream name = {channel.get('Stream name', 'Unknown')}\n"
        f"Logo = {channel.get('Logo', '')}\n"
        f"EPG id = {channel.get('EPG id', '')}\n"
        f"Stream URL = {channel.get('Stream URL', '')}\n"
        for channel in channels
    ]

    try:
        # Backup
        if target.exists():
            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            backup = target.with_name(f"{target.stem}_backup_{stamp}{target.suffix}")
            shutil.copy2(target, backup)
            print(f"📋 Backup: {backup}")

        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write("\n\n".join(records))
        # Replace the target only after the new content is fully written.
        os.replace(tmp_path, target)
    except (OSError, UnicodeError) as e:
        print(f"❌ Save error: {e}")
        # Best-effort cleanup of the partial temp file.
        try:
            if tmp_path.exists():
                tmp_path.unlink()
        except OSError:
            pass
        return False

    print(f"✅ Saved {len(channels)} channels")
    return True


def _m3u_field(value, quoted=False):
    """Make *value* safe to embed in a single #EXTINF line.

    Line breaks become spaces; for quoted attribute values, double quotes
    become apostrophes so they cannot terminate the attribute early.
    """
    text = str(value).replace("\r", " ").replace("\n", " ")
    if quoted:
        text = text.replace('"', "'")
    return text


def generate_m3u(channels, path='playlist.m3u'):
    """Generate M3U playlist.

    Writes "#EXTM3U" followed by one "#EXTINF" line and one URL line per
    channel.  Channels without a stream name or without a stream URL are
    skipped.  tvg-logo and tvg-id attributes are only emitted when set.

    Args:
        channels: List of channel dicts.
        path: File to write.  Defaults to 'playlist.m3u' in the current
            working directory.

    Returns:
        True on success, False when the file could not be written.
    """
    lines = ['#EXTM3U\n']
    for channel in channels:
        name = channel.get('Stream name', '')
        url = channel.get('Stream URL', '')
        if not (name and url):
            continue

        group = channel.get('Group', 'Uncategorized')
        logo = channel.get('Logo', '')
        epg_id = channel.get('EPG id', '')

        extinf = f'#EXTINF:-1 group-title="{_m3u_field(group, quoted=True)}"'
        if logo:
            extinf += f' tvg-logo="{_m3u_field(logo, quoted=True)}"'
        if epg_id:
            extinf += f' tvg-id="{_m3u_field(epg_id, quoted=True)}"'
        lines.append(f'{extinf},{_m3u_field(name)}\n{_m3u_field(url)}\n')

    try:
        with open(path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
    except (OSError, UnicodeError) as e:
        print(f"❌ M3U error: {e}")
        return False

    print("✅ Generated playlist.m3u")
    return True


def main():
    """Main function.

    Loads channels.txt, re-assigns every channel's group, sorts the list
    (country groups first, "Uncategorized" last, then by stream name),
    writes channels.txt and playlist.m3u, and resets bulk_import.m3u to an
    empty playlist.

    Returns:
        True when channels were loaded and both output files were written,
        False otherwise.  Failing to reset bulk_import.m3u is reported but
        does not change the result.
    """
    print("🎯 Enhanced IPTV Country Detection - 3-Point Analysis")
    print("=" * 70)
    print("🔍 Analyzes: Channel Name + EPG ID + Logo URL")
    print("🎯 Filters: Only countries remain, streaming services → Uncategorized")
    print("=" * 70)

    channels = load_channels()
    if not channels:
        return False

    # Enhanced reorganization
    channels = reorganize_channels(channels)

    # Sort: Countries first (alphabetically), then Uncategorized last.
    # The leading boolean does the "last" part; a placeholder string such as
    # "zzz" does not work because country labels start with flag emoji,
    # which sort after every ASCII letter.
    channels.sort(key=lambda ch: (
        ch.get('Group') == "Uncategorized",
        ch.get('Group', ''),
        ch.get('Stream name', ''),
    ))

    # Save and generate
    if not save_channels(channels):
        return False
    if not generate_m3u(channels):
        return False

    # Clear import (best effort: the main outputs are already written)
    try:
        with open('bulk_import.m3u', 'w', encoding='utf-8') as f:
            f.write('#EXTM3U\n')
        print("🧹 Cleared import file")
    except OSError as e:
        print(f"⚠️ Could not clear import file: {e}")

    print("\n🎉 ENHANCED PROCESSING COMPLETE!")
    print("✅ 3-point analysis applied to all channels")
    print("✅ Countries detected from EPG ID, Logo URL, and Channel Names")
    print("✅ Streaming services filtered to Uncategorized")
    print("✅ Clean country-organized playlist generated")
    return True


if __name__ == "__main__":
    # Exit status 0 on success, 1 on failure.  SystemExit is raised directly
    # because the bare exit() helper is installed by the site module for
    # interactive use and is not guaranteed to exist (e.g. under python -S).
    raise SystemExit(0 if main() else 1)