Update scripts/generate_playlist.py
Some checks are pending
Generate M3U Playlist with Auto-Organization / build-and-organize (push) Waiting to run

This commit is contained in:
stoney420 2025-06-29 05:46:24 +02:00
parent f7079b8779
commit e23f5a05dc

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
IPTV Enhanced Country Detection - Complete Working Version
IPTV Enhanced Country Detection - Updated Version
Uses 3-point analysis: Channel Name + EPG ID + Logo URL
Then filters to keep only legitimate countries
"""
@ -14,71 +14,8 @@ from pathlib import Path
# Resolve paths relative to this script's location. NOTE: the previous
# os.chdir(root_dir) at import time was removed on purpose — mutating the
# process CWD broke execution inside the GitHub Action runner.
script_dir = Path(__file__).parent
root_dir = script_dir.parent
def clean_malformed_channel_name(raw_name):
    """Extract a human-readable channel name from malformed EXTINF data.

    Tries several recovery strategies in order, falling back to generic
    cleanup; returns "Unknown Channel" when nothing usable remains.
    """
    if not raw_name or len(raw_name) < 2:
        return "Unknown Channel"

    # Case 1: fully malformed entries such as
    # ".AB.ca",.AB.ca" tvg-logo="..." group-title="DaddyLive CA",CTV Canada [HD]"
    # — the real name sits after the final comma.
    if raw_name.startswith('".') and 'tvg-logo=' in raw_name:
        pieces = raw_name.split(',')
        if len(pieces) > 1:
            candidate = pieces[-1].strip().strip('"').strip()
            if candidate:
                return candidate

    # Case 2: embedded EXTINF attributes — take whatever follows group-title="...",
    if 'group-title=' in raw_name and ',' in raw_name:
        match = re.search(r'group-title="[^"]*",(.+)$', raw_name)
        if match:
            return match.group(1).strip().strip('"')

    # Case 3: leading domain plus stray quotes — scan comma pieces from the
    # right for the first one that is not a URL/attribute/domain fragment.
    if raw_name.startswith('.') and raw_name.count('"') > 2:
        junk_markers = ['http', 'tvg-', 'group-title', '.com', '.ca', '.us']
        for piece in reversed(raw_name.split(',')):
            candidate = piece.strip().strip('"').strip()
            if (candidate and not candidate.startswith('.') and len(candidate) > 2
                    and not any(marker in candidate.lower() for marker in junk_markers)):
                return candidate

    # Fallback: strip quotes, a leading domain, and trailing EXTINF attributes.
    result = raw_name.strip().strip('"').strip()
    if result.startswith('.'):
        result = re.sub(r'^\.[\w.]+["\']*,?\s*', '', result)
    result = re.sub(r'\s+tvg-.*$', '', result)
    result = re.sub(r'\s+group-title.*$', '', result)
    return result if result and len(result) > 1 else "Unknown Channel"
def extract_epg_from_malformed(raw_name):
    """Derive a generic EPG id from a two-letter province/state domain.

    Recognizes fragments such as ".AB.ca" or ".NY.us" and maps them to
    "generic.<XX>.<tld>"; returns "" when no such pattern is present.
    """
    # Canadian provinces are checked first, then US states.
    for tld in ('ca', 'us'):
        match = re.search(r'\.([A-Z]{2})\.' + tld, raw_name)
        if match:
            return f"generic.{match.group(1)}.{tld}"
    return ""
# The following line is removed to ensure stable execution within the action
# os.chdir(root_dir)
def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", stream_url=""):
"""
@ -99,7 +36,7 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
for service in streaming_services:
if service in all_text:
return "Uncategorized"
[cite_start]return "Uncategorized" [cite: 152]
# STEP 2: EPG ID detection (most reliable) - Enhanced
epg_patterns = {
@ -109,18 +46,18 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
".ph": "🇵🇭 Philippines",
".au": "🇦🇺 Australia",
".jp": "🇯🇵 Japan",
".my": "🇲🇾 Malaysia",
".de": "🇩🇪 Germany",
".fr": "🇫🇷 France",
".es": "🇪🇸 Spain",
".it": "🇮🇹 Italy",
".br": "🇧🇷 Brazil",
".nl": "🇳🇱 Netherlands"
[cite_start]".my": "🇲🇾 Malaysia", [cite: 153]
[cite_start]".de": "🇩🇪 Germany", [cite: 153]
[cite_start]".fr": "🇫🇷 France", [cite: 153]
[cite_start]".es": "🇪🇸 Spain", [cite: 153]
[cite_start]".it": "🇮🇹 Italy", [cite: 153]
[cite_start]".br": "🇧🇷 Brazil", [cite: 153]
[cite_start]".nl": "🇳🇱 Netherlands" [cite: 153]
}
for domain, country in epg_patterns.items():
if domain in epg_id.lower():
return country
[cite_start]return country [cite: 154]
# Enhanced Canadian EPG detection
canadian_epg_patterns = [
@ -131,7 +68,7 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
]
for pattern in canadian_epg_patterns:
if pattern in epg_id.lower() or pattern in all_text:
[cite_start]if pattern in epg_id.lower() or pattern in all_text: [cite: 155]
return "🇨🇦 Canada"
# STEP 3: Enhanced specific channel fixes
@ -142,14 +79,14 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
# TSN variations
"tsn 1", "tsn 2", "tsn 3", "tsn 4", "tsn 5", "tsn1", "tsn2", "tsn3", "tsn4", "tsn5",
# CBC variations
"cbc news", "cbc toronto", "cbc calgary", "cbc vancouver", "cbc winnipeg", "cbc montreal",
[cite_start]"cbc news", "cbc toronto", "cbc calgary", "cbc vancouver", "cbc winnipeg", "cbc montreal", [cite: 156]
# CTV variations
"ctv calgary", "ctv vancouver", "ctv toronto", "ctv winnipeg", "ctv ottawa", "ctv montreal",
"ctv atlantic", "ctv edmonton", "ctv saskatoon", "ctv regina", "ctv kitchener",
[cite_start]"ctv calgary", "ctv vancouver", "ctv toronto", "ctv winnipeg", "ctv ottawa", "ctv montreal", [cite: 156]
[cite_start]"ctv atlantic", "ctv edmonton", "ctv saskatoon", "ctv regina", "ctv kitchener", [cite: 156]
# Regional station calls
"cfcn", "cky", "ctfo", "cjoh", "ckws", "cfrn", "cfqc", "ckck", "chch",
[cite_start]"cfcn", "cky", "ctfo", "cjoh", "ckws", "cfrn", "cfqc", "ckck", "chch", [cite: 156]
# Other Canadian broadcasters
"sportsnet", "global tv", "citytv", "aptn", "omni", "tvo", "télé-québec"
[cite_start]"sportsnet", "global tv", "citytv", "aptn", "omni", "tvo", "télé-québec" [cite: 157]
]
for indicator in canadian_indicators:
@ -159,14 +96,14 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
# Enhanced BBC handling (distinguish US vs UK)
if "bbc" in channel_lower:
# BBC America is US
if "bbc america" in channel_lower:
return "🇺🇸 United States"
[cite_start]if "bbc america" in channel_lower: [cite: 158]
[cite_start]return "🇺🇸 United States" [cite: 158]
# Most other BBC channels are UK
elif any(x in channel_lower for x in ["bbc one", "bbc two", "bbc three", "bbc four",
"bbc news", "bbc iplayer", "bbc scotland", "bbc wales",
"bbc comedy", "bbc drama", "bbc earth"]):
[cite_start]"bbc news", "bbc iplayer", "bbc scotland", "bbc wales", [cite: 159]
[cite_start]"bbc comedy", "bbc drama", "bbc earth"]): [cite: 159]
# Check if it's specifically UK version
if not any(x in all_text for x in ["america", ".us", "usa"]):
[cite_start]if not any(x in all_text for x in ["america", ".us", "usa"]): [cite: 159, 160]
return "🇬🇧 United Kingdom"
# US channels that were misclassified
@ -175,15 +112,15 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
# Enhanced US network detection
us_networks = [
"cbs", "nbc", "abc", "fox", "cnn", "espn", "hbo", "showtime", "starz", "cinemax",
"mtv", "vh1", "comedy central", "cartoon network", "nickelodeon", "disney channel",
"discovery", "history", "tlc", "hgtv", "food network", "travel channel",
"lifetime", "hallmark", "e!", "bravo", "oxygen", "syfy", "usa network",
"tnt", "tbs", "fx", "fxx", "amc", "ifc", "tcm", "turner classic"
[cite_start]"cbs", "nbc", "abc", "fox", "cnn", "espn", "hbo", "showtime", "starz", "cinemax", [cite: 160, 161]
[cite_start]"mtv", "vh1", "comedy central", "cartoon network", "nickelodeon", "disney channel", [cite: 161]
[cite_start]"discovery", "history", "tlc", "hgtv", "food network", "travel channel", [cite: 161]
[cite_start]"lifetime", "hallmark", "e!", "bravo", "oxygen", "syfy", "usa network", [cite: 161]
[cite_start]"tnt", "tbs", "fx", "fxx", "amc", "ifc", "tcm", "turner classic" [cite: 161]
]
for network in us_networks:
if network in channel_lower and not any(x in all_text for x in ["canada", ".ca", "uk", ".uk"]):
[cite_start]if network in channel_lower and not any(x in all_text for x in ["canada", ".ca", "uk", ".uk"]): [cite: 161, 162]
return "🇺🇸 United States"
# UK channels (but not BBC America)
@ -195,8 +132,8 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
return "🇵🇭 Philippines"
# Japan anime channels
if "animax" in channel_lower:
return "🇯🇵 Japan"
[cite_start]if "animax" in channel_lower: [cite: 163]
[cite_start]return "🇯🇵 Japan" [cite: 163]
# STEP 4: Logo URL analysis
logo_patterns = {
@ -204,99 +141,64 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
"🇺🇸 United States": ["/usa/", "/us/", "united-states", "american"],
"🇬🇧 United Kingdom": ["/uk/", "/united-kingdom/", "british", "england"],
"🇩🇪 Germany": ["/germany/", "/de/", "german", "deutschland"],
"🇫🇷 France": ["/france/", "/fr/", "french", "français"],
"🇮🇹 Italy": ["/italy/", "/it/", "italian", "italiano"],
"🇪🇸 Spain": ["/spain/", "/es/", "spanish", "español"],
"🇳🇱 Netherlands": ["/netherlands/", "/nl/", "dutch", "nederland"],
"🇦🇺 Australia": ["/australia/", "/au/", "australian", "aussie"],
"🇯🇵 Japan": ["/japan/", "/jp/", "japanese", "日本"],
"🇰🇷 South Korea": ["/korea/", "/kr/", "korean", "한국"],
"🇮🇳 India": ["/india/", "/in/", "indian", "भारत"],
"🇧🇷 Brazil": ["/brazil/", "/br/", "brazilian", "brasil"],
"🇲🇽 Mexico": ["/mexico/", "/mx/", "mexican", "méxico"],
"🇦🇷 Argentina": ["/argentina/", "/ar/", "argentinian", "argentina"],
"🇵🇭 Philippines": ["/philippines/", "/ph/", "filipino", "pilipinas"]
[cite_start]"🇫🇷 France": ["/france/", "/fr/", "french", "français"], [cite: 164]
[cite_start]"🇮🇹 Italy": ["/italy/", "/it/", "italian", "italiano"], [cite: 164]
[cite_start]"🇪🇸 Spain": ["/spain/", "/es/", "spanish", "español"], [cite: 164]
[cite_start]"🇳🇱 Netherlands": ["/netherlands/", "/nl/", "dutch", "nederland"], [cite: 164]
[cite_start]"🇦🇺 Australia": ["/australia/", "/au/", "australian", "aussie"], [cite: 164]
[cite_start]"🇯🇵 Japan": ["/japan/", "/jp/", "japanese", "日本"], [cite: 164]
[cite_start]"🇰🇷 South Korea": ["/korea/", "/kr/", "korean", "한국"], [cite: 164]
[cite_start]"🇮🇳 India": ["/india/", "/in/", "indian", "भारत"], [cite: 164, 165]
[cite_start]"🇧🇷 Brazil": ["/brazil/", "/br/", "brazilian", "brasil"], [cite: 165]
[cite_start]"🇲🇽 Mexico": ["/mexico/", "/mx/", "mexican", "méxico"], [cite: 165]
[cite_start]"🇦🇷 Argentina": ["/argentina/", "/ar/", "argentinian", "argentina"], [cite: 165]
[cite_start]"🇵🇭 Philippines": ["/philippines/", "/ph/", "filipino", "pilipinas"] [cite: 165]
}
for country, patterns in logo_patterns.items():
for pattern in patterns:
if pattern in logo_url.lower():
return country
[cite_start]return country [cite: 166]
# STEP 5: Enhanced broadcaster patterns with more comprehensive coverage
# STEP 5: Enhanced broadcaster patterns
broadcaster_patterns = {
"🇨🇦 Canada": [
# Major networks
"cbc", "ctv", "global", "citytv", "aptn", "omni", "tvo",
# Sports
"tsn", "sportsnet", "rds", "rds info",
# Specialty
"teletoon", "ytv", "treehouse", "family channel", "oln", "cottage life",
"discovery canada", "history canada", "slice", "w network", "hgtv canada",
"food network canada", "showcase", "crave", "super channel", "hollywood suite",
# French Canadian
"ici", "télé-québec", "tva", "noovo", "canal d", "canal vie",
# Regional identifiers
"calgary", "vancouver", "toronto", "winnipeg", "montreal", "ottawa", "halifax",
"edmonton", "saskatoon", "regina", "victoria", "quebec city"
"cbc", "tsn", "ctv", "global", "sportsnet", "citytv", "aptn", "teletoon", "ytv",
"discovery canada", "history canada", "slice", "w network", "oln", "hgtv canada",
[cite_start]"food network canada", "showcase", "crave", "super channel", "hollywood suite" [cite: 166, 167]
],
"🇺🇸 United States": [
# Major networks
"cbs", "nbc", "abc", "fox", "cw", "pbs", "ion", "mynetworktv",
# News
"cnn", "fox news", "msnbc", "cnbc", "bloomberg", "newsmax", "oann",
# Sports
"espn", "fox sports", "nfl network", "mlb network", "nba tv", "nhl network",
# Premium
"hbo", "showtime", "starz", "cinemax", "epix",
# Cable networks
"mtv", "vh1", "comedy central", "cartoon network", "nickelodeon", "disney channel",
"discovery", "history", "tlc", "hgtv", "food network", "travel channel",
"lifetime", "hallmark", "e!", "bravo", "oxygen", "syfy", "usa network",
"tnt", "tbs", "fx", "fxx", "amc", "ifc", "tcm", "turner classic",
# But exclude specifically Canadian versions
"usa", "america", "american", "united states"
"cbs", "nbc", "abc", "fox", "cnn", "espn", "amc", "mtv", "comedy central",
"discovery usa", "history usa", "tlc usa", "hgtv usa", "food network usa", "paramount",
"nickelodeon usa", "cartoon network usa", "disney usa", "lifetime", "e!", "bravo usa"
],
"🇬🇧 United Kingdom": [
# BBC (but not BBC America)
"bbc one", "bbc two", "bbc three", "bbc four", "bbc news", "bbc iplayer",
"bbc scotland", "bbc wales", "bbc northern ireland", "bbc parliament",
"bbc comedy", "bbc drama", "bbc earth", "bbc world news",
# ITV
"itv", "itv2", "itv3", "itv4", "itv be", "itvx",
# Channel 4
"channel 4", "channel 5", "e4", "more4", "film4",
# Sky
"sky", "sky news", "sky sports", "sky one", "sky two", "sky atlantic",
# Other UK
"dave", "really", "yesterday", "drama", "alibi", "gold", "living",
"discovery uk", "history uk", "tlc uk", "quest", "dmax uk",
# UK specific terms
"british", "england", "scotland", "wales", "northern ireland", "uk"
[cite_start]"🇬🇧 United Kingdom": [ [cite: 168]
[cite_start]"bbc", "itv", "channel 4", "channel 5", "sky", "dave", "really", "yesterday", [cite: 168]
[cite_start]"discovery uk", "history uk", "tlc uk", "living", "alibi", "gold", "drama" [cite: 168]
],
"🇩🇪 Germany": [
"ard", "zdf", "rtl", "pro7", "sat.1", "vox", "kabel eins", "super rtl", "rtl2",
"discovery germany", "history germany", "tlc germany", "dmax", "sixx", "tele 5"
[cite_start]"discovery germany", "history germany", "tlc germany", "dmax", "sixx", "tele 5" [cite: 169]
],
"🇫🇷 France": [
"tf1", "france 2", "france 3", "france 5", "m6", "canal+", "arte", "w9", "tmc",
"discovery france", "history france", "tlc france", "planete+", "ushuaia tv"
],
"🇮🇹 Italy": [
"rai", "canale 5", "italia 1", "rete 4", "la7", "tv8", "nove", "20 mediaset",
"discovery italia", "history italia", "dmax italia", "real time", "giallo"
[cite_start]"rai", "canale 5", "italia 1", "rete 4", "la7", "tv8", "nove", "20 mediaset", [cite: 170]
[cite_start]"discovery italia", "history italia", "dmax italia", "real time", "giallo" [cite: 170]
],
"🇪🇸 Spain": [
"tve", "la 1", "la 2", "antena 3", "cuatro", "telecinco", "la sexta", "nova",
"discovery spain", "history spain", "dmax spain", "mega", "neox", "clan"
[cite_start]"discovery spain", "history spain", "dmax spain", "mega", "neox", "clan" [cite: 170, 171]
],
"🇳🇱 Netherlands": [
"npo", "rtl 4", "rtl 5", "rtl 7", "sbs6", "veronica", "net5", "rtl z",
"discovery netherlands", "history netherlands", "tlc netherlands"
[cite_start]"npo", "rtl 4", "rtl 5", "rtl 7", "sbs6", "veronica", "net5", "rtl z", [cite: 171]
[cite_start]"discovery netherlands", "history netherlands", "tlc netherlands" [cite: 171]
],
"🇦🇺 Australia": [
"abc australia", "nine network", "seven network", "ten", "foxtel",
"discovery australia", "history australia", "lifestyle"
[cite_start]"discovery australia", "history australia", "lifestyle" [cite: 171, 172]
],
"🇯🇵 Japan": [
"nhk", "fuji tv", "tbs", "tv asahi", "tv tokyo", "nippon tv", "animax"
@ -305,7 +207,7 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
"kbs", "mbc", "sbs", "jtbc", "tvn", "ocn"
],
"🇮🇳 India": [
"zee", "star plus", "colors", "sony tv", "& tv", "discovery india"
[cite_start]"zee", "star plus", "colors", "sony tv", "& tv", "discovery india" [cite: 173]
],
"🇧🇷 Brazil": [
"globo", "sbt", "record", "band", "discovery brasil"
@ -313,23 +215,17 @@ def detect_country_from_channel_content(channel_name, epg_id="", logo_url="", st
"🇲🇽 Mexico": [
"televisa", "tv azteca", "once tv", "discovery mexico"
],
"🇦🇷 Argentina": [
"telefe", "canal 13", "america tv", "discovery argentina"
[cite_start]"🇦🇷 Argentina": [ [cite: 174]
[cite_start]"telefe", "canal 13", "america tv", "discovery argentina" [cite: 174]
],
"🇵🇭 Philippines": [
"abs-cbn", "gma", "anc", "tv5", "pba rush"
]
}
# Enhanced pattern matching with conflict resolution
for country, keywords in broadcaster_patterns.items():
for keyword in keywords:
if keyword in all_text:
# Special handling for conflicting patterns
if country == "🇺🇸 United States" and any(ca_term in all_text for ca_term in [".ca", "canada", "canadian"]):
continue # Skip US assignment if Canadian indicators present
if country == "🇬🇧 United Kingdom" and "america" in all_text:
continue # Skip UK assignment if "america" is present
[cite_start]if keyword in all_text: [cite: 175]
return country
return "Uncategorized"
def is_valid_country_group(group_name):
    """Return True if *group_name* is one of the recognized country groups.

    The list below is the canonical set of "<flag emoji> <Country>" group
    labels; anything else (e.g. "Uncategorized") is rejected.
    """
    # Set literal gives O(1) membership instead of a linear list scan.
    valid_countries = {
        "🇺🇸 United States", "🇨🇦 Canada", "🇬🇧 United Kingdom", "🇩🇪 Germany",
        "🇫🇷 France", "🇮🇹 Italy", "🇪🇸 Spain", "🇳🇱 Netherlands", "🇧🇪 Belgium",
        "🇦🇹 Austria", "🇨🇭 Switzerland", "🇸🇪 Sweden", "🇳🇴 Norway", "🇩🇰 Denmark",
        "🇫🇮 Finland", "🇵🇱 Poland", "🇨🇿 Czech Republic", "🇭🇺 Hungary", "🇵🇹 Portugal",
        "🇬🇷 Greece", "🇷🇴 Romania", "🇧🇬 Bulgaria", "🇭🇷 Croatia", "🇷🇸 Serbia",
        "🇦🇺 Australia", "🇯🇵 Japan", "🇰🇷 South Korea", "🇮🇳 India", "🇨🇳 China",
        "🇧🇷 Brazil", "🇲🇽 Mexico", "🇦🇷 Argentina", "🇨🇱 Chile", "🇨🇴 Colombia",
        "🇷🇺 Russia", "🇹🇷 Turkey", "🇸🇦 Saudi Arabia", "🇦🇪 UAE", "🇪🇬 Egypt",
        "🇿🇦 South Africa", "🇳🇬 Nigeria", "🇰🇪 Kenya", "🇮🇱 Israel", "🇹🇭 Thailand",
        "🇻🇳 Vietnam", "🇵🇭 Philippines", "🇮🇩 Indonesia", "🇲🇾 Malaysia", "🇸🇬 Singapore",
    }
    return group_name in valid_countries
def clean_malformed_channel_name(raw_name):
    """Extract clean channel name from malformed EXTINF data.

    Returns "Unknown Channel" when nothing usable can be recovered.
    """
    if not raw_name or len(raw_name) < 2:
        return "Unknown Channel"
    # Handle completely malformed entries like:
    # ".AB.ca",.AB.ca" tvg-logo="..." group-title="DaddyLive CA",CTV Canada [HD]"
    if raw_name.startswith('".') and 'tvg-logo=' in raw_name:
        # Extract the actual channel name after the last comma
        parts = raw_name.split(',')
        if len(parts) > 1:
            clean_name = parts[-1].strip().strip('"').strip()
            if clean_name:
                return clean_name
    # If it contains EXTINF data, extract the name.
    # BUG FIX: re.search was called without the string to search (raw_name)
    # and without the $ anchor, which raised TypeError at runtime.
    if 'group-title=' in raw_name and ',' in raw_name:
        extinf_match = re.search(r'group-title="[^"]*",(.+)$', raw_name)
        if extinf_match:
            return extinf_match.group(1).strip().strip('"')
    # If it has extra quotes and domains, clean them
    if raw_name.startswith('.') and raw_name.count('"') > 2:
        parts = raw_name.split(',')
        for part in reversed(parts):
            cleaned = part.strip().strip('"').strip()
            if cleaned and not cleaned.startswith('.') and len(cleaned) > 2:
                if not any(x in cleaned.lower() for x in ['http', 'tvg-', 'group-title', '.com', '.ca', '.us']):
                    return cleaned
    # Basic cleaning
    cleaned = raw_name.strip().strip('"').strip()
    # Remove leading dots and domains
    if cleaned.startswith('.'):
        cleaned = re.sub(r'^\.[\w.]+["\']*,?\s*', '', cleaned)
    # Remove trailing EXTINF attributes (group-title strip restored to match
    # the original implementation of this helper).
    cleaned = re.sub(r'\s+tvg-.*$', '', cleaned)
    cleaned = re.sub(r'\s+group-title.*$', '', cleaned)
    return cleaned if cleaned and len(cleaned) > 1 else "Unknown Channel"
def extract_epg_from_malformed(raw_name):
    """Extract EPG ID from malformed data.

    Looks for two-letter province/state domain fragments such as ".AB.ca"
    or ".NY.us" and maps them to "generic.<XX>.<tld>"; returns "" otherwise.
    """
    # Look for Canadian province patterns like .AB.ca, .ON.ca, etc.
    domain_match = re.search(r'\.([A-Z]{2})\.ca', raw_name)
    if domain_match:
        province = domain_match.group(1)
        return f"generic.{province}.ca"
    # Look for US state patterns like .NY.us
    domain_match = re.search(r'\.([A-Z]{2})\.us', raw_name)
    if domain_match:
        state = domain_match.group(1)
        return f"generic.{state}.us"
    return ""
def load_channels():
"""Load channels from channels.txt with integrated data cleanup."""
if not os.path.exists('channels.txt'):
@ -362,51 +321,51 @@ def load_channels():
with open('channels.txt', 'r', encoding='utf-8') as f:
content = f.read()
channels = []
cleaned_count = 0
[cite_start]channels = [] [cite: 235]
[cite_start]cleaned_count = 0 [cite: 235]
print("🧹 Step 1: Data Cleanup (fixing malformed entries)")
print("-" * 50)
[cite_start]print("🧹 Step 1: Data Cleanup (fixing malformed entries)") [cite: 235]
[cite_start]print("-" * 50) [cite: 235]
for block in content.split('\n\n'):
if not block.strip():
continue
[cite_start]continue [cite: 236]
channel_data = {}
for line in block.strip().split('\n'):
if '=' in line:
key, value = line.split('=', 1)
key = key.strip()
value = value.strip()
[cite_start]key = key.strip() [cite: 237]
[cite_start]value = value.strip() [cite: 237]
if key == "Stream name":
# Check if this is malformed
if (value.startswith('".') or 'tvg-logo=' in value or
'group-title=' in value or value.count('"') > 2):
[cite_start]if (value.startswith('".') or 'tvg-logo=' in value or [cite: 238]
[cite_start]'group-title=' in value or value.count('"') > 2): [cite: 238]
# Clean the malformed name
clean_name = clean_malformed_channel_name(value)
channel_data["Stream name"] = clean_name
[cite_start]clean_name = clean_malformed_channel_name(value) [cite: 239]
[cite_start]channel_data["Stream name"] = clean_name [cite: 239, 240]
# Extract EPG ID if missing
if not channel_data.get("EPG id"):
extracted_epg = extract_epg_from_malformed(value)
[cite_start]if not channel_data.get("EPG id"): [cite: 240]
[cite_start]extracted_epg = extract_epg_from_malformed(value) [cite: 241]
if extracted_epg:
channel_data["EPG id"] = extracted_epg
cleaned_count += 1
[cite_start]cleaned_count += 1 [cite: 242]
if cleaned_count <= 10: # Show first 10 examples
print(f"🔧 Fixed: '{value[:40]}...''{clean_name}'")
[cite_start]print(f"🔧 Fixed: '{value[:40]}...''{clean_name}'") [cite: 243]
else:
channel_data[key] = value
else:
[cite_start]else: [cite: 244]
channel_data[key] = value
# Only add channels with valid names
if (channel_data.get('Stream name') and
len(channel_data.get('Stream name', '')) > 1 and
channel_data.get('Stream name') != "Unknown Channel"):
[cite_start]channel_data.get('Stream name') != "Unknown Channel"): [cite: 245]
channels.append(channel_data)
print(f"✅ Data cleanup complete: {cleaned_count} entries fixed")
@ -414,22 +373,22 @@ def load_channels():
return channels
except Exception as e:
print(f"❌ Error loading channels: {e}")
[cite_start]print(f"❌ Error loading channels: {e}") [cite: 246]
return []
def reorganize_channels(channels):
"""Enhanced reorganization with integrated cleanup + 3-point analysis."""
print("\n🔍 Step 2: Enhanced Country Detection with 3-Point Analysis")
print("📊 Analyzing: Channel Name + EPG ID + Logo URL")
print("-" * 60)
"""Enhanced reorganization with 3-point analysis."""
[cite_start]print("\n🔍 Step 2: Enhanced Country Detection with 3-Point Analysis") [cite: 179]
[cite_start]print("📊 Analyzing: Channel Name + EPG ID + Logo URL") [cite: 179]
[cite_start]print("-" * 60) [cite: 179]
changes = 0
stats = {
'country_detected': 0,
'sent_to_uncategorized': 0,
'kept_existing_country': 0,
'streaming_filtered': 0
[cite_start]'country_detected': 0, [cite: 180]
[cite_start]'sent_to_uncategorized': 0, [cite: 180]
[cite_start]'kept_existing_country': 0, [cite: 180]
[cite_start]'streaming_filtered': 0 [cite: 180]
}
country_counts = {}
@ -438,14 +397,14 @@ def reorganize_channels(channels):
stream_name = channel.get('Stream name', '')
epg_id = channel.get('EPG id', '')
logo = channel.get('Logo', '')
stream_url = channel.get('Stream URL', '')
[cite_start]stream_url = channel.get('Stream URL', '') [cite: 181]
# Detect country using enhanced 3-point analysis
detected_country = detect_country_from_channel_content(stream_name, epg_id, logo, stream_url)
# Debug output for first few channels to see what's happening
if changes < 5:
print(f"🔍 Debug: '{stream_name}' | EPG: '{epg_id}' | Detected: {detected_country}")
[cite_start]print(f"🔍 Debug: '{stream_name}' | EPG: '{epg_id}' | Detected: {detected_country}") [cite: 181, 182]
# Decide final group
if is_valid_country_group(old_group) and detected_country != "Uncategorized":
@ -454,23 +413,23 @@ def reorganize_channels(channels):
stats['kept_existing_country'] += 1
elif detected_country != "Uncategorized":
# Use detected country
final_group = detected_country
stats['country_detected'] += 1
[cite_start]final_group = detected_country [cite: 183]
[cite_start]stats['country_detected'] += 1 [cite: 183]
if old_group != detected_country:
print(f"🔍 Fixed: '{stream_name}' {old_group}{detected_country}")
changes += 1
else:
# Send to Uncategorized
final_group = "Uncategorized"
stats['sent_to_uncategorized'] += 1
[cite_start]final_group = "Uncategorized" [cite: 184]
[cite_start]stats['sent_to_uncategorized'] += 1 [cite: 184]
if old_group != "Uncategorized":
# Check if it's a streaming service
if any(service in stream_name.lower() for service in ['samsung', 'pluto', 'plex', 'tubi']):
stats['streaming_filtered'] += 1
print(f"📱 Platform: '{stream_name}' → Uncategorized")
[cite_start]if any(service in stream_name.lower() for service in ['samsung', 'pluto', 'plex', 'tubi']): [cite: 184, 185]
[cite_start]stats['streaming_filtered'] += 1 [cite: 185]
[cite_start]print(f"📱 Platform: '{stream_name}' → Uncategorized") [cite: 185]
else:
print(f"❓ Undetected: '{stream_name}' → Uncategorized")
changes += 1
[cite_start]changes += 1 [cite: 186]
channel['Group'] = final_group
country_counts[final_group] = country_counts.get(final_group, 0) + 1
@ -483,7 +442,8 @@ def reorganize_channels(channels):
print(f"❓ Sent to Uncategorized: {stats['sent_to_uncategorized']}")
print(f"\n🌍 FINAL GROUP DISTRIBUTION:")
sorted_countries = sorted(country_counts.items(), key=lambda x: (x[0] == "Uncategorized", -x[1]))
[cite_start]sorted_countries = sorted(country_counts.items(), key=lambda x: (x[0] == "Uncategorized", -x[1])) [cite: 187]
for country, count in sorted_countries:
percentage = (count / len(channels) * 100) if len(channels) > 0 else 0
print(f" {country}: {count} channels ({percentage:.1f}%)")
@ -497,23 +457,23 @@ def save_channels(channels):
if os.path.exists('channels.txt'):
backup = f"channels_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
shutil.copy2('channels.txt', backup)
print(f"📋 Backup: {backup}")
[cite_start]print(f"📋 Backup: {backup}") [cite: 188]
try:
with open('channels.txt', 'w', encoding='utf-8') as f:
for i, channel in enumerate(channels):
if i > 0:
f.write("\n\n")
f.write(f"Group = {channel.get('Group', 'Uncategorized')}\n")
f.write(f"Stream name = {channel.get('Stream name', 'Unknown')}\n")
f.write(f"Logo = {channel.get('Logo', '')}\n")
f.write(f"EPG id = {channel.get('EPG id', '')}\n")
f.write(f"Stream URL = {channel.get('Stream URL', '')}\n")
[cite_start]f.write(f"Group = {channel.get('Group', 'Uncategorized')}\n") [cite: 188, 189]
[cite_start]f.write(f"Stream name = {channel.get('Stream name', 'Unknown')}\n") [cite: 189]
[cite_start]f.write(f"Logo = {channel.get('Logo', '')}\n") [cite: 189]
[cite_start]f.write(f"EPG id = {channel.get('EPG id', '')}\n") [cite: 189]
[cite_start]f.write(f"Stream URL = {channel.get('Stream URL', '')}\n") [cite: 189]
print(f"✅ Saved {len(channels)} channels")
[cite_start]print(f"✅ Saved {len(channels)} channels") [cite: 190]
return True
except Exception as e:
print(f"❌ Save error: {e}")
[cite_start]print(f"❌ Save error: {e}") [cite: 190]
return False
@ -523,25 +483,25 @@ def generate_m3u(channels):
with open('playlist.m3u', 'w', encoding='utf-8') as f:
f.write('#EXTM3U\n')
for channel in channels:
name = channel.get('Stream name', '')
group = channel.get('Group', 'Uncategorized')
logo = channel.get('Logo', '')
epg_id = channel.get('EPG id', '')
url = channel.get('Stream URL', '')
[cite_start]for channel in channels: [cite: 191]
[cite_start]name = channel.get('Stream name', '') [cite: 191]
[cite_start]group = channel.get('Group', 'Uncategorized') [cite: 191]
[cite_start]logo = channel.get('Logo', '') [cite: 191]
[cite_start]epg_id = channel.get('EPG id', '') [cite: 191]
[cite_start]url = channel.get('Stream URL', '') [cite: 191]
if name and url:
f.write(f'#EXTINF:-1 group-title="{group}"')
[cite_start]if name and url: [cite: 192]
[cite_start]f.write(f'#EXTINF:-1 group-title="{group}"') [cite: 192]
if logo:
f.write(f' tvg-logo="{logo}"')
[cite_start]f.write(f' tvg-logo="{logo}"') [cite: 193]
if epg_id:
f.write(f' tvg-id="{epg_id}"')
f.write(f',{name}\n{url}\n')
print("✅ Generated playlist.m3u")
return True
except Exception as e:
print(f"❌ M3U error: {e}")
[cite_start]except Exception as e: [cite: 194]
[cite_start]print(f"❌ M3U error: {e}") [cite: 194]
return False
@ -554,7 +514,7 @@ def main():
print("🎯 Step 3: Filter streaming services to Uncategorized")
print("=" * 80)
channels = load_channels()
[cite_start]channels = load_channels() [cite: 195]
if not channels:
return False
@ -568,7 +528,7 @@ def main():
))
# Save and generate
if not save_channels(channels):
[cite_start]if not save_channels(channels): [cite: 196]
return False
if not generate_m3u(channels):
@ -582,20 +542,20 @@ def main():
except:
pass
print("\n🎉 ENHANCED PROCESSING COMPLETE!")
print("✅ Malformed data cleaned and fixed")
print("✅ 3-point analysis applied to all channels")
print("✅ Countries detected from EPG ID, Logo URL, and Channel Names")
print("✅ Streaming services filtered to Uncategorized")
print("✅ Clean country-organized playlist generated")
[cite_start]print("\n🎉 ENHANCED PROCESSING COMPLETE!") [cite: 197]
[cite_start]print("✅ Malformed data cleaned and fixed") [cite: 197]
[cite_start]print("✅ 3-point analysis applied to all channels") [cite: 197]
[cite_start]print("✅ Countries detected from EPG ID, Logo URL, and Channel Names") [cite: 197]
[cite_start]print("✅ Streaming services filtered to Uncategorized") [cite: 197]
[cite_start]print("✅ Clean country-organized playlist generated") [cite: 197]
# Final statistics
uncategorized_count = sum(1 for ch in channels if ch.get('Group') == 'Uncategorized')
success_rate = ((len(channels) - uncategorized_count) / len(channels) * 100) if len(channels) > 0 else 0
print(f"\n📊 FINAL STATISTICS:")
print(f" Total channels: {len(channels)}")
print(f" Properly categorized: {len(channels) - uncategorized_count} ({success_rate:.1f}%)")
print(f" In Uncategorized: {uncategorized_count} ({100 - success_rate:.1f}%)")
[cite_start]success_rate = ((len(channels) - uncategorized_count) / len(channels) * 100) if len(channels) > 0 else 0 [cite: 198]
[cite_start]print(f"\n📊 FINAL STATISTICS:") [cite: 198]
[cite_start]print(f" Total channels: {len(channels)}") [cite: 198]
[cite_start]print(f" Properly categorized: {len(channels) - uncategorized_count} ({success_rate:.1f}%)") [cite: 198]
[cite_start]print(f" In Uncategorized: {uncategorized_count} ({100 - success_rate:.1f}%)") [cite: 198]
return True