"""
Phase 3 自动分类系统
基于商品名称和品牌信息进行智能分类
"""

import re
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass

from app.models.products import ProductCategory


@dataclass
class ClassificationRule:
    """Matching rule set for one product category.

    Bundles the keyword, brand-keyword and regex-pattern lists that
    ``ProductClassifier`` scores a product text against.
    """
    # Category this rule stands for.
    category: ProductCategory
    # Substrings looked up in the combined product text.
    keywords: List[str]
    # Brand names/aliases; the classifier only checks these when a brand is supplied.
    brand_keywords: List[str]
    # Regular-expression sources; the classifier compiles them with re.IGNORECASE.
    patterns: List[str]
    # Multiplier applied to the category's summed score.
    weight: float = 1.0


class ProductClassifier:
    """产品自动分类器"""
    
    def __init__(self):
        """Build the rule table and pre-compile each rule's regex patterns."""
        # Order matters: _compile_patterns() reads self.rules.
        self.rules = self._load_classification_rules()
        self.compiled_patterns = self._compile_patterns()
    
    def _load_classification_rules(self) -> Dict[ProductCategory, ClassificationRule]:
        """加载分类规则"""
        rules = {
            # 美妆类 MC
            ProductCategory.MC: ClassificationRule(
                category=ProductCategory.MC,
                keywords=[
                    "口红", "唇膏", "唇釉", "唇彩", "唇蜜", "润唇膏", "唇线笔",
                    "粉底", "粉底液", "气垫", "BB霜", "CC霜", "遮瑕", "散粉", "蜜粉",
                    "眼影", "眼线", "眼线笔", "眼线液", "睫毛膏", "眉笔", "眉粉", "眉毛", "染眉膏",
                    "腮红", "高光", "阴影", "修容", "化妆品", "彩妆", "美妆", "护肤品",
                    "面霜", "面膜", "精华", "乳液", "爽肤水", "洁面", "卸妆", "防晒霜",
                    "香水", "香氛", "指甲油", "美甲", "卸甲", "甲油胶"
                ],
                brand_keywords=[
                    "雅诗兰黛", "兰蔻", "迪奥", "香奈儿", "YSL", "圣罗兰", "阿玛尼", "娇兰",
                    "SK-II", "资生堂", "FANCL", "POLA", "KOSE", "花王", "佳丽宝",
                    "欧莱雅", "美宝莲", "卡姿兰", "完美日记", "花西子", "橘朵", "colorkey",
                    "MAC", "NARS", "URBAN DECAY", "TOO FACED", "BENEFIT", "CLINIQUE"
                ],
                patterns=[
                    r".*[口唇][红膏釉彩蜜].*",
                    r".*[粉底][液霜].*",
                    r".*[眼影睫毛眉毛].*",
                    r".*[面膜护肤化妆美妆].*",
                    r".*[香水香氛].*"
                ],
                weight=1.0
            ),
            
            # 服装类 AT
            ProductCategory.AT: ClassificationRule(
                category=ProductCategory.AT,
                keywords=[
                    "T恤", "衬衫", "衬衣", "polo", "POLO", "卫衣", "毛衣", "针织", "开衫",
                    "外套", "夹克", "风衣", "大衣", "棉衣", "羽绒服", "马甲", "背心",
                    "裤子", "牛仔裤", "休闲裤", "西裤", "短裤", "运动裤", "打底裤", "连体裤",
                    "连衣裙", "半身裙", "长裙", "短裙", "A字裙", "包臀裙", "百褶裙",
                    "西装", "套装", "正装", "礼服", "晚装", "婚纱", "旗袍",
                    "睡衣", "家居服", "内衣", "文胸", "内裤", "袜子", "丝袜", "连裤袜",
                    "上衣", "下装", "服装", "服饰", "童装", "男装", "女装"
                ],
                brand_keywords=[
                    "ZARA", "H&M", "UNIQLO", "优衣库", "GAP", "COS", "MUJI", "无印良品",
                    "NIKE", "ADIDAS", "PUMA", "李宁", "安踏", "特步", "361", "匹克",
                    "ONLY", "VERO MODA", "JACK JONES", "SELECTED", "太平鸟", "森马",
                    "李维斯", "LEVIS", "CALVIN KLEIN", "TOMMY", "POLO", "拉夫劳伦"
                ],
                patterns=[
                    r".*[上衣下装外套裤子裙子].*",
                    r".*[T恤衬衫卫衣毛衣].*",
                    r".*[夹克风衣大衣羽绒服].*",
                    r".*[连衣裙半身裙].*",
                    r".*[牛仔休闲运动西装正装].*"
                ],
                weight=1.0
            ),
            
            # 配饰类 AP
            ProductCategory.AP: ClassificationRule(
                category=ProductCategory.AP,
                keywords=[
                    "手表", "腕表", "智能手表", "电子表", "机械表", "石英表",
                    "项链", "手链", "手镯", "戒指", "指环", "耳环", "耳钉", "耳坠",
                    "胸针", "别针", "发夹", "发带", "头饰", "发圈", "头花",
                    "帽子", "棒球帽", "鸭舌帽", "贝雷帽", "毛线帽", "遮阳帽", "渔夫帽",
                    "围巾", "丝巾", "披肩", "头巾", "领带", "领结", "领巾",
                    "皮带", "腰带", "吊带", "背带", "手套", "袖套",
                    "太阳镜", "墨镜", "眼镜", "老花镜", "近视镜", "隐形眼镜",
                    "配饰", "饰品", "首饰", "珠宝", "装饰品"
                ],
                brand_keywords=[
                    "劳力士", "欧米茄", "浪琴", "天梭", "卡西欧", "精工", "西铁城",
                    "卡地亚", "蒂芙尼", "宝格丽", "梵克雅宝", "周大福", "周生生",
                    "雷朋", "奥克利", "古驰", "普拉达", "路易威登", "爱马仕"
                ],
                patterns=[
                    r".*[手表腕表].*",
                    r".*[项链手链戒指耳环].*",
                    r".*[帽子围巾皮带].*",
                    r".*[太阳镜墨镜眼镜].*",
                    r".*[配饰饰品首饰].*"
                ],
                weight=1.0
            ),
            
            # 箱包类 LA
            ProductCategory.LA: ClassificationRule(
                category=ProductCategory.LA,
                keywords=[
                    "包", "包包", "手提包", "单肩包", "双肩包", "斜挎包", "背包",
                    "钱包", "钱夹", "卡包", "零钱包", "手拿包", "晚宴包", "手包",
                    "公文包", "电脑包", "商务包", "书包", "学生包", "旅行包", "行李包",
                    "拉杆箱", "行李箱", "旅行箱", "登机箱", "托运箱", "硬箱", "软箱",
                    "化妆包", "洗漱包", "收纳包", "购物袋", "帆布袋", "环保袋",
                    "腰包", "胸包", "挎包", "邮差包", "信封包", "贝壳包", "水桶包",
                    "箱包", "皮具", "皮包", "真皮包", "帆布包"
                ],
                brand_keywords=[
                    "LV", "路易威登", "古驰", "GUCCI", "普拉达", "PRADA", "爱马仕", "HERMES",
                    "香奈儿", "CHANEL", "迪奥", "DIOR", "芬迪", "FENDI", "巴黎世家",
                    "新秀丽", "美旅", "外交官", "戴尔蒙", "卡拉羊", "爱华仕"
                ],
                patterns=[
                    r".*包.*",
                    r".*[手提单肩双肩斜挎背]包.*",
                    r".*[拉杆行李旅行登机]箱.*",
                    r".*[钱包卡包零钱包].*",
                    r".*[箱包皮具皮包].*"
                ],
                weight=1.0
            ),
            
            # 鞋子类 SS
            ProductCategory.SS: ClassificationRule(
                category=ProductCategory.SS,
                keywords=[
                    "鞋", "鞋子", "运动鞋", "跑鞋", "篮球鞋", "足球鞋", "网球鞋", "板鞋",
                    "休闲鞋", "帆布鞋", "小白鞋", "老爹鞋", "厚底鞋", "增高鞋",
                    "皮鞋", "商务鞋", "正装鞋", "德比鞋", "牛津鞋", "布洛克鞋",
                    "高跟鞋", "尖头鞋", "圆头鞋", "方头鞋", "细跟鞋", "粗跟鞋", "坡跟鞋",
                    "平底鞋", "豆豆鞋", "乐福鞋", "玛丽珍鞋", "芭蕾鞋", "懒人鞋",
                    "靴子", "短靴", "长靴", "马丁靴", "雪地靴", "雨靴", "工装靴",
                    "凉鞋", "拖鞋", "洞洞鞋", "人字拖", "夹脚拖", "室内拖鞋",
                    "童鞋", "婴儿鞋", "学步鞋"
                ],
                brand_keywords=[
                    "NIKE", "ADIDAS", "PUMA", "NEW BALANCE", "CONVERSE", "VANS",
                    "李宁", "安踏", "特步", "361", "匹克", "乔丹", "鸿星尔克",
                    "Dr.Martens", "马丁", "UGG", "Timberland", "添柏岚",
                    "CHRISTIAN LOUBOUTIN", "Jimmy Choo", "Manolo Blahnik"
                ],
                patterns=[
                    r".*鞋.*",
                    r".*[运动跑步篮球足球网球]鞋.*",
                    r".*[休闲帆布皮鞋高跟平底]鞋.*",
                    r".*[靴子短靴长靴雪地靴].*",
                    r".*[凉鞋拖鞋].*"
                ],
                weight=1.0
            ),
            
            # 内衣类 NY
            ProductCategory.NY: ClassificationRule(
                category=ProductCategory.NY,
                keywords=[
                    "内衣", "文胸", "胸罩", "bra", "BRA", "聚拢", "无钢圈", "调整型",
                    "内裤", "三角裤", "平角裤", "丁字裤", "安全裤", "打底裤内穿",
                    "睡衣", "睡袍", "睡裙", "家居服", "居家服", "loungewear",
                    "保暖内衣", "秋衣", "秋裤", "长内衣", "长内裤", "打底衫内穿",
                    "丝袜", "连裤袜", "长筒袜", "中筒袜", "短袜", "船袜", "隐形袜",
                    "塑身衣", "塑形衣", "束腰", "美体", "收腹", "提臀",
                    "哺乳", "孕妇", "产后", "哺乳内衣", "孕妇内衣"
                ],
                brand_keywords=[
                    "维多利亚的秘密", "VS", "Calvin Klein", "CK", "TRIUMPH", "黛安芬",
                    "华歌尔", "爱慕", "古今", "曼妮芬", "都市丽人", "猫人",
                    "南极人", "恒源祥", "俞兆林", "浪莎", "宝娜斯", "桑蚕丝"
                ],
                patterns=[
                    r".*[内衣文胸胸罩].*",
                    r".*[内裤三角裤平角裤].*",
                    r".*[睡衣睡袍家居服].*",
                    r".*[保暖内衣秋衣秋裤].*",
                    r".*[丝袜连裤袜长筒袜].*",
                    r".*[塑身衣塑形衣束腰].*"
                ],
                weight=1.0
            )
        }
        
        return rules
    
    def _compile_patterns(self) -> Dict[ProductCategory, List]:
        """编译正则表达式模式"""
        compiled = {}
        for category, rule in self.rules.items():
            compiled[category] = [re.compile(pattern, re.IGNORECASE) for pattern in rule.patterns]
        return compiled
    
    def classify_product(
        self, 
        product_name: str, 
        brand: Optional[str] = None,
        description: Optional[str] = None
    ) -> Tuple[ProductCategory, float]:
        """
        对产品进行分类
        
        Args:
            product_name: 产品名称
            brand: 品牌名称
            description: 产品描述
            
        Returns:
            Tuple[分类结果, 置信度]
        """
        if not product_name:
            return ProductCategory.OTHER, 0.0
        
        # 合并所有文本进行分析
        text_to_analyze = product_name.lower()
        if brand:
            text_to_analyze += f" {brand.lower()}"
        if description:
            text_to_analyze += f" {description.lower()}"
        
        category_scores = {}
        
        for category, rule in self.rules.items():
            score = 0.0
            
            # 关键词匹配
            keyword_matches = sum(1 for keyword in rule.keywords if keyword in text_to_analyze)
            score += keyword_matches * 2.0
            
            # 品牌关键词匹配
            if brand:
                brand_matches = sum(1 for brand_kw in rule.brand_keywords 
                                 if brand_kw.lower() in text_to_analyze)
                score += brand_matches * 3.0
            
            # 正则模式匹配
            pattern_matches = sum(1 for pattern in self.compiled_patterns.get(category, [])
                                if pattern.search(text_to_analyze))
            score += pattern_matches * 1.5
            
            # 应用权重
            score *= rule.weight
            
            if score > 0:
                category_scores[category] = score
        
        if not category_scores:
            return ProductCategory.OTHER, 0.0
        
        # 找到得分最高的分类
        best_category = max(category_scores.keys(), key=lambda k: category_scores[k])
        best_score = category_scores[best_category]
        
        # 计算置信度（标准化到0-1之间）
        total_score = sum(category_scores.values())
        confidence = best_score / total_score if total_score > 0 else 0.0
        confidence = min(confidence, 1.0)
        
        # 如果最高分太低，返回OTHER分类
        if best_score < 1.0:
            return ProductCategory.OTHER, confidence * 0.5
        
        return best_category, confidence
    
    def get_category_keywords(self, category: ProductCategory) -> List[str]:
        """获取指定分类的关键词"""
        rule = self.rules.get(category)
        return rule.keywords if rule else []
    
    def get_all_categories(self) -> List[ProductCategory]:
        """获取所有可用分类"""
        return list(self.rules.keys())
    
    def explain_classification(
        self, 
        product_name: str, 
        brand: Optional[str] = None,
        description: Optional[str] = None
    ) -> Dict:
        """
        解释分类结果
        
        返回详细的分类分析信息
        """
        if not product_name:
            return {"error": "产品名称不能为空"}
        
        text_to_analyze = product_name.lower()
        if brand:
            text_to_analyze += f" {brand.lower()}"
        if description:
            text_to_analyze += f" {description.lower()}"
        
        analysis = {
            "input_text": text_to_analyze,
            "category_analysis": {},
            "final_result": None
        }
        
        for category, rule in self.rules.items():
            matched_keywords = [kw for kw in rule.keywords if kw in text_to_analyze]
            matched_brand_keywords = [kw for kw in rule.brand_keywords if kw.lower() in text_to_analyze] if brand else []
            matched_patterns = []
            
            for i, pattern in enumerate(self.compiled_patterns.get(category, [])):
                if pattern.search(text_to_analyze):
                    matched_patterns.append(rule.patterns[i])
            
            score = len(matched_keywords) * 2.0 + len(matched_brand_keywords) * 3.0 + len(matched_patterns) * 1.5
            score *= rule.weight
            
            analysis["category_analysis"][category.value] = {
                "score": score,
                "matched_keywords": matched_keywords,
                "matched_brand_keywords": matched_brand_keywords,
                "matched_patterns": matched_patterns
            }
        
        # 获取最终分类结果
        final_category, confidence = self.classify_product(product_name, brand, description)
        analysis["final_result"] = {
            "category": final_category.value,
            "confidence": confidence
        }
        
        return analysis