#!/usr/bin/env python3
"""
修复品牌解析问题
- 添加缺失的品牌到别名表
- 更新品牌解析逻辑
"""

import asyncio
import sqlite3
from typing import List, Tuple

# 需要添加的品牌别名映射
BRAND_ALIASES_TO_ADD = [
    # 格式: (alias, canonical_brand, confidence)
    
    # 多词品牌
    ('Tom Ford', 'Tom Ford', 100),
    ('Ader Error', 'Ader Error', 100),
    ('La Prairie', 'La Prairie', 100),
    ('LA MER', 'La Mer', 100),
    ('La Mer', 'La Mer', 100),
    ('Isabel Marant', 'Isabel Marant', 100),
    ('ISABEL MARANT', 'Isabel Marant', 100),
    ('Mihara Yasuhiro', 'Mihara Yasuhiro', 100),
    ('MIHARA YASUHIRO', 'Mihara Yasuhiro', 100),
    ('三原康裕', 'Mihara Yasuhiro', 100),
    ('Rag & Bone', 'Rag & Bone', 100),
    ('Rag&Bone', 'Rag & Bone', 100),
    
    # 单词品牌
    ('Cellcosmet', 'Cellcosmet', 100),
    ('瑞妍', 'Cellcosmet', 100),
    ('Jansport', 'Jansport', 100),
    ('杰斯伯', 'Jansport', 100),
    ('Woolimax', 'Woolimax', 100),
    ('Westman', 'Westman Atelier', 100),
    ('Valmont', 'Valmont', 100),
    ('法尔曼', 'Valmont', 100),
    ('Toteme', 'Toteme', 100),
    ('TOTEME', 'Toteme', 100),
    ('SKIMS', 'SKIMS', 100),
    ('Reveal', 'Reveal', 100),
    ('REVEAL', 'Reveal', 100),
    ('PRADA', 'Prada', 100),
    ('Prada', 'Prada', 100),
    ('PANE', 'PANE', 100),
    ('Pane', 'PANE', 100),
    ('NANAMICA', 'Nanamica', 100),
    ('Nanamica', 'Nanamica', 100),
    ('Longchamp', 'Longchamp', 100),
    ('LONGCHAMP', 'Longchamp', 100),
    ('Lanvin', 'Lanvin', 100),
    ('浪凡', 'Lanvin', 100),
    ('MFK', 'Maison Francis Kurkdjian', 100),
    ('Fendi', 'Fendi', 100),
    ('FENDI', 'Fendi', 100),
    ('Celine', 'Celine', 100),
    ('CELINE', 'Celine', 100),
    ('Bogner', 'Bogner', 100),
    ('博格纳', 'Bogner', 100),
    ('Balmain', 'Balmain', 100),
    ('BALMAIN', 'Balmain', 100),
    ('BARROW', 'Barrow', 100),
    ('Barrow', 'Barrow', 100),
    
    # 始祖鸟的各种写法
    ('Arcteryx', 'Arc\'teryx', 100),
    ('ArcTeryx', 'Arc\'teryx', 100),
    ('Arc\'teryx', 'Arc\'teryx', 100),
    ('始祖鸟', 'Arc\'teryx', 100),
    
    # 祖马龙
    ('祖马龙', 'Jo Malone', 100),
    ('Jo Malone', 'Jo Malone', 100),
    
    # 海蓝之谜
    ('海蓝之谜', 'La Mer', 100),
    
    # 莱珀妮
    ('莱珀妮', 'La Prairie', 100),
]

# 需要更新到忽略列表的词汇
IGNORE_KEYWORDS_TO_ADD = [
    'GN现货', 'GN', '美国代购', '代购', '美国采购', '采购',
    '现货', '小王国', 'MC', 'LA', 'SS', 'AT', 'AP',
    '月到期', '天发货', '可验', '蓝码可验', '无盒', '院装'
]


async def fix_brand_aliases():
    """修复品牌别名表"""
    
    db_path = '/Users/jinjunqian/PycharmProjects/订单处理新版/ordersys/backend/ordersys.db'
    
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    try:
        # 添加品牌别名
        added_count = 0
        skipped_count = 0
        
        for alias, canonical, confidence in BRAND_ALIASES_TO_ADD:
            # 检查是否已存在
            cursor.execute(
                "SELECT id FROM brand_aliases WHERE alias = ?",
                (alias,)
            )
            existing = cursor.fetchone()
            
            if existing:
                print(f"跳过已存在的别名: {alias}")
                skipped_count += 1
            else:
                # 插入新别名
                cursor.execute(
                    """
                    INSERT INTO brand_aliases 
                    (alias, canonical_brand, confidence, is_confirmed, created_at)
                    VALUES (?, ?, ?, 'confirmed', datetime('now'))
                    """,
                    (alias, canonical, confidence)
                )
                print(f"添加品牌别名: {alias} -> {canonical}")
                added_count += 1
        
        conn.commit()
        print(f"\n✅ 完成! 添加了 {added_count} 个新别名，跳过了 {skipped_count} 个已存在的别名")
        
        # 显示当前品牌别名统计
        cursor.execute("SELECT COUNT(*) FROM brand_aliases WHERE is_confirmed = 'confirmed'")
        total_confirmed = cursor.fetchone()[0]
        print(f"📊 当前已确认的品牌别名总数: {total_confirmed}")
        
    except Exception as e:
        print(f"❌ 错误: {e}")
        conn.rollback()
    finally:
        conn.close()


def update_text_parser():
    """更新文本解析器代码"""
    
    print("\n📝 需要更新 text_parser.py 中的以下内容：")
    print("\n1. 在 ignore_keywords 中添加:")
    print("   ", IGNORE_KEYWORDS_TO_ADD)
    
    print("\n2. 在 known_multi_brands 列表中添加:")
    multi_brands = [
        'Tom Ford', 'Ader Error', 'La Prairie', 'La Mer',
        'Isabel Marant', 'Mihara Yasuhiro', 'Maison Francis Kurkdjian',
        'Jo Malone', 'Westman Atelier'
    ]
    print("   ", multi_brands)
    
    print("\n3. 在 single_word_brands 列表中添加:")
    single_brands = [
        'Cellcosmet', 'Jansport', 'Woolimax', 'Valmont', 
        'Toteme', 'SKIMS', 'Reveal', 'PANE', 'Nanamica',
        'Longchamp', 'Lanvin', 'MFK', 'Fendi', 'Celine',
        'Bogner', 'Balmain', 'Barrow'
    ]
    print("   ", single_brands)
    
    print("\n4. 在 brand_replacements 字典中添加:")
    replacements = {
        'TomFord': 'Tom Ford',
        'AderError': 'Ader Error',
        'LaPrairie': 'La Prairie',
        'LaMer': 'La Mer',
        'IsabelMarant': 'Isabel Marant',
        'MiharaYasuhiro': 'Mihara Yasuhiro',
    }
    print("   ", replacements)


def test_brand_extraction():
    """测试品牌提取"""
    
    test_cases = [
        ("28/11月到期！Cellcosmet瑞妍 生机水500ML 精华液 院装 美国代购", "Cellcosmet"),
        ("30天发货！Tom Ford 金箔唇釉 美国代购", "Tom Ford"),
        ("Arcteryx 始祖鸟 女款BETA Jacket 冲锋衣 X000009239代购6.18MC", "Arc'teryx"),
        ("BARROW 男士 字母元素 拼色小熊 针织开衫 F4BWUAJK052 代购ss", "Barrow"),
        ("【蓝码可验】现货Ader Error拼接牛角扣超廓形针织开衫BKAFWGD08", "Ader Error"),
        ("美国采购 Jansport Benny Gold 限量联名款 双肩包 背包 T57A", "Jansport"),
        ("祖马龙 忧郁蓟草 香水 美国代购 11.1", "Jo Malone"),
        ("无盒 Cellcosmet瑞妍 活力生机面霜 院装150ML", "Cellcosmet"),
        ("现货 Woolimax 纯色松紧腰蓬松百褶裙 A字半裙伞裙 W25526 小王国", "Woolimax"),
        ("Valmont/法尔曼 防脱洗发水滋养头皮产后脱发院装", "Valmont"),
        ("Toteme 羊毛混纺女款黑色休闲西裤231-2007-250代购7.9MC", "Toteme"),
        ("SKIMS Boyfriend 女款宽松圆领T恤 AP-TSH-0462代购6.6 MC", "SKIMS"),
        ("小王国 Rag & Bone 绿色V领 棉混纺 短袖 T恤", "Rag & Bone"),
        ("PRADA 黑色 板材墨镜 0PR-15VS-1AB5S0 美国代购10.5MC", "Prada"),
        ("现货NANAMICA 男女款COOLMAX条纹休闲针织长袖T恤S25SH119小王国", "Nanamica"),
        ("现货 MIHARA YASUHIRO/三原康裕BAKER帆布低帮溶解运动鞋A02FW704", "Mihara Yasuhiro"),
        ("小王国 Longchamp 中号 双肩包 1699089P80 钢铁灰色", "Longchamp"),
        ("Lanvin浪凡 女款白尾厚底小白鞋 FW-SKLK07-NAPA", "Lanvin"),
        ("La Prairie/莱珀妮 粉水 凝润平衡柔肤水250ml", "La Prairie"),
        ("LA MER/海蓝之谜 碧玺焕亮洁面泡沫净透清洁洗面奶", "La Mer"),
        ("MFK 狂野麝香 /不羁香根草/至纯雅士 香水套装", "Maison Francis Kurkdjian"),
        ("小王国ISABEL MARANT 星球图案短袖T恤", "Isabel Marant"),
        ("Fendi 男款白色镂空拼接板鞋 7E1631-A1GV-F1MDO", "Fendi"),
        ("现货 Celine 金属细框方形太阳眼镜墨镜 40284U", "Celine"),
        ("小王国Bogner博格纳女款拼色带帽夹克MILVA 001", "Bogner"),
        ("Balmain 女款红色短款星星上衣 CF1EE090GD27", "Balmain"),
    ]
    
    print("\n🧪 测试案例：")
    for product_name, expected_brand in test_cases:
        print(f"\n商品: {product_name[:50]}...")
        print(f"期望品牌: {expected_brand}")


if __name__ == "__main__":
    print("🔧 开始修复品牌解析问题...")
    
    # 1. 修复品牌别名表
    asyncio.run(fix_brand_aliases())
    
    # 2. 显示需要更新的代码
    update_text_parser()
    
    # 3. 显示测试案例
    test_brand_extraction()
    
    print("\n✅ 修复脚本执行完成!")
    print("⚠️  请记得：")
    print("1. 手动更新 text_parser.py 中的相关列表")
    print("2. 重启后端服务使更改生效")
    print("3. 重新生成产品主表以应用新的品牌解析")