#!/usr/bin/env python3
"""
测试Burberry品牌多颜色货号格式分析
"""

import sys
import os

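# Put this script's directory on sys.path so the `app` package can be imported
# (assumes the script sits in the project root -- TODO confirm).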
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.utils.text_parser import ProductCodeExtractor

def test_burberry_patterns():
    """Report how well the extractor recognizes Burberry product-code formats.

    Runs ``ProductCodeExtractor.extract_product_code`` (brand ``"Burberry"``)
    over a fixed set of sample product names, prints the extracted code next
    to the expected one for every sample, and finishes with a summary of how
    many formats are already handled.

    This is a diagnostic report, not an asserting test: mismatches are
    printed, never raised. As a side effect, each sample's
    ``'current_extracted'`` entry is filled in with the extractor's result.

    Returns:
        None. All results are written to standard output.
    """
    extractor = ProductCodeExtractor()

    # Burberry product-code rules, as described by the user:
    # 1. A code is normally a single run of digits.
    # 2. Multi-color items list several digit runs separated by a space or '/'.
    # 3. The extracted code must keep those spaces and '/' separators.
    # 4. A trailing date in the product name must not be included.

    test_cases = [
        # Existing single-code formats
        {
            'name': "GN现货 Burberry 女款黄色起球设计开衫80776841 小王国",
            'expected': "80776841",
            'current_extracted': None,  # filled in by the loop below
            'format': "单一8位数字货号"
        },
        {
            'name': "小王国 BUrberry 米色条纹长袖开衫 8084328 美国代购8.14MC",
            'expected': "8084328",
            'current_extracted': None,
            'format': "单一7位数字货号"
        },

        # Multi-color formats (hypothetical samples)
        {
            'name': "Burberry 女款经典格纹围巾 80776841 80776842 代购8.15",
            'expected': "80776841 80776842",
            'current_extracted': None,
            'format': "空格分隔的多颜色货号"
        },
        {
            'name': "Burberry 男款衬衫 8084328/8084329/8084330 代购8.16",
            'expected': "8084328/8084329/8084330",
            'current_extracted': None,
            'format': "/分隔的多颜色货号"
        },
        {
            'name': "Burberry 儿童款连帽衫 80123456 80123457 80123458 代购8.17",
            'expected': "80123456 80123457 80123458",
            'current_extracted': None,
            'format': "空格分隔的三色货号"
        },
        {
            'name': "Burberry 女款手袋 81234567/81234568 代购8.18",
            'expected': "81234567/81234568",
            'current_extracted': None,
            'format': "/分隔的双色货号"
        },
        {
            'name': "Burberry 经典风衣 90123456 90123457/90123458 代购8.19",
            'expected': "90123456 90123457/90123458",
            'current_extracted': None,
            'format': "混合分隔符货号"
        }
    ]

    print("=== Burberry多颜色货号格式分析 ===")
    print("目标：支持空格和/分隔的多个数字货号，避免匹配结尾日期")
    print()

    for i, case in enumerate(test_cases, 1):
        print(f"测试 {i}: [{case['format']}]")
        print(f"商品名称: {case['name']}")
        print(f"期望货号: {case['expected']}")

        # Record what the extractor currently returns for this sample.
        current_code = extractor.extract_product_code(case['name'], "Burberry")
        case['current_extracted'] = current_code
        print(f"当前提取: {current_code}")

        if current_code == case['expected']:
            print("✅ 当前逻辑已支持")
        else:
            print("❌ 需要增强支持")

            # Name the separator pattern the extractor would need to handle.
            # A failing single-code sample has no separator, so nothing more
            # is printed for it.
            expected = case['expected']
            if ' ' in expected and '/' in expected:
                print("  需要模式: 混合空格和/分隔的多数字")
            elif ' ' in expected:
                print("  需要模式: 空格分隔的多数字")
            elif '/' in expected:
                print("  需要模式: /分隔的多数字")
        print()

    print("=== 当前支持情况总结 ===")
    unsupported_formats = [
        case['format']
        for case in test_cases
        if case['current_extracted'] != case['expected']
    ]
    total = len(test_cases)
    supported = total - len(unsupported_formats)
    print(f"支持格式: {supported}/{total} ({supported/total*100:.1f}%)")

    if unsupported_formats:
        print("\n需要增加的模式:")
        # dict.fromkeys removes duplicates while keeping test-case order, so
        # the report is identical from run to run (a set iterates in an
        # arbitrary, hash-dependent order).
        for fmt in dict.fromkeys(unsupported_formats):
            print(f"- {fmt}")

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_burberry_patterns()