#!/usr/bin/env python3
"""
测试新的复杂货号格式：
1. 多段-连接的字母数字格式（通用）
2. Balenciaga空格分隔的字母数字格式
"""

import sys
import os

# Put the directory containing this file at the front of the import search
# path so the `app` package imported below is looked up there first.
# NOTE(review): presumably this script lives in the project root — confirm.
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.utils.text_parser import ProductCodeExtractor

def test_new_formats():
    """Run the extractor over sample product titles and print a support report.

    Each sample pairs a product title with the product code that should be
    extracted from it.  For every sample the brand is guessed from the title,
    ``ProductCodeExtractor.extract_product_code(title, brand)`` is called, and
    the result is compared with the expected code.  A per-sample verdict and
    a final summary (supported count, percentage, and the list of formats
    still unsupported) are printed to stdout.

    This is a diagnostic report: nothing is asserted and nothing is returned.
    """
    extractor = ProductCodeExtractor()

    test_cases = [
        # 1. Multi-segment, hyphen-joined alphanumeric codes (any brand)
        {
            'name': "Lanvin 女款 黑色翻毛皮CURB运动鞋FW-SKDK0B-VESU小王国代购8.10",
            'expected': "FW-SKDK0B-VESU",
            'current_extracted': None,
            'format': "多段-连接字母数字（通用格式）"
        },
        # More hyphen-joined examples
        {
            'name': "Prada 男款皮鞋 2EE368-3AUF-F0002 代购8.12",
            'expected': "2EE368-3AUF-F0002",
            'current_extracted': None,
            'format': "三段-连接字母数字格式"
        },
        {
            'name': "Gucci 女包 GG-456123-AB89代购8.14",
            'expected': "GG-456123-AB89",
            'current_extracted': None,
            'format': "字母-数字-字母数字连接"
        },

        # 2. Balenciaga space-separated codes (like McQueen, but segments may
        #    mix letters and digits)
        {
            'name': "Balenciaga 女款 Cargo 做旧 运动鞋 785756 W2MU1代购8.15M",
            'expected': "785756 W2MU1",
            'current_extracted': None,
            'format': "Balenciaga数字+空格+字母数字"
        },
        # More Balenciaga examples
        {
            'name': "Balenciaga 男款运动鞋 123456 ABC789 代购8.16",
            'expected': "123456 ABC789",
            'current_extracted': None,
            'format': "Balenciaga数字+空格+字母数字"
        },
        {
            'name': "Balenciaga 女包 987654 XY12Z3 AB45代购8.17",
            'expected': "987654 XY12Z3 AB45",
            'current_extracted': None,
            'format': "Balenciaga三段空格分隔"
        }
    ]

    # Brands are checked in this order; the first one found in the title wins.
    known_brands = ("Lanvin", "Prada", "Gucci", "Balenciaga")

    print("=== 新复杂货号格式测试 ===")
    print("1. 多段-连接格式（通用）")
    print("2. Balenciaga空格分隔字母数字格式（避免日期）")
    print()

    for i, case in enumerate(test_cases, 1):
        expected = case['expected']
        print(f"测试 {i}: [{case['format']}]")
        print(f"商品名称: {case['name']}")
        print(f"期望货号: {expected}")

        # Guess the brand from the title; None when no known brand appears.
        brand = next((b for b in known_brands if b in case['name']), None)

        # Record what the extractor currently returns so the summary below
        # can be computed from the same data.
        current_code = extractor.extract_product_code(case['name'], brand)
        case['current_extracted'] = current_code
        print(f"当前提取: {current_code}")

        if current_code == expected:
            print("✅ 当前逻辑已支持")
        else:
            print("❌ 需要增强支持")

            # Hint at which pattern family the extractor is missing.
            if expected.count('-') >= 2:
                print("  需要模式: 多段-连接的字母数字格式")
            elif ' ' in expected and any(char.isalpha() for char in expected):
                print("  需要模式: 空格分隔的字母数字混合格式")
        print()

    print("=== 当前支持情况总结 ===")
    unsupported_formats = [
        case['format']
        for case in test_cases
        if case['current_extracted'] != case['expected']
    ]
    total = len(test_cases)
    supported = total - len(unsupported_formats)
    print(f"支持格式: {supported}/{total} ({supported/total*100:.1f}%)")

    if unsupported_formats:
        print("\n需要增加的模式:")
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # report is identical from run to run (iterating a set of strings is
        # not, because of hash randomization).
        for fmt in dict.fromkeys(unsupported_formats):
            print(f"- {fmt}")

# Print the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_new_formats()