#!/usr/bin/env python3
"""
Test extraction of a long (multi-word) product code: MOSS MAI BOXY TEE
"""

import sys
import os

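# Put this file's directory on the import path (presumably the project root,
# so that the `app` package below resolves when run as a script).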
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.utils.text_parser import ProductCodeExtractor

def _print_pattern_diagnostics(product_name):
    """Print what each candidate multi-word regex finds in ``product_name``.

    Diagnostic aid only: used when extraction returns an unexpected value,
    to show which uppercase-word pattern would capture the product code.
    """
    import re

    print("\n=== 详细调试 ===")

    # Candidate patterns for codes made of several uppercase words.
    patterns_to_test = [
        r'[A-Z]{3,}(?:\s+[A-Z]{3,}){1,2}',   # 2-3 words (noted as the pattern currently in use)
        r'[A-Z]{3,}(?:\s+[A-Z]{3,}){1,3}',   # 2-4 words
        r'[A-Z]{3,}(?:\s+[A-Z]{3,}){2,3}',   # 3-4 words
        r'[A-Z]{3,}(?:\s+[A-Z]{3,}){3}',     # exactly 4 words
        r'[A-Z]{2,}\s+[A-Z]{2,}\s+[A-Z]{2,}\s+[A-Z]{2,}', # 4 words spelled out explicitly
    ]

    for i, pattern in enumerate(patterns_to_test, 1):
        matches = re.findall(pattern, product_name)
        print(f"模式 {i} ({pattern}): {matches}")


def test_long_product_code():
    """Check that a four-word uppercase product code is extracted intact.

    Feeds one product title to ``ProductCodeExtractor.extract_product_code``
    and compares the result with the expected code. The outcome is printed
    either way; on a mismatch, regex diagnostics are printed first and then
    the test fails.

    Raises:
        AssertionError: if the extracted code differs from the expected one.
    """
    extractor = ProductCodeExtractor()

    # Expected code format: four uppercase words.
    name = "小王国 Rag & Bone绿色短款圆领短袖T恤MOSS MAI BOXY TEE8.13"
    expected = "MOSS MAI BOXY TEE"

    print("=== 长货号格式测试 ===")
    print(f"商品名称: {name}")
    print(f"期望货号: {expected}")

    product_code = extractor.extract_product_code(name)
    print(f"实际货号: {product_code}")

    if product_code == expected:
        print("✅ 匹配成功")
    else:
        print("❌ 不匹配！")
        _print_pattern_diagnostics(name)

    # Fail for real on a mismatch: without this the check only prints and
    # would always be reported as passing by a test runner.
    assert product_code == expected, (
        f"expected product code {expected!r}, got {product_code!r}"
    )

# Allow running this check directly as a script, outside a test runner.
if __name__ == "__main__":
    test_long_product_code()