#!/usr/bin/env python3
"""
Debug the product-code (item number) pattern-matching process.
"""

import sys
import os
import re

# Put this script's own directory at the front of sys.path (the original note
# calls it the project root), presumably so the `app` package import resolves.
# NOTE(review): os.path.join() with a single argument returns it unchanged.
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.utils.text_parser import ProductCodeExtractor

# Sample inputs used when the caller does not supply any texts.
_DEFAULT_TEST_TEXTS = (
    "大童 棉质 LOGO马标 长袖T 32384 MC",  # text after Polo preprocessing
    "女款 带帽 面包羽绒服夹克 J40AF0131 J70122 MC",  # text after Jil Sander preprocessing
)

# (regex, label) pairs. Per the original note these were copied from
# ProductCodeExtractor; the copy is not kept in sync automatically, so verify
# against that class before drawing conclusions from this report.
_CODE_PATTERNS = (
    (r'\d{2,}\.\d{3,}', "小数点格式"),
    (r'[A-Za-z0-9]+-[A-Za-z0-9]+-[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*', "多段-连接"),
    (r'\d{3,}[-]\d{2,}', "数字-数字格式"),
    (r'(?<!\d\.)\d{6,}(?:[/]\d{6,})+(?!\.\d)', "Burberry /分隔"),
    (r'(?<!\d\.)\d{6,}(?:[\s/]\d{6,})+(?!\.\d)', "Burberry 混合"),
    (r'(?<!\d\.)\d{3,}(?:\s+[A-Za-z0-9]{3,})+(?!\.\d)', "Balenciaga格式"),
    (r'(?<!\d\.)\d{3,}(?:\s+\d{3,})+(?!\.\d)', "McQueen 空格分隔"),
    (r'[A-Z]{3,}(?:\s+[A-Z]{3,}){1,3}', "2-4个大写单词"),
    (r'[A-Z]+\s+[A-Z]\d+', "字母+空格+字母+数字"),
    (r'[A-Z][a-z]+\s+[A-Z][a-z]+', "首字母大写两单词"),
    (r'[A-Z]+\s+[A-Z]+', "两个大写单词"),
    (r'[A-Z]+\d+\s+\d{3}', "字母数字+空格+数字"),
    (r'[A-Z]+\d+[-][A-Z]+', "字母数字-字母"),
    (r'[A-Z]+\s+\d{3}', "字母+空格+3位数字"),
    (r'[A-Z]+[-][A-Z]+', "字母-字母"),
    (r'[A-Z]+[-]\w+\s+\d{3,}', "字母-字母+空格+数字"),
    (r'[A-Z]\d+[A-Z]+\d+', "字母+数字+字母+数字"),
    (r'[A-Za-z]+\d{3,}[A-Za-z\d]*', "字母+3位数字"),
    (r'[A-Z]{2,}\d{3,}', "2位字母+3位数字"),
    (r'[A-Z]{4,}\d{1,2}', "4位字母+1-2位数字"),
    (r'(?<!\d)\d{6,8}(?!\d)', "6-8位纯数字"),
    (r'\b\d{4,5}\b(?!\.\d)', "4-5位纯数字"),
    (r'(?<=[^A-Za-z])[A-Z]{4,6}(?=[^A-Za-z])', "4-6位纯字母"),
    (r'\b[A-Za-z]+\d+[A-Za-z\d]*\b', "字母数字组合"),
    (r'\b\d+[A-Za-z]+[A-Za-z\d]*\b', "数字字母组合"),
)

# Narrow probes for a five-digit code, from an exact literal to boundary-aware
# variants, to show which kind of digit pattern can see the number at all.
_SPECIFIC_PATTERNS = (
    (r'32384', "精确匹配32384"),
    (r'\d{5}', "5位数字"),
    (r'\b\d{5}\b', "边界内5位数字"),
    (r'(?<!\d)\d{5}(?!\d)', "非数字环绕的5位数字"),
)


def _print_matches(text, patterns, indent):
    """Print one line per pattern that matches *text*; return True if any did."""
    matched = False
    for pattern, label in patterns:
        hits = re.findall(pattern, text)
        if hits:
            print(f"{indent}✅ {label}: {hits}")
            matched = True
    return matched


def debug_pattern_matching(test_texts=None):
    """Print which product-code regexes match each text.

    For every text the report lists each pattern in ``_CODE_PATTERNS`` that
    produces at least one ``re.findall`` hit (or a "nothing matched" line),
    followed by the hits of the narrower ``_SPECIFIC_PATTERNS`` probes.

    Args:
        test_texts: Optional iterable of strings to examine. A single ``str``
            is treated as one text rather than iterated character by
            character. When ``None`` (the default) the two built-in sample
            texts are used, which reproduces the original report.

    Returns:
        None. The report is written to standard output.
    """
    if test_texts is None:
        texts = _DEFAULT_TEST_TEXTS
    elif isinstance(test_texts, str):
        # A bare string is itself iterable; wrap it so it is probed as a whole.
        texts = (test_texts,)
    else:
        texts = test_texts

    print("=== 货号模式匹配调试 ===")

    for index, text in enumerate(texts, 1):
        print(f"\n测试文本 {index}: '{text}'")
        print("匹配结果:")

        if not _print_matches(text, _CODE_PATTERNS, "  "):
            print("  ❌ 没有找到匹配的模式")

        # Targeted digit probes; patterns without hits print nothing here.
        print("\n具体数字模式测试:")
        _print_matches(text, _SPECIFIC_PATTERNS, "    ")

# Run the debug report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    debug_pattern_matching()