#!/usr/bin/env python3
"""
Debug the product-code matching problem for the "POPPY 626" case.
"""

import sys
import os
import re

# Add this script's directory (expected to be the project root) to the import path so `app` resolves
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.utils.text_parser import ProductCodeExtractor

def _print_pattern_matches(text, patterns):
    """Print each (regex, description) pair in *patterns* that matches *text*.

    Args:
        text: The string to scan.
        patterns: Iterable of ``(regex, description)`` pairs.

    Returns:
        True if at least one pattern produced a match, otherwise False.
    """
    found_any = False
    for pattern, description in patterns:
        # Every pattern uses only non-capturing groups, so findall returns
        # the full matched substrings rather than group tuples.
        matches = re.findall(pattern, text)
        if matches:
            print(f"  ✅ {description}: {matches}")
            found_any = True
    return found_any


def debug_poppy(
    test_text="某品牌商品名称 POPPY 626 代购MC",
    candidate_codes=("POPPY 626", "POPPY", "626"),
):
    """Trace how a product title is matched and filtered during code extraction.

    The routine prints four sections to stdout:

    1. which of the locally listed regex patterns match ``test_text``;
    2. the intermediate text after bracket processing and brand removal;
    3. whether each entry of ``candidate_codes`` is flagged by the
       extractor's ``_is_brand_name`` check;
    4. the final value returned by ``extract_product_code``.

    Args:
        test_text: Product title to analyse. Defaults to the original
            "POPPY 626" sample.
        candidate_codes: Strings to run through ``_is_brand_name``. Defaults
            to the three candidates relevant to the "POPPY 626" sample.
    """
    # Pattern list copied from ProductCodeExtractor (per the original note);
    # it is a manual copy, so it must be kept in sync by hand.
    code_patterns = [
        (r'\d{2,}\.\d{3,}', "小数点格式"),
        (r'[A-Za-z0-9]+-[A-Za-z0-9]+-[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*', "多段-连接"),
        (r'\d{3,}[-]\d{2,}', "数字-数字格式"),
        (r'(?<!\d\.)\d{6,}(?:[/]\d{6,})+(?!\.\d)', "Burberry /分隔"),
        (r'(?<!\d\.)\d{6,}(?:[\s/]\d{6,})+(?!\.\d)', "Burberry 混合"),
        (r'[A-Z]\d+[A-Z]+\d+\s+[A-Z]\d+', "Jil Sander格式"),
        (r'(?<!\d\.)\d{3,}(?:\s+[A-Za-z0-9]{3,})+(?!\.\d)', "Balenciaga格式"),
        (r'(?<!\d\.)\d{3,}(?:\s+\d{3,})+(?!\.\d)', "McQueen 空格分隔"),
        (r'[A-Z]{3,}(?:\s+[A-Z]{3,}){1,3}', "2-4个大写单词"),
        (r'[A-Z]+\s+[A-Z]\d+', "字母+空格+字母+数字"),
        (r'[A-Z][a-z]+\s+[A-Z][a-z]+', "首字母大写两单词"),
        (r'[A-Z]+\s+[A-Z]+', "两个大写单词"),
        (r'[A-Z]+\d+\s+\d{3}', "字母数字+空格+数字"),
        (r'[A-Z]+\d+[-][A-Z]+', "字母数字-字母"),
        (r'[A-Z]{2,}\s+\d{3}', "2位字母+空格+3位数字"),
        (r'[A-Z]+[-][A-Z]+', "字母-字母"),
        (r'[A-Z]+[-]\w+\s+\d{3,}', "字母-字母+空格+数字"),
        (r'[A-Z]\d+[A-Z]+\d+', "字母+数字+字母+数字"),
        (r'[A-Za-z]+\d{3,}[A-Za-z\d]*', "字母+3位数字"),
        (r'[A-Z]{2,}\d{3,}', "2位字母+3位数字"),
        (r'[A-Z]{4,}\d{1,2}', "4位字母+1-2位数字"),
        (r'(?<!\d)\d{6,8}(?!\d)', "6-8位纯数字"),
        (r'\b\d{4,5}\b(?!\.\d)', "4-5位纯数字"),
        (r'(?<=[^A-Za-z])[A-Z]{4,6}(?=[^A-Za-z])', "4-6位纯字母"),
        (r'\b[A-Za-z]+\d+[A-Za-z\d]*\b', "字母数字组合"),
        (r'\b\d+[A-Za-z]+[A-Za-z\d]*\b', "数字字母组合"),
    ]

    print("=== POPPY 626 匹配调试 ===")
    print(f"测试文本: '{test_text}'")
    print("匹配结果:")

    # Section 1: raw regex matches against the unprocessed text.
    if not _print_pattern_matches(test_text, code_patterns):
        print("  ❌ 没有找到匹配的模式")

    # Section 2: replay the extractor's preprocessing steps one at a time.
    print("\n=== 实际处理过程 ===")
    extractor = ProductCodeExtractor()

    bracket_result = extractor.bracket_processor.process_bracket_content(test_text)
    print(f"方括号处理: '{bracket_result['cleaned_text']}'")

    # No brand is supplied (None), matching the final extraction call below.
    clean_name = extractor._remove_brand(bracket_result["cleaned_text"], None)
    print(f"品牌移除后: '{clean_name}'")

    # Section 3: check whether any candidate is rejected by _is_brand_name.
    print("\n=== 品牌/颜色过滤测试 ===")
    for code in candidate_codes:
        is_brand = extractor._is_brand_name(code)
        print(f"'{code}' 被识别为品牌/颜色: {is_brand}")

    # Section 4: the end-to-end result for comparison with the steps above.
    result = extractor.extract_product_code(test_text, None)
    print(f"\n最终提取: '{result}'")

# Run the debug routine only when this file is executed directly as a script.
if __name__ == "__main__":
    debug_poppy()