#!/usr/bin/env python3
"""
Debug, step by step, why expected product codes are missing from extraction results.
"""

import sys
import os
import re

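# Put this script's own directory first on sys.path so the `app` package
# imports below resolve (this assumes the script sits in the project root).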
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.utils.text_parser import ProductCodeExtractor

def debug_extraction_process(name, brand, expected):
    """Trace each stage of product-code extraction for one product title.

    Prints, in order: the bracket-processing output, the title after brand
    removal, every regex from a local pattern table that matches (with the
    extractor's brand-name verdict for each match), the matches that were
    not flagged as brand names, and finally the extractor's own result
    compared against ``expected``.

    Args:
        name: Product title to analyse.
        brand: Brand handed to the extractor's brand-removal and
            extraction calls.
        expected: Product code the extractor is expected to return.

    Returns:
        None. Everything is written to stdout.
    """
    parser = ProductCodeExtractor()

    for banner_line in (
        f"\n=== 调试 {name} ===",
        f"期望货号: {expected}",
        f"品牌: {brand}",
    ):
        print(banner_line)

    # Stage 1: bracket handling.
    bracket_info = parser.bracket_processor.process_bracket_content(name)
    text_without_brackets = bracket_info["cleaned_text"]
    print(f"\n1. 方括号处理:")
    print(f"   cleaned_text: '{text_without_brackets}'")
    print(f"   extracted_codes: {bracket_info['extracted_codes']}")

    # Stage 2: brand removal.
    debranded = parser._remove_brand(text_without_brackets, brand)
    print(f"\n2. 品牌移除后: '{debranded}'")

    # Stage 3: run every pattern against the de-branded title.
    # NOTE(review): this table is a local copy; presumably it mirrors the
    # patterns used inside ProductCodeExtractor -- verify it stays in sync.
    print(f"\n3. 模式匹配结果:")
    pattern_table = (
        (r'\d{2,}\.\d{3,}', '小数点格式'),
        (r'[A-Za-z0-9]+-[A-Za-z0-9]+-[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*', '多段-连接格式'),
        (r'\d{3,}[-]\d{2,}', '数字-数字格式'),
        (r'(?<!\d\.)\d{6,}(?:[/]\d{6,})+(?!\.\d)', 'Burberry多颜色格式'),
        (r'(?<!\d\.)\d{6,}(?:[\s/]\d{6,})+(?!\.\d)', 'Burberry混合格式'),
        (r'[A-Z]\d+[A-Z]+\d+\s+[A-Z]\d+', 'Jil Sander格式'),
        (r'(?<!\d\.)\d{3,}(?:\s+[A-Za-z0-9]{3,})+(?!\.\d)', 'Balenciaga格式'),
        (r'(?<!\d\.)\d{3,}(?:\s+\d{3,})+(?!\.\d)', '多段数字格式'),
        (r'[A-Z]{3,}(?:\s+[A-Z]{3,}){1,3}', '2-4个大写单词'),
        (r'[A-Z]+\s+[A-Z]\d+', '字母+空格+字母+数字'),
        (r'[A-Z][a-z]+\s+[A-Z][a-z]+', '首字母大写的两个单词'),
        (r'[A-Z]+\s+[A-Z]+', '两个大写单词'),
        (r'[A-Z]+\d+\s+\d{3}', '字母数字+空格+数字'),
        (r'[A-Z]+\d+[-][A-Z]+', '字母数字-字母'),
        (r'[A-Z]{2,}\s+\d{3}', '至少2个大写字母+空格+3位数字'),
        (r'[A-Z]+[-][A-Z]+', '大写字母-大写字母'),
        (r'[A-Z]+[-]\w+\s+\d{3,}', '字母-字母+空格+数字'),
        (r'[A-Z]\d+[A-Z]+\d+', '混合字母数字格式'),
        (r'[A-Za-z]+\d{3,}[A-Za-z\d]*', '字母开头后接3位以上数字'),
        (r'[A-Z]{2,}\d{3,}', '至少2个大写字母+3位以上数字'),
        (r'[A-Z]{4,}\d{1,2}', '4位以上大写字母+1-2位数字'),
        (r'(?<!\d)\d{6,8}(?!\d)', '6-8位纯数字'),
        (r'\b\d{4,5}\b(?!\.\d)', '4-5位纯数字'),
        (r'(?<=[^A-Za-z])[A-Z]{4,6}(?=[^A-Za-z])', '4-6位纯大写字母'),
        (r'\b[A-Za-z]+\d+[A-Za-z\d]*\b', '有明确边界的字母数字组合'),
        (r'\b\d+[A-Za-z]+[A-Za-z\d]*\b', '数字开头的字母数字组合'),
    )

    surviving_codes = []
    for regex, label in pattern_table:
        # No pattern contains a capturing group, so findall yields the
        # full matched substrings.
        hits = re.findall(regex, debranded)
        if not hits:
            continue
        print(f"   [{label}] {regex}: {hits}")
        for hit in hits:
            # Ask the extractor whether it treats this match as a brand name.
            flagged_as_brand = parser._is_brand_name(hit)
            print(f"      '{hit}' -> is_brand_name: {flagged_as_brand}")
            if flagged_as_brand:
                continue
            surviving_codes.append(hit)

    # Stage 4: matches that were not flagged as brand names.
    print(f"\n4. 通过品牌过滤后的codes: {surviving_codes}")

    # Stage 5: what the extractor itself returns, compared with the expectation.
    final_code = parser.extract_product_code(name, brand)
    print(f"\n5. 最终结果: {final_code}")
    print(f"   匹配期望: {final_code == expected}")

def main():
    """Run the extraction debugger over a fixed set of sample titles.

    Each sample is a ``(title, brand, expected_code)`` triple. A separator
    line of 80 ``=`` characters is printed after every sample.
    """
    samples = (
        ("现货 Anine Bing 小logo字母 鸭舌帽9084  小王国", "Anine Bing", "9084"),
        ("ArcTeryx始祖鸟 男款Cormac小鸟标透气防晒速干短袖T恤9718代购M", "ArcTeryx", "9718"),
        ("Ralph Lauren 拉夫劳伦男士logo刺绣拉链夹克秋冬防风冲锋衣710548506002/003 卡其色 S", "Ralph Lauren", "710548506002/003"),
        ("现货 MaxMara Weekend纯棉字母数字印花短袖T恤 Teiera 小王国", "MaxMara", "Teiera"),
        ("CANADA GOOSE/加拿大鹅 男款徽标马甲2054M 代购 CA 4.29 MC", "CANADA GOOSE", "2054M"),
    )
    divider = "\n" + "=" * 80

    for sample in samples:
        # Each triple lines up positionally with (name, brand, expected).
        debug_extraction_process(*sample)
        print(divider)

# Run the debug cases only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()