#!/usr/bin/env python3
"""
详细调试小数点货号提取过程
"""

import sys
import os
import re

# Add the project root (this script's own directory) to the import path
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.utils.text_parser import ProductCodeExtractor

def debug_decimal_extraction(
    test_name: str = "On昂跑 Cloud X 3女款缓震日常训练运动鞋60.98098代购3.12",
    expected: str = "60.98098",
) -> None:
    """Trace, step by step, how a decimal-style product code is extracted.

    Every intermediate stage is printed to stdout: bracket processing,
    brand removal, the raw hits of each candidate regex, the brand-name
    filter verdict for every hit, the extractor's final answer, and a
    final check of that answer against ``expected``.

    The function drives ``ProductCodeExtractor`` through its private helpers
    (``_remove_brand``, ``_is_brand_name``) in addition to the public
    ``extract_product_code``; it is a diagnostic aid, not production code.

    Args:
        test_name: Product title to run through the pipeline. Defaults to
            the sample title containing the decimal code ``60.98098``.
        expected: Product code the extractor is expected to return for
            ``test_name``. It is printed and compared with the final result.

    Returns:
        None. All results are reported via ``print``.
    """
    extractor = ProductCodeExtractor()

    print(f"原始商品名: {test_name}")
    print(f"期望货号: {expected}")

    # Step 1: process bracketed content in the title.
    bracket_result = extractor.bracket_processor.process_bracket_content(test_name)
    print(f"\n1. 方括号处理结果: {bracket_result}")

    # Step 2: strip the brand name from the bracket-cleaned text.
    clean_name = extractor._remove_brand(bracket_result["cleaned_text"], None)
    print(f"2. 清理品牌后: '{clean_name}'")

    # Step 3: apply each candidate pattern in turn.
    # NOTE(review): this list is meant to mirror the pattern list used inside
    # ProductCodeExtractor; that cannot be verified from this file, so keep
    # the two in sync by hand.
    code_patterns = [
        r'\d{2,}\.\d{3,}',               # decimal-style code (e.g. 60.98098): 2+ digits, dot, 3+ digits; tried first
        r'[A-Z]{3,}(?:\s+[A-Z]{3,}){1,2}', # 2-3 uppercase words
        r'[A-Z][a-z]+\s+[A-Z][a-z]+',    # two capitalized words
        r'[A-Z]+\s+[A-Z]+',              # two uppercase words
        r'[A-Z]+\s+\d{3}',               # uppercase letters + space + 3 digits
        r'[A-Z]+[-][A-Z]+',              # uppercase letters - uppercase letters
        r'[A-Z]+[-]\w+\s+\d{3,}',        # letters-word + space + digits
        r'[A-Z]\d+[A-Z]+\d+',            # mixed letter/digit format
        r'[A-Za-z]+\d{3,}[A-Za-z\d]*',   # letters followed by 3+ digits
        r'[A-Z]{2,}\d{3,}',              # 2+ uppercase letters + 3+ digits
        r'[A-Z]{4,}\d{1,2}',             # 4+ uppercase letters + 1-2 digits
        r'(?<!\d)\d{6,8}(?!\d)',         # 6-8 digit pure number
        r'\b\d{4,5}\b(?!\.\d)',          # 4-5 digit pure number
        r'(?<=[^A-Za-z])[A-Z]{4,6}(?=[^A-Za-z])', # 4-6 uppercase letters only
        r'\b[A-Za-z]+\d+[A-Za-z\d]*\b',  # word-bounded alphanumeric, letter first
        r'\b\d+[A-Za-z]+[A-Za-z\d]*\b',  # alphanumeric starting with digits
    ]

    print("\n3. 模式匹配:")
    all_matches = []
    for i, pattern in enumerate(code_patterns, 1):
        # None of the patterns has a capturing group, so findall yields the
        # full matched substrings.
        matches = re.findall(pattern, clean_name)
        print(f"模式 {i:2d} ({pattern}): {matches}")
        all_matches.extend(matches)

    # Step 4: every hit from every pattern, in pattern order (duplicates kept).
    print(f"\n4. 所有匹配: {all_matches}")

    # Step 5: run the brand-name filter on each hit and show its verdict.
    print("\n5. 过滤测试:")
    filtered_matches = []
    for match in all_matches:
        is_brand = extractor._is_brand_name(match)
        print(f"匹配 '{match}' -> 是品牌名: {is_brand}")
        if not is_brand:
            filtered_matches.append(match)

    # Step 6: hits that survived the brand-name filter.
    print(f"\n6. 过滤后匹配: {filtered_matches}")

    # Step 7: the extractor's own end-to-end answer, checked against the
    # expectation so the outcome does not have to be compared by eye.
    result = extractor.extract_product_code(test_name)
    print(f"\n7. 最终结果: '{result}'")
    print(f"   是否符合期望: {result == expected}")

    # Step 8: probe the brand-name filter with fixed fragments of the default
    # sample title (the full code, the trailing "3.12", and the fractional part).
    print("\n8. 特定测试:")
    for probe in ("60.98098", "3.12", "98098"):
        print(f"_is_brand_name('{probe}'): {extractor._is_brand_name(probe)}")

# Run the trace only when this file is executed as a script, not on import.
if __name__ == "__main__":
    debug_decimal_extraction()