#!/usr/bin/env python3
"""
Test product code extraction with enhanced logic
"""
import sys
import os
import re
sys.path.append('backend')

from app.utils.text_parser import ProductCodeExtractor

def test_extraction():
    """Run the product-code extraction cases and print a per-case report.

    Each case is a product title (``name``), the code that
    ``ProductCodeExtractor.extract_product_code`` is expected to return for it
    (``expected``, or ``None`` when nothing should be extracted) and a short
    human-readable ``description``.

    For every case the function prints the input, the expected and actual
    result, some diagnostic output showing which local candidate patterns
    match the brand-stripped title, and a PASS/FAIL marker. A summary line
    with the pass count is printed at the end.

    Returns:
        bool: ``True`` if every case produced exactly its expected value,
        ``False`` otherwise.

    NOTE(review): failures are reported through the return value and printed
    output only; nothing is asserted, so a runner that ignores return values
    will not see a failing case.
    """
    extractor = ProductCodeExtractor()

    # Test cases from user examples
    test_cases = [
        # User's new examples
        {
            "name": "SKIMS Boyfriend 女款宽松圆领T恤 AP-TSH-0462代购6.6 MC",
            "expected": "AP-TSH-0462",
            "description": "Should extract AP-TSH-0462 and ignore Boyfriend",
        },
        {
            "name": "Moose Knuckles女款灰色YUKON短款羽绒服M33LB041O 1003代购8.14M",
            "expected": "M33LB041O 1003",
            "description": "Should extract M33LB041O 1003 and ignore YUKON",
        },

        # Previous problem cases
        {
            "name": "Alexander Wang 女款 黑色蕾丝细节黑色牛仔裤4DC1254613 8.8AT",
            "expected": "4DC1254613",
            "description": "Should extract 4DC1254613 and ignore 8.8AT",
        },
        {
            "name": "Alexander Wang Bodywear白色闪闪短款上衣 8C2238047T代购6.18MC",
            "expected": "8C2238047T",
            "description": "Should extract 8C2238047T and ignore 6.18MC",
        },
        {
            "name": "UMA WANG 女款绿色拼接短袖连衣裙 UP5036 UW400 DUDI 8.8AT",
            "expected": "UP5036 UW400 DUDI",
            "description": "Should extract multi-part UMA WANG code",
        },
        {
            "name": "Tods女款黑色雕花细带布洛克皮鞋XXW60C0DE20EB5B999代购8.15LA",
            "expected": "XXW60C0DE20EB5B999",
            "description": "Should extract long alphanumeric code",
        },

        # Multi-part codes with different separators
        {
            "name": "Anine Bing 女款Karter Jogger米灰色小logo 卫裤A-03-10598-GRY1",
            "expected": "A-03-10598-GRY1",
            "description": "Should extract dash-separated product code A-03-10598-GRY1",
        },
        {
            "name": "小王国 Acne Studios 中性款 魔术贴小白鞋 Steffey Friend 8.18",
            "expected": "Steffey Friend",
            "description": "Should extract space-separated product code Steffey Friend and ignore date 8.18",
        },
        {
            "name": "小王国 ST JOHN 紫色V领上衣 K91KQ21 8.17 LA",
            "expected": "K91KQ21",
            "description": "Should extract K91KQ21 and ignore date+procurement format 8.17 LA",
        },
        {
            "name": "小王国Rag & Bone条纹短袖T恤 MAI STRIPED BOXY TEE 8.13",
            "expected": "MAI STRIPED BOXY TEE",
            "description": "Should extract multi-word product code MAI STRIPED BOXY TEE and ignore date 8.13",
        },
        {
            "name": "小王国 Moose Knuckles男款蓝色狐狸毛短款羽绒服MK2000MB-402 MC",
            "expected": "MK2000MB-402",
            "description": "Should extract MK2000MB-402 and ignore procurement keyword MC",
        },
        {
            "name": "商品 ABC.DEF.123 产品描述",
            "expected": "ABC.DEF.123",
            "description": "Should extract dot-separated product code ABC.DEF.123",
        },

        # Date format exclusions
        {
            "name": "商品名称 8.19MC 其他文字",
            "expected": None,
            "description": "Should NOT extract date format 8.19MC",
        },
        {
            "name": "商品名称 11.27MC 其他文字",
            "expected": None,
            "description": "Should NOT extract date format 11.27MC",
        },
        {
            "name": "商品名称 15LA 其他文字",
            "expected": None,
            "description": "Should NOT extract short date+procurement format",
        },

        # Single English word product codes (Bogner cases)
        {
            "name": "Bogner博格纳 女款 绿色 连帽棉服夹克 MAGAN 代购8.16",
            "expected": "MAGAN",
            "description": "Should extract single English word MAGAN",
        },
        {
            "name": "小王国Bogner博格纳女款黑色拉链夹克ELIN代购8.16",
            "expected": "ELIN",
            "description": "Should extract single English word ELIN",
        },
        {
            "name": "小王国Bogner博格纳 女款短款双面排骨马甲ALIAH代购8.16",
            "expected": "ALIAH",
            "description": "Should extract single English word ALIAH",
        },
        {
            "name": "小王国Bogner博格纳女款浅色羽绒夹克KOSY 代购8.16",
            "expected": "KOSY",
            "description": "Should extract single English word KOSY",
        },

        # Additional user test cases
        {
            "name": "ArcTeryx 始祖鸟男款极光白SIMA HOODY带帽防晒服X6592代购6.20M",
            "expected": "X6592",
            "description": "Should extract alphanumeric code X6592",
        },
        {
            "name": "小王国 Bogner博格纳 女款蓝色排骨羽绒服FARAH-D 368 8.16",
            "expected": "FARAH-D 368",
            "description": "Should extract multi-part code FARAH-D 368",
        },
        {
            "name": "小王国 Aritzia 蓝色腋下包 117077 8.17 LAMC",
            "expected": "117077",
            "description": "Should extract numeric code 117077",
        },
        {
            "name": "小王国Stuart Weitzman女款黑色牛皮+羊毛短靴CHRLI CZY SJ787 LA",
            "expected": "CHRLI CZY SJ787",
            "description": "Should extract multi-word code CHRLI CZY SJ787",
        },
        {
            "name": "Polo Ralph Lauren 大童 棉质 LOGO马标 牛津衬衫 32367 代购MC",
            "expected": "32367",
            "description": "Should extract numeric code 32367",
        },
        {
            "name": "Moncler 男款 CLUNYE 羽绒服 代购 7.15MC",
            "expected": "CLUNYE",
            "description": "Should extract single English word CLUNYE",
        },
        {
            "name": "小王国Bogner 男款 格纹衬衫 TIMT-2O 762 8.2",
            "expected": "TIMT-2O 762",
            "description": "Should extract multi-part code TIMT-2O 762",
        },
        {
            "name": "小王国 Rag & Bone绿色短款圆领短袖T恤MOSS MAI BOXY TEE8.13",
            "expected": "MOSS MAI BOXY TEE",
            "description": "Should extract multi-word code MOSS MAI BOXY TEE",
        },
        {
            "name": "Alexander McQueen 女款黑色系带马丁靴586402 1000代购7.30",
            "expected": "586402 1000",
            "description": "Should extract numeric code with space 586402 1000",
        },
        {
            "name": "小王国Bogner 博格纳男款 白色带帽羽绒服TINO-D 8.2",
            "expected": "TINO-D",
            "description": "Should extract alphanumeric code TINO-D",
        },
    ]

    # Diagnostic-only inputs, built once because they are identical for every
    # case. They influence what is printed, never the pass/fail verdict.
    # NOTE(review): these are defined locally rather than read from the
    # extractor — presumably they mirror its internals; confirm they are
    # still in sync with app.utils.text_parser.
    procurement_keywords = {'AT', 'MC', 'GN', 'NY', 'LA', 'AP', 'SS', 'ZT', 'CN', '现货'}
    code_patterns = [
        re.compile(pattern)
        for pattern in (
            r'[A-Za-z0-9]+(?:[-\s/][A-Za-z0-9]+)+',
            r'[A-Za-z]+\d+[A-Za-z0-9]*',
            r'\d+[A-Za-z]+[A-Za-z0-9]*',
            r'\b[A-Z]{2,}[0-9]{2,}\b',
            r'\b\d{6,}\b',
            r'[A-Za-z0-9]{6,}(?=\s|[\u4e00-\u9fff]|$)',
        )
    ]

    print("Testing Enhanced Product Code Extraction")
    print("=" * 50)

    passed = 0
    total = len(test_cases)

    for i, case in enumerate(test_cases, 1):
        result = extractor.extract_product_code(case["name"])
        expected = case["expected"]

        print(f"\nTest {i}: {case['description']}")
        print(f"Input: {case['name']}")
        print(f"Expected: {expected}")
        print(f"Got: {result}")

        # Debug: show what patterns matched
        # The second argument is passed as None — presumably "no known
        # brand"; confirm against ProductCodeExtractor._remove_brand.
        clean_name = extractor._remove_brand(case["name"], None)
        print(f"Clean name: {clean_name}")

        all_matches = []
        for pattern_number, pattern in enumerate(code_patterns, 1):
            matches = pattern.findall(clean_name)
            if matches:
                print(f"  Pattern {pattern_number} matched: {matches}")
                all_matches.extend(matches)

        # Report how the extractor's own validity check classifies each
        # candidate found above.
        for code in all_matches:
            if extractor._is_valid_product_code(code, procurement_keywords):
                print(f"  Valid code: {code}")
            else:
                print(f"  Invalid code: {code}")

        # The verdict depends only on the extractor's public result.
        if result == expected:
            print("✅ PASS")
            passed += 1
        else:
            print("❌ FAIL")

    print(f"\n{'='*50}")
    print(f"Results: {passed}/{total} tests passed ({passed/total*100:.1f}%)")

    return passed == total

if __name__ == "__main__":
    # Script entry point: exit status 0 when every case passed, 1 otherwise.
    exit_code = 0 if test_extraction() else 1
    sys.exit(exit_code)