#!/usr/bin/env python3
"""
全面分析Bogner品牌货号缺失问题
"""

import sys
import os
import asyncio
from sqlalchemy import text

# 添加项目根目录到路径
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from app.core.database import AsyncSessionLocal
from app.utils.text_parser import ProductCodeExtractor

async def analyze_bogner_comprehensive():
    """全面分析Bogner品牌货号缺失情况"""
    
    async with AsyncSessionLocal() as session:
        # 查询Bogner品牌的所有订单
        result = await session.execute(text("""
            SELECT 线上宝贝名称, 货号, 线上销售属性
            FROM order_items_norm 
            WHERE 品牌 = 'Bogner'
            ORDER BY id
        """))
        
        bogner_orders = result.fetchall()
        
        print(f"=== Bogner品牌货号全面分析 ===")
        print(f"总数: {len(bogner_orders)}")
        
        missing_count = 0
        extracted_count = 0
        extractor = ProductCodeExtractor()
        
        missing_examples = []
        extracted_examples = []
        
        for i, order in enumerate(bogner_orders, 1):
            product_name = order[0]
            current_code = order[1]
            sales_attrs = order[2] or ""
            
            # 测试当前逻辑能否提取到货号
            extracted = extractor.extract_product_code(product_name, "Bogner", sales_attrs)
            
            if not current_code or current_code.strip() == "":
                missing_count += 1
                if extracted:
                    extracted_count += 1
                    extracted_examples.append({
                        'name': product_name,
                        'extracted': extracted,
                        'sales_attrs': sales_attrs
                    })
                else:
                    missing_examples.append({
                        'name': product_name,
                        'sales_attrs': sales_attrs
                    })
        
        # 统计
        has_code_count = len(bogner_orders) - missing_count
        can_extract_count = extracted_count
        
        print(f"\n=== 统计结果 ===")
        print(f"数据库有货号: {has_code_count}/{len(bogner_orders)} ({has_code_count/len(bogner_orders)*100:.1f}%)")
        print(f"数据库缺失货号: {missing_count}/{len(bogner_orders)} ({missing_count/len(bogner_orders)*100:.1f}%)")
        print(f"缺失中可提取: {can_extract_count}/{missing_count} ({can_extract_count/missing_count*100:.1f}%)")
        print(f"缺失中无法提取: {len(missing_examples)}/{missing_count} ({len(missing_examples)/missing_count*100:.1f}%)")
        
        print(f"\n=== 可以提取但数据库缺失的货号（前10个）===")
        for i, example in enumerate(extracted_examples[:10], 1):
            print(f"{i:2d}. 商品名称: {example['name']}")
            print(f"    提取货号: {example['extracted']}")
            print(f"    销售属性: {example['sales_attrs']}")
            print()
        
        print(f"\n=== 无法提取货号的商品（前15个）===")
        for i, example in enumerate(missing_examples[:15], 1):
            print(f"{i:2d}. 商品名称: {example['name']}")
            print(f"    销售属性: {example['sales_attrs']}")
            print()

if __name__ == "__main__":
    asyncio.run(analyze_bogner_comprehensive())