#!/usr/bin/env python3
"""
重新运行所有订单的货号标准化处理
使用改进后的货号提取逻辑
"""

import asyncio
import sys
import os
from datetime import datetime

# 添加项目根目录到路径
sys.path.insert(0, os.getcwd())

from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select, update, func
from app.models.normalized_orders import OrderItemNorm
from app.core.config import settings
from app.utils.text_parser import ProductCodeExtractor

async def rerun_normalization():
    """重新运行所有订单的货号标准化"""
    
    engine = create_async_engine(settings.DATABASE_URL)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    
    extractor = ProductCodeExtractor()
    
    async with async_session() as session:
        try:
            # 获取所有订单的总数
            total_result = await session.execute(select(func.count(OrderItemNorm.id)))
            total_count = total_result.scalar()
            
            print(f"=== 开始重新标准化所有订单的货号 ===")
            print(f"总订单数量: {total_count}")
            print(f"开始时间: {datetime.now()}")
            print()
            
            # 分批处理订单
            batch_size = 100
            updated_count = 0
            processed_count = 0
            no_change_count = 0
            error_count = 0
            
            offset = 0
            while True:
                # 获取一批订单
                result = await session.execute(
                    select(OrderItemNorm)
                    .offset(offset)
                    .limit(batch_size)
                )
                orders = result.fetchall()
                
                if not orders:
                    break
                    
                for order_row in orders:
                    order = order_row[0]
                    processed_count += 1
                    
                    try:
                        # 提取新的货号
                        new_product_code = extractor.extract_product_code(
                            order.线上宝贝名称 or "", 
                            order.品牌
                        )
                        
                        # 如果货号发生变化，更新数据库
                        if new_product_code != order.货号:
                            old_code = order.货号
                            order.货号 = new_product_code
                            updated_count += 1
                            
                            # 每100个更新打印一次进度
                            if updated_count % 100 == 0 or updated_count <= 10:
                                print(f"更新 #{updated_count}: {order.线上宝贝名称[:50]}...")
                                print(f"  旧货号: {old_code}")
                                print(f"  新货号: {new_product_code}")
                                print()
                        else:
                            no_change_count += 1
                            
                        # 每1000个处理打印进度
                        if processed_count % 1000 == 0:
                            print(f"已处理: {processed_count}/{total_count} ({processed_count/total_count*100:.1f}%)")
                            
                    except Exception as e:
                        error_count += 1
                        print(f"处理订单 {order.id} 时出错: {str(e)}")
                        continue
                
                # 提交这一批的更改
                await session.commit()
                offset += batch_size
                
            print(f"\n=== 标准化完成 ===")
            print(f"处理订单总数: {processed_count}")
            print(f"更新的订单数: {updated_count}")
            print(f"未变更订单数: {no_change_count}")
            print(f"错误订单数: {error_count}")
            print(f"完成时间: {datetime.now()}")
            
            # 统计货号覆盖率
            print(f"\n=== 货号覆盖率统计 ===")
            
            # 总订单数
            total_result = await session.execute(select(func.count(OrderItemNorm.id)))
            total_orders = total_result.scalar()
            
            # 有货号的订单数
            with_code_result = await session.execute(
                select(func.count(OrderItemNorm.id)).where(OrderItemNorm.货号.isnot(None))
            )
            orders_with_code = with_code_result.scalar()
            
            # 没有货号的订单数
            without_code = total_orders - orders_with_code
            coverage_rate = (orders_with_code / total_orders * 100) if total_orders > 0 else 0
            
            print(f"总订单数: {total_orders}")
            print(f"有货号订单数: {orders_with_code}")
            print(f"无货号订单数: {without_code}")
            print(f"货号覆盖率: {coverage_rate:.1f}%")
            
            # 显示一些没有货号的订单示例
            if without_code > 0:
                print(f"\n=== 无货号订单示例 ===")
                no_code_result = await session.execute(
                    select(OrderItemNorm.线上宝贝名称, OrderItemNorm.品牌)
                    .where(OrderItemNorm.货号.is_(None))
                    .limit(10)
                )
                no_code_orders = no_code_result.fetchall()
                
                for i, (product_name, brand) in enumerate(no_code_orders, 1):
                    print(f"{i}. [{brand}] {product_name}")
                    
        except Exception as e:
            await session.rollback()
            print(f"标准化过程中出错: {str(e)}")
            raise
        finally:
            await engine.dispose()

if __name__ == "__main__":
    asyncio.run(rerun_normalization())