#!/usr/bin/env python3
"""
重新生成product_key使用正确的SKU定义
SKU = 线上宝贝名称 + 线上销售属性
SPU = 线上宝贝名称
"""

import asyncio
import logging
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
from sqlalchemy import select, update, text
from app.models.normalized_orders import OrderItemNorm
from app.models.products import Product, PendingPurchase
from app.utils.file_hash import calculate_data_sha1

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def regenerate_product_keys():
    """重新生成所有product_key"""
    engine = create_async_engine('sqlite+aiosqlite:///./ordersys.db')
    async_session = async_sessionmaker(engine, expire_on_commit=False)
    
    async with async_session() as session:
        try:
            # 1. 先备份原有的product_key
            logger.info("备份原有product_key...")
            await session.execute(text("""
                ALTER TABLE order_items_norm 
                ADD COLUMN old_product_key VARCHAR(64)
            """))
            await session.execute(text("""
                UPDATE order_items_norm 
                SET old_product_key = product_key
            """))
            await session.commit()
            logger.info("备份完成")
        except Exception as e:
            logger.info(f"备份列可能已存在: {e}")
            await session.rollback()
        
        # 1.5 删除唯一性约束（SQLite需要重建表）
        logger.info("移除product_key唯一性约束...")
        try:
            # SQLite不支持直接删除约束，需要重建表
            # 这里我们直接更新为NULL，然后批量更新
            await session.execute(text("""
                UPDATE order_items_norm 
                SET product_key = NULL
            """))
            await session.commit()
            logger.info("清空product_key完成")
        except Exception as e:
            logger.error(f"清空product_key失败: {e}")
            await session.rollback()
        
        # 2. 获取所有标准化订单
        logger.info("获取所有标准化订单...")
        result = await session.execute(
            select(OrderItemNorm)
        )
        orders = result.scalars().all()
        logger.info(f"找到 {len(orders)} 条订单记录")
        
        # 3. 批量更新product_key
        logger.info("开始更新product_key...")
        update_count = 0
        batch_size = 100
        
        for i in range(0, len(orders), batch_size):
            batch = orders[i:i+batch_size]
            
            for order in batch:
                # 使用新的SKU定义生成product_key
                # SKU = 线上宝贝名称 + 线上销售属性
                sku_components = []
                
                # 线上宝贝名称（SPU）
                if order.线上宝贝名称:
                    sku_components.append(str(order.线上宝贝名称))
                else:
                    sku_components.append("")
                
                # 线上销售属性（区分SKU的关键）
                if order.线上销售属性:
                    sku_components.append(str(order.线上销售属性))
                else:
                    sku_components.append("")
                
                # 生成新的product_key
                combined_key = '|'.join(sku_components)
                new_product_key = calculate_data_sha1(combined_key)
                
                # 使用SQL直接更新，避免ORM约束
                await session.execute(text("""
                    UPDATE order_items_norm 
                    SET product_key = :new_key
                    WHERE id = :order_id
                """), {"new_key": new_product_key, "order_id": order.id})
                update_count += 1
            
            await session.commit()
            logger.info(f"已更新 {min(i+batch_size, len(orders))}/{len(orders)} 条记录")
        
        logger.info(f"product_key更新完成，共更新 {update_count} 条记录")
        
        # 4. 清理旧的产品数据
        logger.info("清理旧的产品聚合数据...")
        await session.execute(text("DELETE FROM products"))
        await session.execute(text("DELETE FROM pending_purchases"))
        await session.commit()
        logger.info("旧数据清理完成")
        
        # 5. 显示统计信息
        unique_keys = await session.execute(text("""
            SELECT COUNT(DISTINCT product_key) 
            FROM order_items_norm
        """))
        unique_count = unique_keys.scalar()
        
        logger.info("\n=== 更新统计 ===")
        logger.info(f"总订单记录数: {len(orders)}")
        logger.info(f"唯一SKU数量: {unique_count}")
        logger.info(f"平均每个SKU订单数: {len(orders)/unique_count:.2f}")
        
        # 显示聚合效果示例
        examples = await session.execute(text("""
            SELECT 线上宝贝名称, 线上销售属性, COUNT(*) as cnt
            FROM order_items_norm
            WHERE 线上宝贝名称 IS NOT NULL
            GROUP BY product_key
            HAVING COUNT(*) > 1
            ORDER BY cnt DESC
            LIMIT 5
        """))
        
        logger.info("\n=== 聚合效果示例（相同SKU的订单数） ===")
        for row in examples:
            logger.info(f"产品: {row[0][:50]}...")
            logger.info(f"属性: {row[1] or '无'}")
            logger.info(f"订单数: {row[2]}")
            logger.info("-" * 50)


if __name__ == "__main__":
    asyncio.run(regenerate_product_keys())
    print("\n✅ Product key重新生成完成！")
    print("📋 下一步：运行产品聚合服务重新生成产品数据")
    print("命令：python run_product_aggregation.py")