#!/usr/bin/env python3
"""
批量更新数据库中的颜色处理
应用新的颜色处理逻辑到现有数据
"""

import asyncio
import sys
import os
from datetime import datetime

# 添加项目根目录到 Python 路径
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from app.core.database import get_db
from app.utils.color_processor import process_color_attribute
from sqlalchemy import select, update, text


async def update_color_processing():
    """批量更新颜色处理"""
    print("=== 开始批量更新颜色处理 ===\n")
    
    async for db in get_db():
        try:
            # 1. 统计需要更新的记录
            print("1. 统计需要更新的记录...")
            
            result = await db.execute(text("""
                SELECT COUNT(*) 
                FROM order_items_norm 
                WHERE 颜色 IS NOT NULL AND 颜色 != ''
            """))
            
            total_records = result.scalar()
            print(f"   数据库中共有 {total_records} 条有颜色信息的记录")
            
            # 2. 分批处理记录
            batch_size = 100
            updated_count = 0
            processed_count = 0
            
            print(f"\n2. 开始分批处理（每批 {batch_size} 条）...")
            
            offset = 0
            while True:
                # 获取一批记录
                result = await db.execute(text(f"""
                    SELECT id, 颜色 
                    FROM order_items_norm 
                    WHERE 颜色 IS NOT NULL AND 颜色 != ''
                    LIMIT {batch_size} OFFSET {offset}
                """))
                
                batch = result.fetchall()
                if not batch:
                    break
                
                batch_updates = []
                for record_id, current_color in batch:
                    processed_count += 1
                    
                    # 应用新的颜色处理逻辑
                    optimized_color = process_color_attribute(current_color)
                    
                    # 如果颜色有改变，记录更新
                    if optimized_color and optimized_color != current_color:
                        batch_updates.append({
                            'id': record_id,
                            'old_color': current_color,
                            'new_color': optimized_color
                        })
                
                # 批量执行更新
                if batch_updates:
                    for update_info in batch_updates:
                        await db.execute(text("""
                            UPDATE order_items_norm 
                            SET 颜色 = :new_color, updated_at = :now
                            WHERE id = :record_id
                        """), {
                            'new_color': update_info['new_color'],
                            'record_id': update_info['id'],
                            'now': datetime.now()
                        })
                        
                        updated_count += 1
                        
                        # 输出更新详情
                        if updated_count <= 50:  # 只显示前50个更新
                            print(f"   {updated_count:3d}. ID {update_info['id']:6d}: {update_info['old_color']:20} -> {update_info['new_color']}")
                        elif updated_count == 51:
                            print("   ... (更多更新记录，不再显示)")
                
                # 提交批次
                await db.commit()
                
                # 进度提示
                if processed_count % 500 == 0 or processed_count == total_records:
                    print(f"   已处理 {processed_count}/{total_records} 条记录，已更新 {updated_count} 条")
                
                offset += batch_size
            
            print(f"\n3. 更新完成")
            print(f"   总处理记录数: {processed_count}")
            print(f"   实际更新记录数: {updated_count}")
            print(f"   无需更新记录数: {processed_count - updated_count}")
            
            # 4. 验证更新结果
            print(f"\n4. 验证更新结果...")
            
            # 展示一些更新后的样本
            result = await db.execute(text("""
                SELECT 线上宝贝名称, 颜色
                FROM order_items_norm 
                WHERE 颜色 IS NOT NULL 
                AND 颜色 LIKE '%(%)'
                ORDER BY updated_at DESC 
                LIMIT 10
            """))
            
            samples = result.fetchall()
            if samples:
                print("   更新后的颜色样本:")
                for i, (product_name, color) in enumerate(samples, 1):
                    print(f"   {i:2d}. {product_name[:40]:40} -> {color}")
            
            # 统计更新后的颜色格式分布
            result = await db.execute(text("""
                SELECT 
                    CASE 
                        WHEN 颜色 LIKE '%(%' THEN '中文+英文参考'
                        WHEN 颜色 REGEXP '^[A-Za-z\\s\\-]+$' THEN '纯英文'  
                        WHEN 颜色 REGEXP '^[\\u4e00-\\u9fff]+$' THEN '纯中文'
                        ELSE '其他格式'
                    END as color_format,
                    COUNT(*) as count
                FROM order_items_norm 
                WHERE 颜色 IS NOT NULL AND 颜色 != ''
                GROUP BY color_format
                ORDER BY count DESC
            """))
            
            print(f"\n   颜色格式分布统计:")
            format_stats = result.fetchall()
            for format_type, count in format_stats:
                print(f"   {format_type:15}: {count:6d} 条记录")
            
            print(f"\n=== 颜色处理更新完成 ===")
            
        except Exception as e:
            await db.rollback()
            print(f"更新过程中发生错误: {e}")
            import traceback
            traceback.print_exc()
        
        break


async def preview_color_updates():
    """预览即将进行的颜色更新（不实际更新）"""
    print("=== 预览颜色更新 ===\n")
    
    async for db in get_db():
        try:
            result = await db.execute(text("""
                SELECT 线上宝贝名称, 颜色
                FROM order_items_norm 
                WHERE 颜色 IS NOT NULL AND 颜色 != ''
                ORDER BY updated_at DESC 
                LIMIT 50
            """))
            
            records = result.fetchall()
            
            print("当前颜色格式预览（前50条）:")
            print("序号 | 商品名称 | 当前颜色 | 优化后颜色 | 变化")
            print("-" * 120)
            
            change_count = 0
            for i, (product_name, current_color) in enumerate(records, 1):
                optimized_color = process_color_attribute(current_color)
                changed = optimized_color != current_color if optimized_color else False
                
                if changed:
                    change_count += 1
                
                status = "需要更新" if changed else "无需更新"
                optimized_display = optimized_color if optimized_color else current_color
                
                print(f"{i:2d}   | {product_name[:25]:25} | {current_color[:15]:15} | {optimized_display[:20]:20} | {status}")
            
            print(f"\n预览结果:")
            print(f"  检查记录数: {len(records)}")
            print(f"  需要更新数: {change_count}")
            print(f"  无需更新数: {len(records) - change_count}")
            
        except Exception as e:
            print(f"预览过程中发生错误: {e}")
            import traceback
            traceback.print_exc()
        
        break


async def main():
    """主函数"""
    print("颜色处理批量更新工具")
    print("=" * 60)
    
    import sys
    
    if len(sys.argv) > 1 and sys.argv[1] == '--preview':
        # 预览模式
        await preview_color_updates()
    elif len(sys.argv) > 1 and sys.argv[1] == '--update':
        # 更新模式（自动确认）
        print("开始执行颜色处理更新...")
        await update_color_processing()
    else:
        # 默认预览模式
        print("使用说明:")
        print("  python update_color_processing.py --preview  # 预览更新")
        print("  python update_color_processing.py --update   # 执行更新")
        print("")
        await preview_color_updates()


if __name__ == "__main__":
    asyncio.run(main())