#!/usr/bin/env python3
"""
Search for any products that still have problematic brand names
"""
import sys
import asyncio
sys.path.append('backend')

from sqlalchemy import select, func, text, or_
from app.core.database import get_db
from app.models.products_master import ProductMaster

async def search_problematic_brands():
    """Search for products with problematic brand names"""
    print("搜索有问题的品牌名...")
    
    try:
        async for db in get_db():
            # 1. Look for brands that are unusually long (likely include product description)
            print("=== 1. 检查过长的品牌名 (>30字符) ===")
            long_brand_query = select(ProductMaster).where(
                func.length(ProductMaster.品牌) > 30
            ).limit(10)
            
            long_brands_result = await db.execute(long_brand_query)
            long_brand_products = long_brands_result.scalars().all()
            
            if long_brand_products:
                print(f"发现 {len(long_brand_products)} 个过长品牌名的产品:")
                for i, product in enumerate(long_brand_products, 1):
                    print(f"\n{i}. 产品名称: {product.线上宝贝名称}")
                    print(f"   品牌: '{product.品牌}' (长度: {len(product.品牌)})")
                    print(f"   货号: '{product.货号}'")
            else:
                print("✅ 没有发现过长的品牌名")
            
            # 2. Look for brands that contain "代购" or dates
            print(f"\n=== 2. 检查包含'代购'或日期的品牌名 ===")
            daigou_query = select(ProductMaster).where(
                or_(
                    ProductMaster.品牌.like('%代购%'),
                    ProductMaster.品牌.like('%8.1%'),
                    ProductMaster.品牌.like('%MC%'),
                    ProductMaster.品牌.like('%LA%')
                )
            ).limit(10)
            
            daigou_result = await db.execute(daigou_query)
            daigou_products = daigou_result.scalars().all()
            
            if daigou_products:
                print(f"发现 {len(daigou_products)} 个包含'代购'或日期的品牌名:")
                for i, product in enumerate(daigou_products, 1):
                    print(f"\n{i}. 产品名称: {product.线上宝贝名称}")
                    print(f"   品牌: '{product.品牌}'")
                    print(f"   货号: '{product.货号}'")
            else:
                print("✅ 没有发现包含'代购'或日期的品牌名")
            
            # 3. Look for Tods products with problematic brand names
            print(f"\n=== 3. 检查所有Tods相关产品的品牌名 ===")
            tods_query = select(ProductMaster).where(
                or_(
                    ProductMaster.线上宝贝名称.like('%Tods%'),
                    ProductMaster.品牌.like('%Tods%')
                )
            )
            
            tods_result = await db.execute(tods_query)
            tods_products = tods_result.scalars().all()
            
            print(f"找到 {len(tods_products)} 个Tods相关产品:")
            problematic_tods = []
            for product in tods_products:
                if product.品牌 != "Tods":
                    problematic_tods.append(product)
            
            if problematic_tods:
                print(f"发现 {len(problematic_tods)} 个有问题的Tods产品:")
                for i, product in enumerate(problematic_tods, 1):
                    print(f"\n{i}. 产品名称: {product.线上宝贝名称}")
                    print(f"   品牌: '{product.品牌}' ← 应该是 'Tods'")
                    print(f"   货号: '{product.货号}'")
            else:
                print("✅ 所有Tods产品的品牌名都正确")
            
            # 4. Statistics on brand name lengths
            print(f"\n=== 4. 品牌名长度统计 ===")
            stats_query = select(
                func.min(func.length(ProductMaster.品牌)).label('min_len'),
                func.max(func.length(ProductMaster.品牌)).label('max_len'),
                func.avg(func.length(ProductMaster.品牌)).label('avg_len'),
                func.count(ProductMaster.id).label('total_products')
            )
            
            stats_result = await db.execute(stats_query)
            stats = stats_result.first()
            
            print(f"品牌名长度统计:")
            print(f"  最短: {stats.min_len} 字符")
            print(f"  最长: {stats.max_len} 字符")  
            print(f"  平均: {stats.avg_len:.1f} 字符")
            print(f"  总产品数: {stats.total_products}")
            
            # 5. Show distribution of brand name lengths
            print(f"\n=== 5. 品牌名长度分布 ===")
            length_dist_query = text("""
                SELECT 
                    CASE 
                        WHEN length(品牌) <= 10 THEN '≤10字符'
                        WHEN length(品牌) <= 20 THEN '11-20字符'
                        WHEN length(品牌) <= 30 THEN '21-30字符'
                        WHEN length(品牌) <= 50 THEN '31-50字符'
                        ELSE '>50字符'
                    END as length_range,
                    COUNT(*) as count,
                    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM products_master), 1) as percentage
                FROM products_master 
                GROUP BY length_range
                ORDER BY MIN(length(品牌))
            """)
            
            length_result = await db.execute(length_dist_query)
            length_rows = length_result.fetchall()
            
            for row in length_rows:
                print(f"  {row[0]}: {row[1]} 个产品 ({row[2]}%)")
            
            break
            
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    asyncio.run(search_problematic_brands())