#!/usr/bin/env python3
"""
Check brand recognition issues in ProductMaster
"""
import sys
import asyncio
sys.path.append('backend')

from sqlalchemy import select, func, text
from app.core.database import get_db, init_db
from app.models.products_master import ProductMaster

def _preview(value, limit):
    """Return the first ``limit`` characters of ``value``; ``None`` yields ''."""
    # The name column may be NULL; slicing None would raise TypeError and
    # abort the whole report, so fall back to an empty string.
    return (value or '')[:limit]


async def _report_known_brand_issues(db):
    """Print up to 20 products whose brand starts with MaxMara, Tods or SportMax."""
    print("Looking for MaxMara, Tods, SportMax brand issues...")

    brand_check_query = select(ProductMaster).where(
        ProductMaster.品牌.like('MaxMara%') |
        ProductMaster.品牌.like('Tods%') |
        ProductMaster.品牌.like('SportMax%')
    ).limit(20)

    problem_products = (await db.execute(brand_check_query)).scalars().all()

    if not problem_products:
        print("No obvious brand issues found with these specific patterns.")
        return

    print(f"\nFound {len(problem_products)} products with potential brand issues:")
    for i, product in enumerate(problem_products, 1):
        print(f"{i}. Product: {_preview(product.线上宝贝名称, 80)}...")
        print(f"   Brand: '{product.品牌}'")
        print(f"   Product Code: '{product.货号 or 'None'}'")
        print("")


async def _report_unusual_brands(db):
    """Print up to 15 products whose brand matches a suspicious pattern or is long."""
    print("\nChecking for brands with unusual content...")

    # The pattern flags brands containing three or more consecutive digits,
    # the literal "代购", "8." followed by digits, or ending in 2-3 uppercase
    # ASCII letters. Note the last alternative also matches ordinary all-caps
    # brand names of two or more letters.
    # NOTE(review): func.length counts bytes rather than characters on some
    # backends (e.g. MySQL), so the "> 30" threshold may differ from the
    # Python len() printed below for non-ASCII brands -- confirm for this DB.
    unusual_brand_query = select(ProductMaster).where(
        ProductMaster.品牌.regexp_match(r'[0-9]{3,}|代购|8\.[0-9]+|[A-Z]{2,3}$') |
        (func.length(ProductMaster.品牌) > 30)
    ).limit(15)

    unusual_brands = (await db.execute(unusual_brand_query)).scalars().all()

    # Nothing is printed for this section when no rows match.
    if unusual_brands:
        print(f"Found {len(unusual_brands)} products with unusual brand patterns:")
        for i, product in enumerate(unusual_brands, 1):
            print(f"{i}. Product: {_preview(product.线上宝贝名称, 60)}...")
            print(f"   Brand: '{product.品牌}'")
            print(f"   Length: {len(product.品牌 or '')}")
            print("")


async def _report_top_brands(db):
    """Print the 15 most frequent non-NULL brand values with their row counts."""
    print("\nTop 15 brands by frequency:")

    product_count = func.count(ProductMaster.id)
    brand_stats_query = select(
        ProductMaster.品牌,
        product_count.label('count')
    ).where(
        ProductMaster.品牌.is_not(None)
    ).group_by(
        ProductMaster.品牌
    ).order_by(
        product_count.desc()
    ).limit(15)

    top_brands = (await db.execute(brand_stats_query)).fetchall()

    for i, (brand, count) in enumerate(top_brands, 1):
        print(f"{i:2d}. {brand} ({count} products)")


async def check_brand_issues():
    """Print a diagnostic report about brand values stored in ProductMaster.

    Using the first session yielded by ``get_db()``, three read-only queries
    are executed and their results printed to stdout:

    1. products whose brand starts with ``MaxMara``, ``Tods`` or ``SportMax``;
    2. products whose brand matches a suspicious pattern or exceeds 30 in
       length;
    3. the 15 most frequent brand values.

    Any exception is caught, printed together with its traceback, and not
    re-raised, so this coroutine always returns ``None``.
    """
    print("Checking brand recognition issues...")

    try:
        session_source = get_db()
        try:
            async for db in session_source:
                await _report_known_brand_issues(db)
                await _report_unusual_brands(db)
                await _report_top_brands(db)
                # Only the first yielded session is needed.
                break
        finally:
            # Breaking out of ``async for`` leaves an async generator
            # suspended; close it explicitly so its cleanup code runs now
            # instead of at garbage collection / event-loop shutdown.
            # getattr keeps this working if get_db() returns a plain async
            # iterator without aclose().
            close_source = getattr(session_source, "aclose", None)
            if close_source is not None:
                await close_source()

    except Exception as e:
        # Best-effort diagnostic script: report the failure and return.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    # Script entry point: build the report coroutine and drive it to
    # completion on a fresh event loop.
    report = check_brand_issues()
    asyncio.run(report)