"""
产品主表生成服务 - 架构优化的核心服务
直接从原始订单生成产品主表，替代标准化订单步骤
"""

import logging
import asyncio
import hashlib
import json
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, update, and_, or_, func, delete
from sqlalchemy.exc import IntegrityError

from app.core.config import settings
from app.models.raw_orders import RawOrder
from app.models.products_master import ProductMaster, ProductSourceMapping, ImageDownloadLog
from app.services.alias_service import AliasService
from app.utils.text_parser import BrandExtractor, ProductCodeExtractor, AttributeExtractor
from app.utils.image_processor import ImageProcessor
from app.utils.file_hash import calculate_data_sha1
from app.services.cache_service import cache_service

logger = logging.getLogger(__name__)


class ProductsMasterService:
    """产品主表生成服务 - 系统优化的核心"""
    
    def __init__(self):
        self.alias_service = AliasService()
        self.image_processor = ImageProcessor(settings.IMAGE_SPLITTER)
        
        # 解析器
        self.brand_extractor: Optional[BrandExtractor] = None
        self.product_code_extractor = ProductCodeExtractor()
        self.attribute_extractor: Optional[AttributeExtractor] = None
        
        # 缓存
        self._brand_aliases_cache: Optional[Dict[str, str]] = None
        self._color_aliases_cache: Optional[Dict[str, str]] = None
        self._size_aliases_cache: Optional[Dict[str, str]] = None
    
    async def initialize_extractors(self, db: AsyncSession):
        """初始化解析器"""
        # 加载别名映射
        self._brand_aliases_cache = await self.alias_service.load_brand_aliases(db)
        self._color_aliases_cache = await self.alias_service.load_color_aliases(db)
        self._size_aliases_cache = await self.alias_service.load_size_aliases(db)
        
        # 初始化解析器
        self.brand_extractor = BrandExtractor(self._brand_aliases_cache)
        self.attribute_extractor = AttributeExtractor(
            self._color_aliases_cache,
            self._size_aliases_cache
        )
        
        logger.info(f"初始化解析器完成: {len(self._brand_aliases_cache)} 品牌, "
                   f"{len(self._color_aliases_cache)} 颜色, {len(self._size_aliases_cache)} 尺寸")
    
    async def generate_products_master(self, 
                                     db: AsyncSession,
                                     batch_size: int = None,
                                     force_rebuild: bool = False) -> Dict[str, Any]:
        """
        从原始订单生成产品主表 - 核心方法
        
        Args:
            db: 数据库会话
            batch_size: 批处理大小
            force_rebuild: 是否强制重建
            
        Returns:
            处理结果统计
        """
        batch_size = batch_size or settings.BATCH_SIZE
        
        # 初始化解析器
        await self.initialize_extractors(db)
        
        # 如果强制重建，清空产品主表
        if force_rebuild:
            await self._clear_products_master(db)
        
        # 获取需要处理的原始订单
        raw_orders = await self._get_raw_orders_for_processing(db, force_rebuild)
        
        result = {
            'total_raw_orders': len(raw_orders),
            'processed': 0,
            'created': 0,
            'updated': 0,
            'failed': 0,
            'skipped': 0,
            'errors': [],
            'products_count': 0
        }
        
        if not raw_orders:
            logger.info("没有需要处理的原始订单")
            return result
        
        logger.info(f"开始处理 {len(raw_orders)} 个原始订单...")
        
        # 分批处理
        for i in range(0, len(raw_orders), batch_size):
            batch = raw_orders[i:i + batch_size]
            batch_result = await self._process_batch(db, batch)
            
            # 合并结果
            for key in ['processed', 'created', 'updated', 'failed', 'skipped']:
                result[key] += batch_result[key]
            result['errors'].extend(batch_result['errors'])
            
            logger.info(f"批次 {i//batch_size + 1} 完成: "
                       f"{batch_result['processed']} 处理, "
                       f"{batch_result['created']} 创建, "
                       f"{batch_result['updated']} 更新")
        
        # 统计最终产品数量
        final_count = await self._count_products_master(db)
        result['products_count'] = final_count
        
        logger.info(f"产品主表生成完成: 处理 {result['processed']} 订单, "
                   f"创建 {result['created']} 产品, 更新 {result['updated']} 产品, "
                   f"总产品数: {result['products_count']}")
        
        return result
    
    async def _clear_products_master(self, db: AsyncSession):
        """清空产品主表（强制重建时）"""
        logger.info("强制重建: 清空产品主表...")
        
        # 删除关联表
        await db.execute(delete(ImageDownloadLog))
        await db.execute(delete(ProductSourceMapping))
        await db.execute(delete(ProductMaster))
        
        await db.commit()
        logger.info("产品主表清空完成")
    
    async def _get_raw_orders_for_processing(self, 
                                           db: AsyncSession, 
                                           force_rebuild: bool) -> List[RawOrder]:
        """获取需要处理的原始订单"""
        # 基础过滤条件
        base_filters = [
            RawOrder.原始订单编号.notlike('%合计%'),
            RawOrder.原始订单编号.notlike('%汇总%'),
            RawOrder.原始订单编号.notlike('%总计%'),
            RawOrder.原始订单编号.is_not(None),
            RawOrder.原始订单编号 != '',
            RawOrder.线上宝贝名称.is_not(None),  # 必须有商品名称
            RawOrder.订单单价.is_not(None)       # 必须有单价
        ]
        
        if force_rebuild:
            # 强制重建: 获取所有有效订单
            query = select(RawOrder).where(and_(*base_filters)).order_by(RawOrder.created_at)
        else:
            # 增量处理: 只处理新订单
            # 找出还没有对应产品主表记录的原始订单
            subquery = select(ProductSourceMapping.raw_order_id)
            
            query = (select(RawOrder)
                    .where(and_(*base_filters))
                    .where(RawOrder.id.not_in(subquery))
                    .order_by(RawOrder.created_at))
        
        result = await db.execute(query)
        orders = result.scalars().all()
        
        logger.info(f"获取到 {len(orders)} 个{'所有' if force_rebuild else '新'}订单待处理")
        return orders
    
    async def _process_batch(self, db: AsyncSession, raw_orders: List[RawOrder]) -> Dict[str, Any]:
        """处理一批原始订单"""
        result = {
            'processed': 0,
            'created': 0, 
            'updated': 0,
            'failed': 0,
            'skipped': 0,
            'errors': []
        }
        
        for raw_order in raw_orders:
            try:
                order_result = await self._process_single_order(db, raw_order)
                result['processed'] += 1
                result[order_result['action']] += 1
                
                # 每个订单单独提交，避免整批失败
                await db.commit()
                
            except Exception as e:
                # 单个订单失败，回滚并继续处理下一个
                await db.rollback()
                result['failed'] += 1
                error_msg = f"处理订单 {raw_order.id} ({raw_order.原始订单编号}) 失败: {str(e)}"
                result['errors'].append(error_msg)
                logger.error(error_msg, exc_info=True)
                continue
        
        return result
    
    async def _process_single_order(self, db: AsyncSession, raw_order: RawOrder) -> Dict[str, Any]:
        """处理单个原始订单"""
        # 1. 生成SKU键 (基于商品名称+销售属性)
        sku_key = self._generate_sku_key(raw_order)
        
        # 2. 检查产品是否已存在
        existing_product = await self._get_existing_product(db, sku_key)
        
        # 3. 解析订单数据
        parsed_data = await self._parse_order_data(raw_order)
        
        if existing_product:
            # 更新现有产品的统计信息
            await self._update_product_statistics(db, existing_product, raw_order, parsed_data)
            await self._create_source_mapping(db, existing_product.id, raw_order)
            return {'action': 'updated'}
        else:
            # 创建新产品
            new_product = await self._create_product_master(db, raw_order, parsed_data, sku_key)
            await self._create_source_mapping(db, new_product.id, raw_order)
            return {'action': 'created'}
    
    def _generate_sku_key(self, raw_order: RawOrder) -> str:
        """
        生成SKU唯一键
        基于: 线上宝贝名称 + 线上销售属性
        """
        product_name = str(raw_order.线上宝贝名称 or "")
        sales_attr = str(raw_order.线上销售属性 or "")
        
        # 组合键值
        combined = f"{product_name}|{sales_attr}"
        
        # 生成SHA256哈希
        return hashlib.sha256(combined.encode('utf-8')).hexdigest()
    
    async def _get_existing_product(self, db: AsyncSession, sku_key: str) -> Optional[ProductMaster]:
        """获取现有产品"""
        result = await db.execute(
            select(ProductMaster).where(ProductMaster.sku_key == sku_key)
        )
        return result.scalar_one_or_none()
    
    async def _parse_order_data(self, raw_order: RawOrder) -> Dict[str, Any]:
        """解析订单数据 - 核心解析逻辑"""
        # 安全的字符串转换
        def safe_str(value):
            if value is None:
                return ""
            if isinstance(value, list):
                return str(value[0]) if value else ""
            return str(value)
        
        product_name = safe_str(raw_order.线上宝贝名称)
        sales_attrs = safe_str(raw_order.线上销售属性)
        image_text = safe_str(raw_order.图片)
        
        # 解析品牌
        brand = None
        if self.brand_extractor:
            brand = self.brand_extractor.extract_brand(product_name)
        
        # 解析货号 - 智能选择策略：只有当商品名称货号是商家编码子集时才使用商家编码
        merchant_code = safe_str(raw_order.线上商家编码)
        product_code = None
        
        # 1. 从商品名称提取货号（主要来源）
        product_name_code = self.product_code_extractor.extract_product_code(
            product_name, brand)
        
        # 2. 从商家编码提取货号
        merchant_code_extracted = None
        if merchant_code and merchant_code.strip():
            merchant_code_extracted = self.product_code_extractor.extract_product_code(
                merchant_code, brand)
        
        # 3. 智能选择逻辑：只有当商品名称货号是商家编码的子集时，才使用商家编码
        if (merchant_code_extracted and product_name_code and 
            product_name_code in merchant_code_extracted):
            product_code = merchant_code_extracted
        elif product_name_code:
            product_code = product_name_code
        elif merchant_code_extracted:
            product_code = merchant_code_extracted
        
        # 4. 如果都没有，尝试从销售属性中提取（作为备用）
        if not product_code and sales_attrs:
            backup_code = self.product_code_extractor.extract_product_code(
                sales_attrs, brand)
            if backup_code:
                product_code = backup_code
        
        # 解析颜色和尺寸
        attributes = {}
        if self.attribute_extractor:
            attributes = self.attribute_extractor.extract_attributes(product_name, sales_attrs)
        
        # 颜色处理优化
        raw_color = attributes.get('color')
        color = raw_color
        if raw_color:
            try:
                from app.utils.color_processor import process_color_attribute
                processed_color = process_color_attribute(raw_color)
                if processed_color:
                    color = processed_color
            except ImportError:
                logger.warning("颜色处理器未找到，使用原始颜色")
        
        size = attributes.get('size')
        
        # 处理图片
        image_links = self.image_processor.process_image_links(image_text)
        original_urls = [link for link in image_links if link.strip()]
        
        # 判断采购方式
        procurement_method = self._determine_procurement_method(product_name, brand, sales_attrs)
        
        # 提取网店名称
        shop_name = safe_str(raw_order.网店名称) if hasattr(raw_order, '网店名称') else None
        
        return {
            'brand': brand,
            'product_code': product_code,
            'color': color,
            'size': size,
            'original_image_urls': original_urls,
            'procurement_method': procurement_method,
            'price': float(raw_order.订单单价) if raw_order.订单单价 else 0.0,
            'quantity': int(raw_order.数量) if raw_order.数量 else 1,
            'revenue': float(raw_order.订单金额) if raw_order.订单金额 else 0.0,
            'shop_name': shop_name,
        }
    
    def _determine_procurement_method(self, product_name: str, brand: str, sales_attributes: str = "") -> str:
        """
        正确的采购方式判断逻辑 - 基于原有系统的准确规则
        优先级：商品名称后缀 > 销售属性 > 品牌 > 默认
        """
        # 导入现有的采购方式判断逻辑
        try:
            from app.services.procurement_order_service_v2 import ProcurementOrderServiceV2
            service = ProcurementOrderServiceV2()
            # V2版本的方法可能需要调整，先检查是否有相同的静态方法
            # 如果V2没有这个方法，使用本地实现
            if hasattr(service, '_determine_procurement_method'):
                return service._determine_procurement_method(product_name or "", sales_attributes or "", brand or "")
            else:
                # 使用下面的本地实现
                raise ImportError("使用本地实现")
        except ImportError:
            # 使用原有系统的完整判断逻辑
            product_name = product_name.strip() if product_name else ""
            sales_attributes = sales_attributes.strip() if sales_attributes else ""
            brand = brand.strip() if brand else ""
            
            # 转为大写便于匹配
            product_name_upper = product_name.upper()
            sales_attributes_upper = sales_attributes.upper()
            brand_upper = brand.upper()
            
            # 规则1: MC库存 - 商品名称以MC/ZT结尾 或 销售属性包含MC/ZT
            if (product_name_upper.endswith(' MC') or 
                product_name_upper.endswith('MC') or
                product_name_upper.endswith(' ZT') or 
                product_name_upper.endswith('ZT') or
                'MC' in sales_attributes_upper or
                'ZT' in sales_attributes_upper):
                return "MC"
            
            # 规则2: SF旧金山 - 名字结尾是SF/SFM/SF M
            if (self._has_sf_suffix(product_name_upper)):
                # 如果属性中有其他地点标记，优先使用属性中的地点
                if self._has_independent_la(sales_attributes_upper):
                    return "LA"
                elif self._has_independent_ny(sales_attributes_upper):
                    return "NY"
                elif 'MC' in sales_attributes_upper or 'ZT' in sales_attributes_upper:
                    return "MC"
                else:
                    return "SF"

            # 规则3: FL佛罗里达 - 名字结尾是FL/FLM/FL M
            if (self._has_fl_suffix(product_name_upper)):
                # 如果属性中有其他地点标记，优先使用属性中的地点
                if self._has_independent_la(sales_attributes_upper):
                    return "LA"
                elif self._has_independent_ny(sales_attributes_upper):
                    return "NY"
                elif 'MC' in sales_attributes_upper or 'ZT' in sales_attributes_upper:
                    return "MC"
                else:
                    return "FL"

            # 规则3.5: CA加拿大 - 名字结尾是CA/CAM/CA M
            if (self._has_ca_suffix(product_name_upper)):
                # 如果属性中有其他地点标记，优先使用属性中的地点
                if self._has_independent_la(sales_attributes_upper):
                    return "LA"
                elif self._has_independent_ny(sales_attributes_upper):
                    return "NY"
                elif 'MC' in sales_attributes_upper or 'ZT' in sales_attributes_upper:
                    return "MC"
                else:
                    return "CA"

            # 规则4: NY属性 - 销售属性中有独立NY标记的优先归类为NY（优先级高于LA结尾）
            if self._has_independent_ny(sales_attributes_upper):
                return "NY"

            # 规则4.5: CA属性 - 销售属性中有独立CA标记的归类为CA
            if self._has_independent_ca(sales_attributes_upper):
                return "CA"

            # 规则5: AT/ATM Atelier - 商品名称以AT/ATM结尾且销售属性不含MC/ZT
            # 支持: AT, ATM, AT M, ATM11.27, AT 11.27 等模式
            if (self._has_at_suffix(product_name_upper) and
                'MC' not in sales_attributes_upper and
                'ZT' not in sales_attributes_upper):
                return "AT"
            
            # 规则4: AP/APM 昌昌 - 商品名称以AP/APM结尾且销售属性不含MC/ZT
            if ((product_name_upper.endswith(' AP') or 
                 product_name_upper.endswith('AP') or
                 product_name_upper.endswith(' APM') or
                 product_name_upper.endswith('APM')) and
                'MC' not in sales_attributes_upper and
                'ZT' not in sales_attributes_upper):
                return "AP"
            
            # 规则5: LA/LAM - 商品名称以LA/LAM结尾的归类为LA（NY属性优先级更高）
            if (product_name_upper.endswith(' LA') or 
                product_name_upper.endswith('LA') or
                product_name_upper.endswith(' LAM') or
                product_name_upper.endswith('LAM')):
                return "LA"
            
            # 规则6: SS/SSM Saks - 商品名称以SS/SSM结尾且销售属性不含MC/ZT
            if ((product_name_upper.endswith(' SS') or 
                 product_name_upper.endswith('SS') or
                 product_name_upper.endswith(' SSM') or
                 product_name_upper.endswith('SSM')) and
                'MC' not in sales_attributes_upper and
                'ZT' not in sales_attributes_upper):
                return "SS"
            
            # 规则7: 没有特定后缀，但销售属性有独立的LA
            suffixes_to_exclude = ['LA', 'MC', 'ZT', 'LAM', 'SS', 'SSM', 'AT', 'ATM', 'AP', 'APM', 'SF', 'SFM', 'FL', 'FLM', 'CA', 'CAM']
            has_excluded_suffix = any(
                product_name_upper.endswith(f' {suffix}') or
                product_name_upper.endswith(suffix)
                for suffix in suffixes_to_exclude
            )

            if not has_excluded_suffix and self._has_independent_la(sales_attributes_upper):
                return "LA"

            # 规则8: 没有特定后缀，但销售属性有独立的SF
            if not has_excluded_suffix and self._has_independent_sf(sales_attributes_upper):
                return "SF"

            # 规则9: 没有特定后缀，但销售属性有独立的FL
            if not has_excluded_suffix and self._has_independent_fl(sales_attributes_upper):
                return "FL"

            # 规则9.5: 没有特定后缀，但销售属性有独立的CA
            if not has_excluded_suffix and self._has_independent_ca(sales_attributes_upper):
                return "CA"

            # 规则10: 国内现货 - 现货关键词或特定品牌
            if (self._has_stock_keywords(product_name) or
                self._is_domestic_brand(brand_upper)):
                return "GN"

            # 规则11: 默认NY
            return "NY"
    
    def _has_sf_suffix(self, product_name_upper: str) -> bool:
        """检查商品名称是否以SF相关后缀结尾"""
        return (product_name_upper.endswith(' SF') or
                product_name_upper.endswith('SF') or
                product_name_upper.endswith(' SFM') or
                product_name_upper.endswith('SFM') or
                product_name_upper.endswith(' SF M') or
                product_name_upper.endswith('SF M'))

    def _has_fl_suffix(self, product_name_upper: str) -> bool:
        """检查商品名称是否以FL相关后缀结尾"""
        return (product_name_upper.endswith(' FL') or
                product_name_upper.endswith('FL') or
                product_name_upper.endswith(' FLM') or
                product_name_upper.endswith('FLM') or
                product_name_upper.endswith(' FL M') or
                product_name_upper.endswith('FL M'))

    def _has_ca_suffix(self, product_name_upper: str) -> bool:
        """检查商品名称是否以CA相关后缀结尾"""
        return (product_name_upper.endswith(' CA') or
                product_name_upper.endswith('CA') or
                product_name_upper.endswith(' CAM') or
                product_name_upper.endswith('CAM') or
                product_name_upper.endswith(' CA M') or
                product_name_upper.endswith('CA M'))

    def _has_at_suffix(self, product_name_upper: str) -> bool:
        """检查商品名称是否以AT相关后缀结尾（包括AT M和日期模式）"""
        import re

        # 基础后缀检查（保持与现有逻辑兼容）
        if (product_name_upper.endswith(' AT') or
            product_name_upper.endswith(' ATM') or
            product_name_upper.endswith(' AT M') or
            product_name_upper.endswith('AT M')):
            return True

        # 检查纯AT/ATM结尾（需确保前面是空格或中文，避免匹配FLAT等词）
        if product_name_upper.endswith('AT') or product_name_upper.endswith('ATM'):
            # 找到AT/ATM的起始位置
            suffix_len = 3 if product_name_upper.endswith('ATM') else 2
            if len(product_name_upper) > suffix_len:
                char_before = product_name_upper[-(suffix_len + 1)]
                # 前面是空格或中文字符（Unicode > 127）才匹配
                if char_before == ' ' or ord(char_before) > 127:
                    return True
                # 新增: 检查 日期+AT 模式（如 1.3AT, 12.20AT）
                # 前面是数字时，检查是否为日期格式
                if char_before.isdigit():
                    # 匹配模式: 空格或中文 + 数字.数字 + AT/ATM
                    pattern_date_before = r'(?:^|[\s\u4e00-\u9fff])\d{1,2}\.\d{1,2}ATM?$'
                    if re.search(pattern_date_before, product_name_upper):
                        return True
                # 新增: 检查 特殊字符+AT 模式（如 ?AT）
                # 前面是特殊字符（非字母数字）时，检查再前面是否是空格
                if not char_before.isalnum() and len(product_name_upper) > suffix_len + 1:
                    char_before_special = product_name_upper[-(suffix_len + 2)]
                    if char_before_special == ' ' or ord(char_before_special) > 127:
                        return True
            elif len(product_name_upper) == suffix_len:
                # 整个字符串就是AT或ATM
                return True

        # 检查 ATM/AT + 日期模式（如 ATM11.27, AT 11.27, AT11.27）
        # 匹配：AT或ATM后跟可选空格+数字.数字格式
        pattern = r'(?:^|[\s\u4e00-\u9fff])ATM?\s*\d{1,2}\.\d{1,2}$'
        if re.search(pattern, product_name_upper):
            return True

        return False

    def _has_independent_la(self, sales_attributes_upper: str) -> bool:
        """检查销售属性中是否有独立的LA（LA后面只能是结束、_、[等分隔符）"""
        import re
        # LA后面必须是结束、下划线、空格、数字、方括号等分隔符，不能是字母
        pattern = r'LA(?=_|\s|$|\d|\[)'
        return bool(re.search(pattern, sales_attributes_upper))

    def _has_independent_ny(self, sales_attributes_upper: str) -> bool:
        """检查销售属性中是否有独立的NY（NY后面只能是结束、_、空格、方括号）"""
        import re
        # NY后面必须是：下划线、空格、结束、方括号
        # 不能是：数字（避免NY91这类编码）、字母（避免NYLON等）
        pattern = r'NY(?=_|\s|$|\[)'
        return bool(re.search(pattern, sales_attributes_upper))

    def _has_independent_sf(self, sales_attributes_upper: str) -> bool:
        """检查销售属性中是否有独立的SF（SF后面只能是结束、_、空格、方括号）"""
        import re
        pattern = r'SF(?=_|\s|$|\[)'
        return bool(re.search(pattern, sales_attributes_upper))

    def _has_independent_fl(self, sales_attributes_upper: str) -> bool:
        """检查销售属性中是否有独立的FL（FL后面只能是结束、_、空格、方括号）"""
        import re
        pattern = r'FL(?=_|\s|$|\[)'
        return bool(re.search(pattern, sales_attributes_upper))

    def _has_independent_ca(self, sales_attributes_upper: str) -> bool:
        """检查销售属性中是否有独立的CA（CA后面只能是结束、_、空格、方括号）"""
        import re
        # CA后面必须是：下划线、空格、结束、方括号
        # 不能是：数字（避免CA91这类编码）、字母（避免CANADA等）
        pattern = r'CA(?=_|\s|$|\[)'
        return bool(re.search(pattern, sales_attributes_upper))

    def _has_stock_keywords(self, product_name: str) -> bool:
        """检查商品名称是否包含现货关键词"""
        return ('现货' in product_name or 
                'GN现货' in product_name or 
                '差价' in product_name)
    
    def _is_domestic_brand(self, brand_upper: str) -> bool:
        """检查是否是国内现货品牌"""
        domestic_brands = ['GRUNDZERO', 'JANSPORT', 'EMU']
        return any(domestic_brand in brand_upper for domestic_brand in domestic_brands)
    
    async def _create_product_master(self, 
                                   db: AsyncSession,
                                   raw_order: RawOrder,
                                   parsed_data: Dict[str, Any],
                                   sku_key: str) -> ProductMaster:
        """创建新的产品主表记录"""
        
        # 初始化网店名称列表
        shop_names = []
        if parsed_data.get('shop_name'):
            shop_names = [parsed_data['shop_name']]
        
        product = ProductMaster(
            sku_key=sku_key,
            
            # 原始商品信息
            线上宝贝名称=raw_order.线上宝贝名称,
            线上销售属性=raw_order.线上销售属性,
            
            # 解析后的属性
            品牌=parsed_data['brand'],
            货号=parsed_data['product_code'],
            颜色=parsed_data['color'],
            尺寸=parsed_data['size'],
            
            # 网店信息
            网店名称=shop_names if shop_names else None,
            
            # 商品编码
            线上商家编码=raw_order.线上商家编码,
            商品编号=raw_order.商品编号,
            SKU编号=raw_order.SKU编号,
            
            # 采购相关
            procurement_method=parsed_data['procurement_method'],
            
            # 图片信息 - 直接使用在线URL
            original_image_urls=json.dumps(parsed_data['original_image_urls'], ensure_ascii=False),
            main_image_path=parsed_data['original_image_urls'][0] if parsed_data['original_image_urls'] else None,
            image_download_status="online" if parsed_data['original_image_urls'] else "no_images",  # 改为online，不再自动下载
            
            # 价格统计 (初始值)
            avg_price=parsed_data['price'],
            min_price=parsed_data['price'],
            max_price=parsed_data['price'],
            latest_price=parsed_data['price'],
            
            # 销量统计 (初始值)
            total_quantity=parsed_data['quantity'],
            total_orders=1,
            total_revenue=parsed_data['revenue'],
            
            # 时间信息
            first_order_time=raw_order.付款时间,
            last_order_time=raw_order.付款时间,
            
            # 采购需求 (初始为待采购)
            pending_procurement_qty=parsed_data['quantity'],
            
            # 状态
            is_active=True,
            status="ACTIVE",
            
            # 来源信息
            source_order_count=1,
            last_source_order_id=raw_order.id
        )
        
        try:
            db.add(product)
            await db.flush()  # 获取ID但不提交
            logger.debug(f"创建产品主表记录: {product.id}, SKU: {sku_key[:12]}...")
            return product
            
        except IntegrityError as e:
            await db.rollback()
            if "sku_key" in str(e):
                logger.warning(f"SKU键冲突: {sku_key[:12]}...")
                # 尝试获取现有记录
                existing = await self._get_existing_product(db, sku_key)
                if existing:
                    return existing
            raise
    
    async def _update_product_statistics(self, 
                                       db: AsyncSession,
                                       product: ProductMaster,
                                       raw_order: RawOrder,
                                       parsed_data: Dict[str, Any]):
        """更新产品统计信息"""
        price = parsed_data['price']
        quantity = parsed_data['quantity']
        revenue = parsed_data['revenue']
        
        # 计算新的统计值
        new_total_orders = product.total_orders + 1
        new_total_quantity = product.total_quantity + quantity
        new_total_revenue = product.total_revenue + revenue
        
        # 更新价格统计
        new_avg_price = new_total_revenue / new_total_quantity if new_total_quantity > 0 else price
        new_min_price = min(product.min_price or price, price)
        new_max_price = max(product.max_price or price, price)
        
        # 更新时间信息
        payment_time = raw_order.付款时间
        new_first_time = min(product.first_order_time or payment_time, payment_time) if payment_time else product.first_order_time
        new_last_time = max(product.last_order_time or payment_time, payment_time) if payment_time else product.last_order_time
        
        # 合并网店名称
        existing_shops = []
        if product.网店名称:
            try:
                existing_shops = json.loads(product.网店名称) if isinstance(product.网店名称, str) else product.网店名称
            except:
                existing_shops = []
        
        new_shop_name = parsed_data.get('shop_name')
        if new_shop_name and new_shop_name not in existing_shops:
            existing_shops.append(new_shop_name)
        
        # 执行更新
        update_data = {
            'total_orders': new_total_orders,
            'total_quantity': new_total_quantity,
            'total_revenue': new_total_revenue,
            'avg_price': new_avg_price,
            'min_price': new_min_price,
            'max_price': new_max_price,
            'latest_price': price,
            'first_order_time': new_first_time,
            'last_order_time': new_last_time,
            'pending_procurement_qty': product.pending_procurement_qty + quantity,
            'source_order_count': product.source_order_count + 1,
            'last_source_order_id': raw_order.id,
            '网店名称': existing_shops if existing_shops else None,
            'updated_at': datetime.now()
        }
        
        await db.execute(
            update(ProductMaster)
            .where(ProductMaster.id == product.id)
            .values(**update_data)
        )
        
        logger.debug(f"更新产品统计: {product.id}, 新增订单数: 1, 新增销量: {quantity}")
    
    async def _create_source_mapping(self, 
                                   db: AsyncSession,
                                   product_id: int,
                                   raw_order: RawOrder):
        """创建产品来源映射"""
        # 先检查映射是否已存在
        existing = await db.execute(
            select(ProductSourceMapping)
            .where(ProductSourceMapping.product_master_id == product_id)
            .where(ProductSourceMapping.raw_order_id == raw_order.id)
        )
        if existing.scalar_one_or_none():
            logger.debug(f"映射已存在: product_id={product_id}, raw_order_id={raw_order.id}")
            return
        
        # 创建新映射
        mapping = ProductSourceMapping(
            product_master_id=product_id,
            raw_order_id=raw_order.id,
            contribution_quantity=int(raw_order.数量) if raw_order.数量 else 1,
            contribution_revenue=float(raw_order.订单金额) if raw_order.订单金额 else 0.0
        )
        
        try:
            db.add(mapping)
            await db.flush()  # 立即刷新以捕获错误
            logger.debug(f"创建映射: product_id={product_id}, raw_order_id={raw_order.id}")
        except IntegrityError as e:
            # 映射可能已存在（并发情况）
            await db.rollback()
            logger.warning(f"创建映射失败（可能并发）: {str(e)}")
    
    async def _count_products_master(self, db: AsyncSession) -> int:
        """统计产品主表记录数"""
        result = await db.execute(select(func.count(ProductMaster.id)))
        return result.scalar() or 0
    
    # === 图片下载相关方法 ===
    
    async def schedule_image_downloads(self, db: AsyncSession, limit: int = None) -> Dict[str, Any]:
        """
        调度图片下载任务
        为状态为 pending 的产品创建下载任务
        """
        # 查找需要下载图片的产品
        query = (select(ProductMaster)
                .where(ProductMaster.image_download_status == "pending")
                .where(ProductMaster.original_image_urls.is_not(None))
                .order_by(ProductMaster.created_at))
        
        if limit:
            query = query.limit(limit)
        
        result = await db.execute(query)
        products = result.scalars().all()
        
        download_tasks = []
        for product in products:
            try:
                urls = json.loads(product.original_image_urls) if product.original_image_urls else []
                for url in urls:
                    if url and url.strip():
                        # 创建下载日志记录
                        log_entry = ImageDownloadLog(
                            product_master_id=product.id,
                            sku_key=product.sku_key,
                            original_url=url.strip(),
                            download_status="pending"
                        )
                        db.add(log_entry)
                        download_tasks.append({
                            'product_id': product.id,
                            'sku_key': product.sku_key,
                            'url': url.strip()
                        })
            except (json.JSONDecodeError, TypeError) as e:
                logger.error(f"解析产品 {product.id} 图片URL失败: {e}")
        
        await db.commit()
        
        logger.info(f"调度图片下载任务: {len(download_tasks)} 个图片待下载")
        
        return {
            'products_count': len(products),
            'download_tasks_count': len(download_tasks),
            'tasks': download_tasks
        }
    
    async def get_products_list(self,
                              db: AsyncSession,
                              product_name: Optional[str] = None,
                              brand: Optional[str] = None,
                              procurement_method: Optional[str] = None,
                              shops: Optional[List[str]] = None,
                              is_active: Optional[bool] = None,
                              image_status: Optional[str] = None,
                              page: int = 1,
                              page_size: int = 50,
                              cursor_id: Optional[int] = None,
                              use_cursor: bool = False) -> Dict[str, Any]:
        """
        获取产品主表列表
        纯产品数据，不包含聚合信息
        """
        # 确保加载了别名映射
        if not self._brand_aliases_cache:
            await self.initialize_extractors(db)
        
        # 构建查询条件
        filters = []
        
        if product_name:
            filters.append(ProductMaster.线上宝贝名称.contains(product_name))
        
        if brand:
            # Normalize brand using aliases if available
            normalized_brand = brand
            if self._brand_aliases_cache and brand in self._brand_aliases_cache:
                normalized_brand = self._brand_aliases_cache[brand]
            filters.append(ProductMaster.品牌 == normalized_brand)
            
        if procurement_method:
            filters.append(ProductMaster.procurement_method == procurement_method)
            
        if shops and len(shops) > 0:
            # 优化JSON查询：使用SQLite的JSON函数或更高效的字符串匹配
            shop_conditions = []
            for shop in shops:
                # 方法1：使用json_extract函数（SQLite 3.9+）
                # 检查JSON数组中是否包含该网店
                from sqlalchemy import text
                
                # 处理Unicode转义格式
                escaped_shop = shop.encode('unicode_escape').decode('ascii')
                
                # 使用LIKE模式匹配，这样可以利用索引
                shop_conditions.append(
                    ProductMaster.网店名称.like(f'%"{shop}"%')
                )
                # 同时匹配转义格式
                if escaped_shop != shop:
                    shop_conditions.append(
                        ProductMaster.网店名称.like(f'%{escaped_shop}%')
                    )
            
            if shop_conditions:
                from sqlalchemy import or_
                filters.append(or_(*shop_conditions))
            
        if is_active is not None:
            filters.append(ProductMaster.is_active == is_active)
        
        # 添加图片状态筛选
        if image_status:
            if image_status == 'has_image':
                # 有图片：main_image_path不为空且不为null
                filters.append(ProductMaster.main_image_path.isnot(None))
                filters.append(ProductMaster.main_image_path != '')
            elif image_status == 'no_image':
                # 无图片：main_image_path为空或为null
                from sqlalchemy import or_
                filters.append(or_(
                    ProductMaster.main_image_path.is_(None),
                    ProductMaster.main_image_path == ''
                ))
        
        # 计算总数（使用缓存）
        # 生成缓存键
        cache_params = {
            'product_name': product_name,
            'brand': brand,
            'procurement_method': procurement_method,
            'shops': shops,
            'is_active': is_active,
            'image_status': image_status
        }
        cache_key = cache_service._generate_key('product_count', cache_params)
        
        # 尝试从缓存获取
        cached_total = cache_service.get(cache_key)
        
        if cached_total is not None:
            total = cached_total
            logger.debug(f"使用缓存的总数: {total}")
        else:
            # 执行数据库查询
            count_query = select(func.count(ProductMaster.id))
            if filters:
                count_query = count_query.where(and_(*filters))
            
            total_result = await db.execute(count_query)
            total = total_result.scalar() or 0
            
            # 存入缓存（缓存5分钟）
            cache_service.set(cache_key, total, ttl=300)
            logger.debug(f"查询总数并缓存: {total}")
        
        # 获取分页数据
        query = select(ProductMaster)
        if filters:
            query = query.where(and_(*filters))
        
        # 使用游标分页或传统分页
        if use_cursor and cursor_id:
            # 游标分页：基于ID，性能更好，特别是深分页
            query = query.where(ProductMaster.id > cursor_id)
            query = query.order_by(ProductMaster.id).limit(page_size)
        else:
            # 传统OFFSET分页
            query = (query
                    .order_by(ProductMaster.updated_at.desc())
                    .offset((page - 1) * page_size)
                    .limit(page_size))
        
        result = await db.execute(query)
        products = result.scalars().all()
        
        # 转换为前端所需格式
        items = []
        for product in products:
            # 解析图片URLs
            original_urls = []
            if product.original_image_urls:
                try:
                    original_urls = json.loads(product.original_image_urls)
                except (json.JSONDecodeError, TypeError):
                    pass
            
            # 获取主图片路径
            main_image_path = None
            if product.main_image_path:
                main_image_path = product.main_image_path
            elif original_urls:
                # 如果没有本地主图片，使用第一个原始URL
                main_image_path = original_urls[0]
            
            items.append({
                'id': product.id,
                'sku_key': product.sku_key,
                '线上宝贝名称': product.线上宝贝名称,
                '品牌': product.品牌,
                '货号': product.货号,
                '题色': product.颜色,  # 数据库字段是颜色，前端显示为题色
                '尺寸': product.尺寸,
                '线上销售属性': product.线上销售属性,
                '网店名称': product.网店名称,  # 添加网店名称字段
                'procurement_method': product.procurement_method,
                'main_image_path': main_image_path,
                'original_image_urls': original_urls,
                'image_download_status': product.image_download_status,
                'is_active': product.is_active,
                'created_at': product.created_at.isoformat() if product.created_at else None,
                'updated_at': product.updated_at.isoformat() if product.updated_at else None
            })
        
        # 获取品牌统计用于过滤选项
        brand_query = (select(ProductMaster.品牌, func.count(ProductMaster.id).label('count'))
                      .where(ProductMaster.品牌.is_not(None))
                      .group_by(ProductMaster.品牌)
                      .order_by(func.count(ProductMaster.id).desc()))
        
        brand_result = await db.execute(brand_query)
        brands = [{'brand': row[0], 'count': row[1]} for row in brand_result.all()]
        
        # 准备游标信息
        next_cursor = None
        if use_cursor and products:
            # 最后一个产品的ID作为下一页的游标
            next_cursor = products[-1].id
        
        return {
            'items': items,
            'pagination': {
                'page': page,
                'page_size': page_size,
                'total': total,
                'pages': (total + page_size - 1) // page_size,
                'use_cursor': use_cursor,
                'next_cursor': next_cursor
            },
            'basic_stats': {
                'total_products': total,
                'active_products': sum(1 for item in items if item['is_active']),
                'total_brands': len(brands),
                'total_categories': 8  # 采购方式数量
            },
            'top_brands': brands[:20]  # 前20个品牌
        }
    
    async def recover_missing_mappings(self, 
                                      db: AsyncSession, 
                                      batch_size: int = 100) -> Dict[str, Any]:
        """
        恢复缺失的ProductSourceMapping映射
        用于修复历史数据问题
        """
        logger.info("开始恢复缺失的ProductSourceMapping映射...")
        
        # 初始化解析器
        await self.initialize_extractors(db)
        
        # 查找没有映射的原始订单
        base_filters = [
            RawOrder.原始订单编号.notlike('%合计%'),
            RawOrder.原始订单编号.notlike('%汇总%'),
            RawOrder.原始订单编号.notlike('%总计%'),
            RawOrder.原始订单编号.is_not(None),
            RawOrder.原始订单编号 != '',
            RawOrder.线上宝贝名称.is_not(None),
            RawOrder.订单单价.is_not(None)
        ]
        
        # 子查询：已有映射的原始订单ID
        mapped_orders_subquery = select(ProductSourceMapping.raw_order_id)
        
        # 查询没有映射的原始订单
        query = (select(RawOrder)
                .where(and_(*base_filters))
                .where(RawOrder.id.not_in(mapped_orders_subquery))
                .order_by(RawOrder.created_at))
        
        result = await db.execute(query)
        unmapped_orders = result.scalars().all()
        
        logger.info(f"找到 {len(unmapped_orders)} 个缺失映射的原始订单")
        
        recovery_result = {
            'total_unmapped': len(unmapped_orders),
            'recovered': 0,
            'failed': 0,
            'errors': []
        }
        
        # 分批处理
        for i in range(0, len(unmapped_orders), batch_size):
            batch = unmapped_orders[i:i + batch_size]
            logger.info(f"处理批次 {i//batch_size + 1}/{(len(unmapped_orders) + batch_size - 1)//batch_size}")
            
            for raw_order in batch:
                try:
                    # 生成SKU键
                    sku_key = self._generate_sku_key(raw_order)
                    
                    # 查找或创建产品
                    existing_product = await self._get_existing_product(db, sku_key)
                    
                    if existing_product:
                        # 产品已存在，只需创建映射
                        await self._create_source_mapping(db, existing_product.id, raw_order)
                        # 更新产品统计
                        parsed_data = await self._parse_order_data(raw_order)
                        await self._update_product_statistics(db, existing_product, raw_order, parsed_data)
                    else:
                        # 产品不存在，创建新产品和映射
                        parsed_data = await self._parse_order_data(raw_order)
                        new_product = await self._create_product_master(db, raw_order, parsed_data, sku_key)
                        await self._create_source_mapping(db, new_product.id, raw_order)
                    
                    await db.commit()
                    recovery_result['recovered'] += 1
                    
                    if recovery_result['recovered'] % 100 == 0:
                        logger.info(f"已恢复 {recovery_result['recovered']} 个映射")
                    
                except Exception as e:
                    await db.rollback()
                    recovery_result['failed'] += 1
                    error_msg = f"恢复订单 {raw_order.id} ({raw_order.原始订单编号}) 映射失败: {str(e)}"
                    recovery_result['errors'].append(error_msg)
                    logger.error(error_msg)
        
        logger.info(f"映射恢复完成: 恢复 {recovery_result['recovered']} 个, 失败 {recovery_result['failed']} 个")
        
        return recovery_result
    
    async def get_available_brands(self, 
                                  db: AsyncSession, 
                                  procurement_method: Optional[str] = None,
                                  current_filters: Optional[Dict[str, Any]] = None,
                                  force_refresh: bool = False) -> List[str]:
        """
        获取可用品牌列表，基于当前筛选条件下的产品（带缓存）
        
        Args:
            db: 数据库会话
            procurement_method: 采购方式筛选
            current_filters: 当前的筛选条件，包含product_name, shops等
        
        Returns:
            当前筛选条件下存在的品牌列表
        """
        # 生成缓存键
        cache_params = {
            'procurement_method': procurement_method,
            'filters': current_filters or {}
        }
        cache_key = cache_service._generate_key('brands', cache_params)
        
        # 尝试从缓存获取（除非强制刷新）
        if not force_refresh:
            cached_brands = cache_service.get(cache_key)
            if cached_brands is not None:
                logger.debug(f"使用缓存的品牌列表: {len(cached_brands)}个品牌")
                return cached_brands
        
        from app.models.products_master import ProductMaster
        from sqlalchemy import select, distinct, and_, or_
        
        # 基础查询条件
        filters = [
            ProductMaster.品牌.is_not(None),
            ProductMaster.品牌 != '',
            ProductMaster.is_active == True
        ]
        
        # 如果指定了采购方式，添加筛选条件
        if procurement_method:
            filters.append(ProductMaster.procurement_method == procurement_method)
        
        # 如果提供了其他筛选条件
        if current_filters:
            # 商品名称筛选
            if current_filters.get('product_name'):
                filters.append(ProductMaster.线上宝贝名称.contains(current_filters['product_name']))
            
            # 网店筛选
            if current_filters.get('shops'):
                shop_conditions = []
                for shop in current_filters['shops']:
                    # 使用JSON_CONTAINS或LIKE匹配JSON数组中的网店
                    shop_conditions.append(ProductMaster.网店名称.contains(f'"{shop}"'))
                if shop_conditions:
                    filters.append(or_(*shop_conditions))
            
            # 图片状态筛选
            if current_filters.get('image_status'):
                if current_filters['image_status'] == 'has_image':
                    filters.append(ProductMaster.main_image_path.is_not(None))
                elif current_filters['image_status'] == 'no_image':
                    filters.append(ProductMaster.main_image_path.is_(None))
        
        # 构建查询
        query = select(distinct(ProductMaster.品牌)).where(and_(*filters))
        
        # 按品牌名称排序
        query = query.order_by(ProductMaster.品牌)
        
        result = await db.execute(query)
        brands = [row[0] for row in result.all() if row[0]]
        
        # 存入缓存（缓存1分钟）
        cache_service.set(cache_key, brands, ttl=60)
        logger.debug(f"查询并缓存品牌列表: {len(brands)}个品牌")
        
        return brands