"""
发货清单上传服务
处理Excel文件上传，更新pending_shipments表
This is part of the new independent shipping module that does not affect existing modules.
"""

import re
import hashlib
import pandas as pd
from datetime import datetime
from typing import Optional, Dict, List, Tuple, Any
from pathlib import Path
import logging
from sqlalchemy import select, and_, or_, update, func
from sqlalchemy.ext.asyncio import AsyncSession
from fastapi import UploadFile

from app.models.pending_shipments import PendingShipment
from app.models.shipping_upload_batches import ShippingUploadBatch
from app.models.procurement_orders import ProcurementOrder

logger = logging.getLogger(__name__)

# Safety limits applied to uploaded files
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
# MIME types considered normal for Excel uploads. process_shipping_list only
# logs a warning for other types; it does not reject the upload.
ALLOWED_MIME_TYPES = {
    'application/vnd.ms-excel',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/octet-stream',  # on some systems Excel files are reported with this generic type
}
# File-name extensions accepted by process_shipping_list (compared lower-cased).
ALLOWED_EXTENSIONS = {'.xls', '.xlsx'}


class ShippingListUploadService:
    """Service that imports an uploaded shipping-list Excel file into pending shipments."""

    # Shop association table.
    # Used for cross-shop order matching: the key is the shop a shipping list
    # belongs to, the value lists the shops whose procurement orders may be
    # matched against it (e.g. the 晴七公主 list also contains 信泽供货商 orders).
    SHOP_ASSOCIATIONS = {
        '晴七公主': ['晴七公主', '信泽供货商'],  # the 晴七公主 shipping list includes 信泽供货商 orders
        '小晴天7': ['小晴天7'],
        'uslife': ['uslife'],
        '信泽供货商': ['信泽供货商'],
    }

    # Shop display name -> short identifier (could be used to recognise a shop
    # from order-number patterns or other rules).
    # NOTE(review): not referenced by any method visible in this class — confirm
    # whether it is used elsewhere.
    SHOP_MAPPING = {
        '小晴天7': 'xiaoqingtian7',
        'uslife': 'uslife',
        '晴七公主': 'qingqigongzhu',
        '信泽供货商': 'xinze',
    }

    # Excel column header -> database field name.
    # Every entry currently maps a header to the identical field name; the
    # table therefore also acts as the whitelist of columns copied by
    # _create_new_order.
    COLUMN_MAPPING = {
        '子订单编号': '子订单编号',
        '主订单编号': '主订单编号',
        '商品标题': '商品标题',
        '商品价格': '商品价格',
        '购买数量': '购买数量',
        '外部系统编号': '外部系统编号',
        '商品属性': '商品属性',
        '套餐信息': '套餐信息',
        '联系方式备注': '联系方式备注',
        '订单状态': '订单状态',
        '商家编码': '商家编码',
        '支付单号': '支付单号',
        '买家应付货款': '买家应付货款',
        '买家实付金额': '买家实付金额',
        '退款状态': '退款状态',
        '退款金额': '退款金额',
        '订单创建时间': '订单创建时间',
        '订单付款时间': '订单付款时间',
        '淘鲜达渠道': '淘鲜达渠道',
        '商品ID': '商品ID',
        '分阶段信息': '分阶段信息',
        '备注标签': '备注标签',
        '商家备注': '商家备注',
        '主订单买家留言': '主订单买家留言',
        '发货时间': '发货时间',
        '物流单号': '物流单号',
        '物流公司': '物流公司',
        '是否主动赔付': '是否主动赔付',
        '主动赔付金额': '主动赔付金额',
        '主动赔付出账时间': '主动赔付出账时间',
    }

    @staticmethod
    def _safe_str_value(value, default=None):
        """
        安全转换值为字符串，正确处理 pandas NaN

        Args:
            value: 要转换的值
            default: NaN/None/空值时的默认返回值

        Returns:
            转换后的字符串或默认值
        """
        if pd.isna(value) or value is None:
            return default

        str_value = str(value).strip()
        # 处理字符串 "nan"（防止已有数据污染）
        if str_value.lower() in ('nan', ''):
            return default

        return str_value

    def _append_note_with_timestamp(self, existing_note: str | None, new_note: str | None) -> str | None:
        """
        智能追加新备注到现有备注，带时间戳（只有内容变化时才追加）

        Args:
            existing_note: 现有备注内容
            new_note: 新的备注内容

        Returns:
            追加后的备注，或None
        """
        # 如果新备注为空，保留原有备注
        if new_note is None or new_note.strip() == '':
            return existing_note

        new_note_stripped = new_note.strip()

        # 如果原有备注为空，直接使用新备注（不加时间戳）
        if existing_note is None or existing_note.strip() == '':
            return new_note_stripped

        existing_stripped = existing_note.strip()

        # 智能检查：如果新备注与最近一次的备注内容相同，不追加
        # 获取最后一条备注内容（可能带时间戳格式 [YYYY-MM-DD HH:MM] 或不带）
        lines = existing_stripped.split('\n')
        last_line = lines[-1].strip() if lines else ''

        # 提取最后一行的实际内容（去掉时间戳前缀）
        timestamp_pattern = r'^\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}\]\s*'
        last_content = re.sub(timestamp_pattern, '', last_line)

        # 如果新备注与最后一条内容相同，不追加
        if last_content == new_note_stripped:
            return existing_note

        # 也检查第一行（原始备注，通常没有时间戳）
        first_line = lines[0].strip() if lines else ''
        if first_line == new_note_stripped and len(lines) == 1:
            return existing_note

        # 内容有变化：追加新备注，加时间戳
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        return f"{existing_stripped}\n[{timestamp}] {new_note_stripped}"

    async def process_shipping_list(
        self,
        db: AsyncSession,
        file: UploadFile,
        shop_name: Optional[str],
        operator: str
    ) -> Dict[str, Any]:
        """
        Import an uploaded shipping-list Excel file.

        Steps: validate the upload, parse the workbook, decide which shop the
        list belongs to, create an upload batch, soft-delete existing orders
        that are absent from the new list, then insert/update one pending
        shipment per row and try to match it to a procurement order.

        Args:
            db: Database session; committed on success, rolled back on failure.
            file: Uploaded Excel file (.xls or .xlsx).
            shop_name: Shop chosen by the user. Optional; when omitted the shop
                is identified from the order numbers in the file.
            operator: Name of the operator performing the upload.

        Returns:
            A dict that always has ``success`` and ``message``. On success it
            also carries ``batch_id``, ``statistics``, ``shop_name``,
            ``auto_identified`` and ``validation_message``. On failure it
            carries ``error`` (``invalid_file_type``, ``file_too_large``,
            ``missing_columns``, ``shop_mismatch``, ``no_shop`` or the text of
            an unexpected exception). This method does not raise.
        """
        # Defined before the try block so the error handler can test it
        # directly instead of inspecting locals().
        batch = None
        try:
            # === Upload safety checks ===
            # 1. File extension
            file_ext = Path(file.filename).suffix.lower() if file.filename else ''
            if file_ext not in ALLOWED_EXTENSIONS:
                logger.warning(f"文件类型被拒绝: {file.filename} (扩展名: {file_ext})")
                return {
                    'success': False,
                    'error': 'invalid_file_type',
                    'message': f'不支持的文件类型: {file_ext}。仅支持 .xls 和 .xlsx 格式的Excel文件'
                }

            # 2. MIME type (when provided). Only a warning: client-side MIME
            #    detection is not reliable enough to reject on.
            if file.content_type and file.content_type not in ALLOWED_MIME_TYPES:
                logger.warning(f"MIME类型被拒绝: {file.filename} (类型: {file.content_type})")

            # 3. Read the payload and check its size
            content = await file.read()
            if len(content) > MAX_FILE_SIZE:
                logger.warning(f"文件过大: {file.filename} (大小: {len(content)} bytes)")
                return {
                    'success': False,
                    'error': 'file_too_large',
                    'message': f'文件大小超过限制: {len(content) / (1024*1024):.2f}MB，最大允许50MB'
                }

            file_hash = hashlib.sha256(content).hexdigest()

            # Parse the workbook. openpyxl cannot read legacy .xls files, so
            # the engine follows the (already validated) extension. The bytes
            # are wrapped in BytesIO because passing raw bytes to read_excel
            # is deprecated.
            from io import BytesIO
            excel_engine = 'openpyxl' if file_ext == '.xlsx' else 'xlrd'
            df = pd.read_excel(BytesIO(content), engine=excel_engine)
            total_rows = len(df)

            # Every later step keys on the sub-order number; fail early with a
            # clear message rather than with a KeyError after the batch exists.
            missing_columns = [col for col in ('子订单编号',) if col not in df.columns]
            if missing_columns:
                error_msg = f"Excel缺少必需的列：{', '.join(missing_columns)}"
                logger.error(error_msg)
                return {
                    'success': False,
                    'error': 'missing_columns',
                    'message': error_msg
                }

            # Identify the shop from the first few order numbers
            auto_identified_shop = await self.auto_identify_shop_from_excel(db, df, max_rows=5)

            # Reconcile the user's choice with the automatic identification
            final_shop_name = None
            validation_message = None

            if shop_name and auto_identified_shop:
                # Both available: they must agree
                if shop_name == auto_identified_shop:
                    final_shop_name = shop_name
                    validation_message = f"✅ 店铺验证通过：用户选择与自动识别一致 ({shop_name})"
                    logger.info(validation_message)
                else:
                    error_msg = f"❌ 店铺不一致：用户选择 '{shop_name}' 与自动识别 '{auto_identified_shop}' 不符"
                    logger.warning(error_msg)
                    return {
                        'success': False,
                        'error': 'shop_mismatch',
                        'message': error_msg,
                        'user_selected': shop_name,
                        'auto_identified': auto_identified_shop
                    }
            elif auto_identified_shop:
                # Only the automatic identification succeeded
                final_shop_name = auto_identified_shop
                validation_message = f"✅ 自动识别店铺：{auto_identified_shop}"
                logger.info(validation_message)
            elif shop_name:
                # Only the user's choice is available
                final_shop_name = shop_name
                validation_message = f"⚠️ 使用用户提供的店铺：{shop_name}（无法自动识别）"
                logger.warning(validation_message)
            else:
                # Neither is available
                error_msg = "❌ 无法确定店铺：请手动选择店铺或确保Excel中的订单号存在于系统中"
                logger.error(error_msg)
                return {
                    'success': False,
                    'error': 'no_shop',
                    'message': error_msg
                }

            # Create the upload batch; flush so batch.batch_id is available
            batch = ShippingUploadBatch(
                upload_type='shipping_list',
                网店名称=final_shop_name,
                file_name=file.filename,
                操作员=operator
            )
            db.add(batch)
            await db.flush()

            batch.file_hash = file_hash
            batch.总行数 = total_rows

            logger.info(f"开始处理发货清单，店铺：{final_shop_name}，总行数：{total_rows}")
            if validation_message:
                logger.info(f"店铺识别结果：{validation_message}")

            # Processing statistics
            stats = {
                'total': total_rows,
                'matched': 0,
                'unmatched': 0,
                'level1_matched': 0,  # level 1: single-SKU match
                'level2_matched': 0,  # level 2: multi-SKU, distinct products
                'level3_matched': 0,  # level 3: multi-SKU, same product
                'level4_matched': 0,  # level 4: cross-shop attribute match
                'no_shipping_needed': 0,
                'duplicates': 0,
                'new_orders': 0,
                'errors': []
            }

            # 1. Sub-order numbers already stored for this shop
            existing_orders = await self._get_existing_orders(db, final_shop_name)
            existing_sub_orders = {order.子订单编号 for order in existing_orders}

            # 2. Sub-order numbers present in the Excel file. Blank cells are
            #    dropped so they do not turn into the string 'nan'.
            excel_sub_orders = set(df['子订单编号'].dropna().astype(str).tolist())

            # 3. Stored orders missing from the new list are soft-deleted
            orders_to_delete = existing_sub_orders - excel_sub_orders
            if orders_to_delete:
                await self._mark_orders_as_no_shipping(db, final_shop_name, orders_to_delete)
                stats['no_shipping_needed'] = len(orders_to_delete)
                logger.info(f"标记{len(orders_to_delete)}个订单为无需发货")

            # 4. Process every Excel row
            for index, row in df.iterrows():
                try:
                    raw_sub_order_no = row['子订单编号']
                    if pd.isna(raw_sub_order_no):
                        # Without a sub-order number the row could never be
                        # updated or matched later; report it instead of
                        # inserting an order with no identifier.
                        reason = '子订单编号为空，已跳过'
                        logger.warning(f"处理第{index+1}行时出错：{reason}")
                        stats['errors'].append(f"Row {index+1}: {reason}")
                        continue

                    sub_order_no = str(raw_sub_order_no)

                    if sub_order_no in existing_sub_orders:
                        # Already stored: refresh the mutable fields
                        await self._update_existing_order(db, final_shop_name, sub_order_no, row, batch.batch_id)
                        stats['duplicates'] += 1
                    else:
                        await self._create_new_order(db, final_shop_name, row, batch.batch_id)
                        stats['new_orders'] += 1
                        # Remember it, so a repeated row later in this same
                        # file updates the order instead of inserting it twice.
                        existing_sub_orders.add(sub_order_no)

                    # Both new and already-stored orders are (re)matched
                    match_result = await self._match_procurement_order(db, row, final_shop_name)
                    self._record_match_result(stats, match_result)

                except Exception as e:
                    # Best effort: one bad row must not abort the whole upload
                    logger.error(f"处理第{index+1}行时出错：{str(e)}")
                    stats['errors'].append(f"Row {index+1}: {str(e)}")

            # 5. Store the statistics on the batch and finish
            batch.set_statistics(stats)
            batch.mark_completed()

            await db.commit()

            return {
                'success': True,
                'batch_id': batch.batch_id,
                'statistics': stats,
                'shop_name': final_shop_name,
                'auto_identified': auto_identified_shop is not None,
                'validation_message': validation_message,
                'message': f"成功处理{total_rows}行数据"
            }

        except Exception as e:
            # logger.exception records the message together with the traceback
            logger.exception(f"处理发货清单失败：{str(e)}")
            if batch is not None:
                # NOTE(review): the batch row was only flushed, never
                # committed, so the rollback below presumably discards it
                # together with this failed status — confirm whether failed
                # batches are meant to be persisted.
                batch.mark_failed(str(e))
            await db.rollback()
            return {
                'success': False,
                'error': str(e),
                'message': f"处理失败：{str(e)}"
            }

    @staticmethod
    def _record_match_result(stats: Dict[str, Any], match_result: Any) -> None:
        """Fold one _match_procurement_order result into the statistics dict."""
        if not match_result:
            stats['unmatched'] += 1
            return

        _, level = match_result
        stats['matched'] += 1
        level_key = f'level{level}_matched'
        # Only the levels that have a counter (1-4) are tallied separately
        if level_key in stats:
            stats[level_key] += 1

    async def _get_existing_orders(self, db: AsyncSession, shop_name: str) -> List[PendingShipment]:
        """
        Load every pending shipment stored for a shop, soft-deleted ones included.

        Args:
            db: Database session.
            shop_name: Shop whose orders are loaded.

        Returns:
            All PendingShipment rows whose shop name equals ``shop_name``.
        """
        # Deliberately no is_deleted filter: excluding deleted rows made a
        # repeated upload insert a new record instead of restoring and
        # updating the old one.
        by_shop = select(PendingShipment).where(PendingShipment.网店名称 == shop_name)
        rows = await db.execute(by_shop)
        return rows.scalars().all()

    async def _mark_orders_as_no_shipping(
        self,
        db: AsyncSession,
        shop_name: str,
        sub_order_nos: set
    ):
        """
        Soft-delete the given orders of a shop ("no shipping needed").

        Orders whose display state was set manually (is_manually_set=True)
        are left untouched.

        Args:
            db: Database session (the statement is executed, not committed).
            shop_name: Shop the orders belong to.
            sub_order_nos: Sub-order numbers to mark as deleted.
        """
        targets = and_(
            PendingShipment.网店名称 == shop_name,
            PendingShipment.子订单编号.in_(list(sub_order_nos)),
            # Key safeguard: skip orders whose display state was set by hand
            PendingShipment.is_manually_set == False
        )
        soft_delete = update(PendingShipment).where(targets).values(
            is_deleted=True,
            deleted_reason='不在最新发货清单中',
            updated_at=datetime.now()
        )
        await db.execute(soft_delete)

    async def _update_existing_order(
        self,
        db: AsyncSession,
        shop_name: str,
        sub_order_no: str,
        row: pd.Series,
        batch_id: str
    ):
        """
        Refresh an already stored order from a new Excel row.

        Updates the status/refund fields, appends changed notes with a
        timestamp, stamps the update time and batch id, and restores a
        soft-deleted order unless its display state was set manually.
        Does nothing when the order is not found.

        Args:
            db: Database session.
            shop_name: Shop the order belongs to.
            sub_order_no: Sub-order number identifying the order.
            row: Excel row holding the new values.
            batch_id: Id of the upload batch being processed.
        """
        lookup = select(PendingShipment).where(
            and_(
                PendingShipment.网店名称 == shop_name,
                PendingShipment.子订单编号 == sub_order_no
            )
        )
        order = (await db.execute(lookup)).scalar_one_or_none()
        if not order:
            return

        # Plain fields are overwritten; NaN/blank cells become ''
        for column in ('订单状态', '退款状态', '退款金额'):
            setattr(order, column, self._safe_str_value(row.get(column), ''))

        # Notes are appended (time-stamped) rather than overwritten
        for column in ('商家备注', '主订单买家留言'):
            incoming = self._safe_str_value(row.get(column), None)
            merged = self._append_note_with_timestamp(getattr(order, column), incoming)
            setattr(order, column, merged)

        order.数据更新时间 = datetime.now()
        order.upload_batch_id = batch_id

        # The order is back in the list: undo a previous soft delete, except
        # for orders whose display state was set by hand
        restorable = order.is_deleted and not order.is_manually_set
        if restorable:
            order.is_deleted = False
            order.deleted_reason = None

    async def _create_new_order(
        self,
        db: AsyncSession,
        shop_name: str,
        row: pd.Series,
        batch_id: str
    ) -> PendingShipment:
        """
        Insert a new pending shipment built from one Excel row.

        Every column listed in COLUMN_MAPPING that is present and non-empty in
        the row is copied onto the new record: "时间" columns are parsed to
        datetimes, "金额"/"价格"/"数量" columns are converted to float, note
        columns are normalised with _safe_str_value, everything else is
        stored as read.

        Args:
            db: Database session; the new record is added and flushed.
            shop_name: Shop the order belongs to.
            row: Excel row to import.
            batch_id: Id of the upload batch being processed.

        Returns:
            The newly created PendingShipment.
        """
        new_order = PendingShipment(
            网店名称=shop_name,
            upload_batch_id=batch_id,
            first_upload_batch_id=batch_id,  # batch of first creation, never updated afterwards
            数据更新时间=datetime.now()
        )

        note_columns = ('商家备注', '主订单买家留言', '备注标签', '联系方式备注')

        for excel_col, db_col in self.COLUMN_MAPPING.items():
            if excel_col not in row:
                continue

            value = row[excel_col]

            # Missing cells (NaN, None, NaT) are left unset
            if pd.isna(value):
                continue

            if '时间' in excel_col:
                # Date/time columns
                if isinstance(value, str):
                    # Blank text would fail to parse; leave the field unset
                    if value.strip() == '':
                        continue
                    try:
                        value = pd.to_datetime(value)
                    except Exception:
                        continue  # unparsable date: do not store an invalid value
                    # Text such as "NaT" parses to a missing timestamp
                    if pd.isna(value):
                        continue
            elif '金额' in excel_col or '价格' in excel_col or '数量' in excel_col:
                # Numeric columns
                try:
                    value = float(value)
                except (TypeError, ValueError):
                    # Best effort, as before: keep the raw value, but only for
                    # genuine conversion failures and with a trace in the log
                    logger.debug(f"无法将列 {excel_col} 的值 {value!r} 转换为数字，保留原值")
            elif db_col in note_columns:
                # Note columns: trim, and treat blank / "nan" text as missing
                value = self._safe_str_value(value, None)
                if value is None:
                    continue

            setattr(new_order, db_col, value)

        db.add(new_order)
        await db.flush()

        # Returned so the caller can use it for subsequent matching
        return new_order

    async def _analyze_order_complexity(
        self,
        db: AsyncSession,
        main_order_no: str,
        sub_order_no: str,
        product_name: str
    ) -> Tuple[int, List[Any]]:
        """
        Decide which matching level applies to an order.

        Looks at the pending shipments stored under the same main order number.

        Args:
            db: Database session.
            main_order_no: Main order number.
            sub_order_no: Sub-order number.
            product_name: Product title (accepted but not used by this method).

        Returns:
            ``(level, rows)`` where ``rows`` are the (sub-order number, product
            title) records of the main order and ``level`` is:
            1 - single-SKU order (at most one record, or main number equals
                sub-order number);
            2 - several records whose product titles are all different;
            3 - several records with at least one repeated product title.
        """
        siblings_query = select(
            PendingShipment.子订单编号,
            PendingShipment.商品标题
        ).where(
            PendingShipment.主订单编号 == main_order_no
        )
        siblings = (await db.execute(siblings_query)).all()

        # Level 1: single SKU
        is_single_sku = len(siblings) <= 1 or main_order_no == sub_order_no
        if is_single_sku:
            logger.debug(f"订单 {main_order_no} 判定为层级1（单SKU）")
            return 1, siblings

        titles = [record.商品标题 for record in siblings]
        has_repeated_title = len(set(titles)) != len(titles)

        if not has_repeated_title:
            # Level 2: every title is unique, so the title identifies the row
            logger.debug(f"订单 {main_order_no} 判定为层级2（多SKU不同商品）")
            return 2, siblings

        # Level 3: same title appears more than once; colour/size are needed
        logger.debug(f"订单 {main_order_no} 判定为层级3（多SKU相同商品）")
        return 3, siblings

    async def _match_procurement_order(
        self,
        db: AsyncSession,
        row: pd.Series,
        shop_name: str
    ) -> Optional[Tuple[int, int]]:
        """
        Match one Excel row to a procurement order using four escalating levels.

        Cross-shop matching is supported through SHOP_ASSOCIATIONS (e.g. the
        晴七公主 shipping list also contains 信泽供货商 orders). The outcome is
        written to the pending shipment identified by sub-order number and
        shop: match_status 'matched' with the procurement order id and a
        confidence, or 'failed' with confidence 0.0.

        Levels:
            1. single-SKU order: match by main order number (plus product
               name when the procurement table holds several rows for it);
            2. multi-SKU, distinct products: main order number + product name;
            3. multi-SKU, same product: + colour and size, falling back to
               + quantity;
            4. fallback when 1-3 fail: product name (+ colour / size when
               known) within the associated shops, without the order number.

        Args:
            db: Database session.
            row: Excel row.
            shop_name: Shop the shipping list belongs to.

        Returns:
            ``(procurement_order_id, level)`` on a match (level 1-4), or None
            when nothing matched or an error occurred. Errors are logged, not
            raised.
        """
        try:
            main_order_no = str(row['主订单编号'])
            sub_order_no = str(row.get('子订单编号', ''))
            # Blank cells must not turn into the literal text "nan", and a
            # blank quantity must not abort the whole match with int(NaN).
            product_name = self._cell_text(row.get('商品标题', ''))
            product_attrs = self._cell_text(row.get('商品属性', ''))
            quantity = self._cell_quantity(row.get('购买数量', 1))

            # Shops whose procurement orders may be matched (cross-shop support)
            associated_shops = self.SHOP_ASSOCIATIONS.get(shop_name, [shop_name])
            logger.debug(f"店铺 {shop_name} 的关联店铺列表：{associated_shops}")

            # Decide which strategy applies
            level, _ = await self._analyze_order_complexity(
                db, main_order_no, sub_order_no, product_name
            )

            po_id = None
            confidence = 0.0
            # (colour, size), extracted at most once and shared by levels 3 and 4
            color_and_size = None

            if level == 1:
                # Level 1: single-SKU order. Prefer order number + product
                # name when the procurement side has several rows, so a
                # multi-SKU procurement order is not matched to the wrong row.
                logger.debug(f"使用层级1匹配策略：主订单号 {main_order_no}")

                po_count_result = await db.execute(
                    select(func.count()).where(
                        and_(
                            ProcurementOrder.原始订单编号 == main_order_no,
                            ProcurementOrder.网店名称.in_(associated_shops)
                        )
                    )
                )
                po_count = po_count_result.scalar() or 0

                if po_count > 1:
                    # Several procurement rows: the product name must match too
                    logger.debug(f"订单 {main_order_no} 在采购表中有 {po_count} 条记录，升级为商品名匹配")
                    po_id = await self._find_po_by_order_and_name(
                        db, main_order_no, product_name, associated_shops
                    )
                    if po_id:
                        confidence = 0.95  # confidence of a product-name match
                        logger.info(f"层级1(升级)匹配成功：订单 {main_order_no} 商品 {product_name[:30]}")
                else:
                    # Zero or one procurement row: the order number is enough
                    result = await db.execute(
                        select(ProcurementOrder.id).where(
                            and_(
                                ProcurementOrder.原始订单编号 == main_order_no,
                                ProcurementOrder.网店名称.in_(associated_shops)
                            )
                        ).limit(1)
                    )
                    po_id = result.scalar()
                    if po_id:
                        confidence = 1.0
                        logger.info(f"层级1匹配成功：订单 {main_order_no}")

            elif level == 2:
                # Level 2: multi-SKU with distinct products
                logger.debug(f"使用层级2匹配策略：主订单号 {main_order_no} + 商品名")
                po_id = await self._find_po_by_order_and_name(
                    db, main_order_no, product_name, associated_shops
                )
                if po_id:
                    confidence = 0.95
                    logger.info(f"层级2匹配成功：订单 {main_order_no} 商品 {product_name[:30]}")

            else:  # level == 3
                # Level 3: multi-SKU with the same product; colour and size
                # are needed to tell the rows apart
                logger.debug("使用层级3匹配策略：需要颜色和尺寸匹配")

                color_and_size = self._extract_color_and_size(product_name, product_attrs)
                color, size = color_and_size
                logger.debug(f"提取的属性 - 颜色: {color}, 尺寸: {size}")

                # Exact attempt: order number + name + colour + size. An
                # attribute that could not be extracted must be NULL on the
                # procurement side.
                color_filter = (ProcurementOrder.颜色 == color) if color else ProcurementOrder.颜色.is_(None)
                size_filter = (ProcurementOrder.尺寸 == size) if size else ProcurementOrder.尺寸.is_(None)
                result = await db.execute(
                    select(ProcurementOrder.id).where(
                        and_(
                            ProcurementOrder.原始订单编号 == main_order_no,
                            ProcurementOrder.线上宝贝名称 == product_name,
                            color_filter,
                            size_filter,
                            ProcurementOrder.网店名称.in_(associated_shops)
                        )
                    ).limit(1)
                )
                po_id = result.scalar()

                if po_id:
                    confidence = 0.9
                    logger.info(f"层级3精确匹配成功：订单 {main_order_no} 颜色 {color} 尺寸 {size}")
                else:
                    # Loose attempt: order number + name + quantity
                    logger.debug("精确匹配失败，尝试宽松匹配")
                    result = await db.execute(
                        select(ProcurementOrder.id).where(
                            and_(
                                ProcurementOrder.原始订单编号 == main_order_no,
                                ProcurementOrder.线上宝贝名称 == product_name,
                                ProcurementOrder.数量 == quantity,
                                ProcurementOrder.网店名称.in_(associated_shops)
                            )
                        ).limit(1)
                    )
                    po_id = result.scalar()
                    if po_id:
                        confidence = 0.8
                        logger.info(f"层级3宽松匹配成功：订单 {main_order_no} 数量 {quantity}")

            # Level 4: cross-shop match on product attributes only, tried when
            # levels 1-3 found nothing
            if not po_id:
                logger.debug("层级1-3匹配失败，尝试层级4：跨店铺商品属性匹配")

                if color_and_size is None:
                    color_and_size = self._extract_color_and_size(product_name, product_attrs)
                color, size = color_and_size

                logger.debug(f"层级4 - 商品名: {product_name[:50]}, 颜色: {color}, 尺寸: {size}")

                conditions = [
                    ProcurementOrder.线上宝贝名称 == product_name,
                    ProcurementOrder.网店名称.in_(associated_shops)
                ]
                # Colour and size are each added independently, only when
                # that attribute could be extracted
                if color:
                    conditions.append(ProcurementOrder.颜色 == color)
                if size:
                    conditions.append(ProcurementOrder.尺寸 == size)

                result = await db.execute(
                    select(ProcurementOrder.id).where(
                        and_(*conditions)
                    ).limit(1)
                )
                po_id = result.scalar()

                if po_id:
                    confidence = 0.7
                    logger.info(f"层级4跨店铺匹配成功：商品 {product_name[:30]} 颜色 {color} 尺寸 {size}")
                    level = 4  # report this as a level-4 match

            # Persist the outcome on the pending shipment
            if po_id:
                outcome = {
                    'procurement_order_id': po_id,
                    'match_status': 'matched',
                    'match_confidence': confidence
                }
            else:
                logger.warning(f"所有层级匹配均失败：订单 {main_order_no} 子订单 {sub_order_no}")
                outcome = {
                    'match_status': 'failed',
                    'match_confidence': 0.0
                }

            await db.execute(
                update(PendingShipment).where(
                    and_(
                        PendingShipment.子订单编号 == sub_order_no,
                        PendingShipment.网店名称 == shop_name
                    )
                ).values(**outcome)
            )

            # The level is returned alongside the id for the caller's statistics
            return (po_id, level) if po_id else None

        except Exception as e:
            logger.error(f"匹配procurement_order时出错：{str(e)}")
            return None

    @staticmethod
    def _cell_text(value) -> str:
        """Return the cell as text, or '' for a missing (None/NaN) cell."""
        if value is None or pd.isna(value):
            return ''
        return str(value)

    @staticmethod
    def _cell_quantity(value, default: int = 1) -> int:
        """Return the cell as an int, or ``default`` when missing or not a number."""
        if value is None or pd.isna(value):
            return default
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _extract_color_and_size(product_name: str, product_attrs: str) -> Tuple[str, str]:
        """Extract (colour, size) from the product title and attribute text."""
        from app.utils.text_parser import AttributeExtractor
        attrs = AttributeExtractor().extract_attributes(product_name, product_attrs)
        color = attrs.get('颜色', '')
        size = attrs.get('尺寸', '') or attrs.get('尺码', '')
        return color, size

    async def _find_po_by_order_and_name(
        self,
        db: AsyncSession,
        main_order_no: str,
        product_name: str,
        associated_shops: List[str]
    ) -> Optional[int]:
        """Find a procurement order id by main order number + product name within the given shops."""
        result = await db.execute(
            select(ProcurementOrder.id).where(
                and_(
                    ProcurementOrder.原始订单编号 == main_order_no,
                    ProcurementOrder.线上宝贝名称 == product_name,
                    ProcurementOrder.网店名称.in_(associated_shops)
                )
            ).limit(1)
        )
        return result.scalar()

    async def identify_shop_from_order(
        self,
        db: AsyncSession,
        order_number: str
    ) -> Optional[str]:
        """
        Identify the shop an order belongs to.

        Looks the order number up in procurement_orders (original order
        number column) and returns the shop name of the first hit.

        Args:
            db: Database session.
            order_number: Main order number.

        Returns:
            The shop name, or None when the order is unknown or the lookup
            failed (failures are logged, not raised).
        """
        try:
            lookup = (
                select(ProcurementOrder.网店名称)
                .where(ProcurementOrder.原始订单编号 == str(order_number))
                .limit(1)
            )
            shop_name = (await db.execute(lookup)).scalar_one_or_none()

            if not shop_name:
                logger.debug(f"未找到订单号 {order_number} 对应的店铺")
            else:
                logger.info(f"自动识别店铺成功：订单号 {order_number} → 店铺 {shop_name}")

            return shop_name
        except Exception as e:
            logger.error(f"识别店铺时出错：{str(e)}")
            return None

    async def auto_identify_shop_from_excel(
        self,
        db: AsyncSession,
        df: pd.DataFrame,
        max_rows: int = 5
    ) -> Optional[str]:
        """
        Identify the shop from the order numbers in the Excel data.

        Walks the main order numbers of the first ``max_rows`` rows and
        returns the shop of the first one that is known to the system.

        Args:
            db: Database session.
            df: Parsed Excel data.
            max_rows: Maximum number of leading rows to inspect.

        Returns:
            The identified shop name, or None when the column is missing or
            none of the inspected order numbers is known.
        """
        if '主订单编号' not in df.columns:
            logger.warning("Excel中未找到'主订单编号'列")
            return None

        # Keep the raw cell values: converting the column to str first would
        # turn blank cells into the text 'nan'/'None', which can no longer be
        # recognised as missing and would be sent to the database as a lookup.
        candidates = df['主订单编号'].head(max_rows).tolist()

        logger.info(f"开始自动识别店铺，检查前{len(candidates)}个订单号")

        for raw_order_no in candidates:
            # A numeric column that contains blanks is read as float, so an
            # order number arrives as e.g. 123.0; look it up as '123'.
            if isinstance(raw_order_no, float) and raw_order_no.is_integer():
                raw_order_no = int(raw_order_no)

            # None for NaN/None/blank cells, otherwise the trimmed text
            order_no = self._safe_str_value(raw_order_no)
            if order_no is None:
                continue

            shop_name = await self.identify_shop_from_order(db, order_no)
            if shop_name:
                logger.info(f"✅ 自动识别成功：通过订单号 {order_no} 识别为店铺 {shop_name}")
                return shop_name

        logger.warning(f"⚠️ 无法自动识别店铺：前{max_rows}行订单号均未匹配")
        return None