#!/usr/bin/env python3
"""
Ralph Loop: 采购方式判断逻辑迭代优化
反复核对全部订单，分析不一致案例，输出改进建议
"""

import asyncio
import sys
import re
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Any
from collections import Counter, defaultdict
from dataclasses import dataclass, field

sys.path.insert(0, str(Path(__file__).parent))

from app.core.database import get_db
from sqlalchemy import text


@dataclass
class MismatchCase:
    """不一致案例"""
    product_name: str
    sales_attr: str
    db_method: str
    recalc_method: str
    rule: str
    count: int = 1  # 相同模式出现次数


class ProcurementMethodAnalyzer:
    """采购方式分析器 - 用于Ralph Loop迭代"""

    def __init__(self):
        self.iteration = 0
        self.history = []  # 迭代历史

    # ================== 采购方式判断逻辑 ==================

    def _determine_procurement_method(self, product_name: str,
                                       sales_attributes: str = "",
                                       brand: str = "") -> Tuple[str, str]:
        """采购方式判断逻辑"""
        product_name = product_name.strip() if product_name else ""
        sales_attributes = sales_attributes.strip() if sales_attributes else ""
        brand = brand.strip() if brand else ""

        product_name_upper = product_name.upper()
        sales_attributes_upper = sales_attributes.upper()
        brand_upper = brand.upper()

        # 规则1: MC库存
        if (product_name_upper.endswith(' MC') or
            product_name_upper.endswith('MC') or
            product_name_upper.endswith(' ZT') or
            product_name_upper.endswith('ZT') or
            'MC' in sales_attributes_upper or
            'ZT' in sales_attributes_upper):
            return "MC", "R1:MC/ZT"

        # 规则2: SF
        if self._has_sf_suffix(product_name_upper):
            if self._has_independent_la(sales_attributes_upper):
                return "LA", "R2:SF+LA属性"
            elif self._has_independent_ny(sales_attributes_upper):
                return "NY", "R2:SF+NY属性"
            elif 'MC' in sales_attributes_upper or 'ZT' in sales_attributes_upper:
                return "MC", "R2:SF+MC属性"
            else:
                return "SF", "R2:SF后缀"

        # 规则3: FL
        if self._has_fl_suffix(product_name_upper):
            if self._has_independent_la(sales_attributes_upper):
                return "LA", "R3:FL+LA属性"
            elif self._has_independent_ny(sales_attributes_upper):
                return "NY", "R3:FL+NY属性"
            elif 'MC' in sales_attributes_upper or 'ZT' in sales_attributes_upper:
                return "MC", "R3:FL+MC属性"
            else:
                return "FL", "R3:FL后缀"

        # 规则3.5: CA
        if self._has_ca_suffix(product_name_upper):
            if self._has_independent_la(sales_attributes_upper):
                return "LA", "R3.5:CA+LA属性"
            elif self._has_independent_ny(sales_attributes_upper):
                return "NY", "R3.5:CA+NY属性"
            elif 'MC' in sales_attributes_upper or 'ZT' in sales_attributes_upper:
                return "MC", "R3.5:CA+MC属性"
            else:
                return "CA", "R3.5:CA后缀"

        # 规则4: NY属性
        if self._has_independent_ny(sales_attributes_upper):
            return "NY", "R4:NY属性"

        # 规则4.5: CA属性
        if self._has_independent_ca(sales_attributes_upper):
            return "CA", "R4.5:CA属性"

        # 规则5: AT/ATM
        if (self._has_at_suffix(product_name_upper) and
            'MC' not in sales_attributes_upper and
            'ZT' not in sales_attributes_upper):
            return "AT", "R5:AT后缀"

        # 规则6: AP/APM
        if ((product_name_upper.endswith(' AP') or
             product_name_upper.endswith('AP') or
             product_name_upper.endswith(' APM') or
             product_name_upper.endswith('APM')) and
            'MC' not in sales_attributes_upper and
            'ZT' not in sales_attributes_upper):
            return "AP", "R6:AP后缀"

        # 规则7: LA/LAM
        if (product_name_upper.endswith(' LA') or
            product_name_upper.endswith('LA') or
            product_name_upper.endswith(' LAM') or
            product_name_upper.endswith('LAM')):
            return "LA", "R7:LA后缀"

        # 规则8: SS/SSM
        if ((product_name_upper.endswith(' SS') or
             product_name_upper.endswith('SS') or
             product_name_upper.endswith(' SSM') or
             product_name_upper.endswith('SSM')) and
            'MC' not in sales_attributes_upper and
            'ZT' not in sales_attributes_upper):
            return "SS", "R8:SS后缀"

        # 后缀排除列表
        suffixes_to_exclude = ['LA', 'MC', 'ZT', 'LAM', 'SS', 'SSM', 'AT', 'ATM',
                               'AP', 'APM', 'SF', 'SFM', 'FL', 'FLM', 'CA', 'CAM']
        has_excluded_suffix = any(
            product_name_upper.endswith(f' {suffix}') or
            product_name_upper.endswith(suffix)
            for suffix in suffixes_to_exclude
        )

        # 规则9-12: 属性中的地点标记
        if not has_excluded_suffix:
            if self._has_independent_la(sales_attributes_upper):
                return "LA", "R9:LA属性"
            if self._has_independent_sf(sales_attributes_upper):
                return "SF", "R10:SF属性"
            if self._has_independent_fl(sales_attributes_upper):
                return "FL", "R11:FL属性"
            if self._has_independent_ca(sales_attributes_upper):
                return "CA", "R12:CA属性"

        # 规则13: 国内现货
        if (self._has_stock_keywords(product_name) or
            self._is_domestic_brand(brand_upper)):
            return "GN", "R13:国内现货"

        # 规则14: 默认NY
        return "NY", "R14:默认NY"

    # ================== 辅助函数 ==================

    def _has_sf_suffix(self, t: str) -> bool:
        return (t.endswith(' SF') or t.endswith('SF') or
                t.endswith(' SFM') or t.endswith('SFM') or
                t.endswith(' SF M') or t.endswith('SF M'))

    def _has_fl_suffix(self, t: str) -> bool:
        return (t.endswith(' FL') or t.endswith('FL') or
                t.endswith(' FLM') or t.endswith('FLM') or
                t.endswith(' FL M') or t.endswith('FL M'))

    def _has_ca_suffix(self, t: str) -> bool:
        return (t.endswith(' CA') or t.endswith('CA') or
                t.endswith(' CAM') or t.endswith('CAM') or
                t.endswith(' CA M') or t.endswith('CA M'))

    def _has_at_suffix(self, t: str) -> bool:
        """AT后缀检查 - 包含日期+AT模式"""
        if (t.endswith(' AT') or t.endswith(' ATM') or
            t.endswith(' AT M') or t.endswith('AT M')):
            return True

        if t.endswith('AT') or t.endswith('ATM'):
            suffix_len = 3 if t.endswith('ATM') else 2
            if len(t) > suffix_len:
                char_before = t[-(suffix_len + 1)]
                if char_before == ' ' or ord(char_before) > 127:
                    return True
                if char_before.isdigit():
                    pattern = r'(?:^|[\s\u4e00-\u9fff])\d{1,2}\.\d{1,2}ATM?$'
                    if re.search(pattern, t):
                        return True
                # 新增: 检查 特殊字符+AT 模式（如 ?AT）
                # 前面是特殊字符（非字母数字）时，检查再前面是否是空格
                if not char_before.isalnum() and len(t) > suffix_len + 1:
                    char_before_special = t[-(suffix_len + 2)]
                    if char_before_special == ' ' or ord(char_before_special) > 127:
                        return True
            elif len(t) == suffix_len:
                return True

        pattern = r'(?:^|[\s\u4e00-\u9fff])ATM?\s*\d{1,2}\.\d{1,2}$'
        if re.search(pattern, t):
            return True

        return False

    def _has_independent_la(self, t: str) -> bool:
        return bool(re.search(r'LA(?=_|\s|$|\d|\[)', t))

    def _has_independent_ny(self, t: str) -> bool:
        return bool(re.search(r'NY(?=_|\s|$|\[)', t))

    def _has_independent_sf(self, t: str) -> bool:
        return bool(re.search(r'SF(?=_|\s|$|\[)', t))

    def _has_independent_fl(self, t: str) -> bool:
        return bool(re.search(r'FL(?=_|\s|$|\[)', t))

    def _has_independent_ca(self, t: str) -> bool:
        return bool(re.search(r'CA(?=_|\s|$|\[)', t))

    def _has_stock_keywords(self, t: str) -> bool:
        return ('现货' in t or 'GN现货' in t or '差价' in t)

    def _is_domestic_brand(self, brand_upper: str) -> bool:
        domestic_brands = ['GRUNDZERO', 'JANSPORT', 'EMU']
        return any(b in brand_upper for b in domestic_brands)

    # ================== 分析函数 ==================

    async def analyze_all_orders(self, db) -> Dict[str, Any]:
        """分析全部订单"""
        query = text("""
            SELECT
                r.线上宝贝名称 as product_name,
                r.线上销售属性 as sales_attributes,
                p.品牌 as brand,
                p.procurement_method as db_method
            FROM raw_orders r
            LEFT JOIN products_master p
                ON r.线上宝贝名称 = p.线上宝贝名称
                AND (r.线上销售属性 = p.线上销售属性
                     OR (r.线上销售属性 IS NULL AND p.线上销售属性 IS NULL))
        """)

        result = await db.execute(query)
        rows = result.fetchall()

        total = len(rows)
        match = 0
        mismatch_cases = defaultdict(list)
        method_stats = Counter()
        rule_stats = Counter()

        for row in rows:
            product_name = row.product_name or ""
            sales_attr = row.sales_attributes or ""
            brand = row.brand or ""
            db_method = row.db_method or "未关联"

            recalc_method, rule = self._determine_procurement_method(
                product_name, sales_attr, brand
            )

            method_stats[db_method] += 1
            rule_stats[rule] += 1

            if db_method == recalc_method:
                match += 1
            else:
                # 按模式分组不一致案例
                pattern_key = f"{db_method}->{recalc_method}|{rule}"
                mismatch_cases[pattern_key].append({
                    'name': product_name[:60],
                    'attr': sales_attr[:30] if sales_attr else "",
                    'db': db_method,
                    'calc': recalc_method,
                    'rule': rule
                })

        return {
            'total': total,
            'match': match,
            'mismatch': total - match,
            'accuracy': match / total * 100 if total > 0 else 0,
            'method_stats': dict(method_stats),
            'rule_stats': dict(rule_stats),
            'mismatch_cases': dict(mismatch_cases)
        }

    def print_iteration_report(self, stats: Dict[str, Any], iteration: int):
        """打印迭代报告"""
        print(f"\n{'='*70}")
        print(f"  Ralph Loop 迭代 #{iteration}")
        print(f"{'='*70}")
        print(f"  总订单数: {stats['total']:,}")
        print(f"  一致数量: {stats['match']:,}")
        print(f"  不一致数: {stats['mismatch']:,}")
        print(f"  准确率:   {stats['accuracy']:.4f}%")
        print()

        if stats['mismatch'] == 0:
            print("  ✓ 完美! 所有订单判断一致!")
            return

        # 按不一致数量排序显示
        print("【不一致模式分析】")
        sorted_cases = sorted(
            stats['mismatch_cases'].items(),
            key=lambda x: len(x[1]),
            reverse=True
        )

        for i, (pattern, cases) in enumerate(sorted_cases[:15], 1):
            parts = pattern.split('|')
            direction = parts[0]
            rule = parts[1] if len(parts) > 1 else ""
            print(f"\n  {i}. [{len(cases):4d}条] {direction} ({rule})")
            # 显示2个样例
            for c in cases[:2]:
                print(f"      商品: {c['name']}")
                if c['attr']:
                    print(f"      属性: {c['attr']}")

        print(f"\n{'='*70}")


async def run_ralph_loop(max_iterations: int = 100):
    """执行Ralph Loop"""
    analyzer = ProcurementMethodAnalyzer()

    print("=" * 70)
    print("  Ralph Loop: 采购方式判断逻辑迭代优化")
    print(f"  最大迭代次数: {max_iterations}")
    print("=" * 70)

    best_accuracy = 0
    no_improvement_count = 0

    async for db in get_db():
        for iteration in range(1, max_iterations + 1):
            stats = await analyzer.analyze_all_orders(db)
            analyzer.print_iteration_report(stats, iteration)

            # 检查是否达到100%
            if stats['accuracy'] >= 100.0:
                print(f"\n✓ 迭代 #{iteration}: 达到100%准确率!")
                break

            # 检查是否有改进
            if stats['accuracy'] > best_accuracy:
                best_accuracy = stats['accuracy']
                no_improvement_count = 0
            else:
                no_improvement_count += 1

            # 如果连续5次没有改进且准确率很高，可以停止
            if no_improvement_count >= 5 and best_accuracy > 99.9:
                print(f"\n✓ 连续{no_improvement_count}次无改进，准确率已达{best_accuracy:.4f}%")
                break

            # 输出改进建议
            if stats['mismatch'] > 0:
                print("\n【改进建议】")
                top_pattern = max(stats['mismatch_cases'].items(), key=lambda x: len(x[1]))
                pattern, cases = top_pattern
                parts = pattern.split('|')
                print(f"  优先处理: {parts[0]} ({len(cases)}条)")

                # 分析具体原因
                if "NY->AT" in pattern:
                    print("  原因: 这些订单末尾有AT模式但数据库判断为NY")
                    print("  建议: 需要重新生成products_master以应用新逻辑")
                elif "AT->NY" in pattern:
                    print("  原因: AT后缀判断逻辑可能遗漏了某些模式")
                    print("  建议: 检查_has_at_suffix函数是否覆盖所有情况")

            # 由于无法在循环中自动修改代码，输出分析后退出
            print("\n" + "=" * 70)
            print("  分析完成。如需继续优化，请根据上述建议修改代码后重新运行。")
            print("=" * 70)
            break

        break


if __name__ == "__main__":
    asyncio.run(run_ralph_loop(100))