#!/usr/bin/env python3
"""
测试Excel文件解析
"""

import sys
import pandas as pd
from pathlib import Path
sys.path.append('backend')

from app.utils.excel_reader import ExcelReader

def analyze_excel_file(file_path):
    """分析Excel文件"""
    print(f"分析文件: {file_path}")
    
    if not Path(file_path).exists():
        print(f"文件不存在: {file_path}")
        return
    
    try:
        # 根据文件扩展名选择读取方式
        if file_path.endswith('.csv'):
            df_raw = pd.read_csv(file_path, encoding='utf-8')
        else:
            df_raw = pd.read_excel(file_path, engine='openpyxl' if file_path.endswith('.xlsx') else 'xlrd')
        print(f"原始读取: {len(df_raw)} 行, {len(df_raw.columns)} 列")
        print(f"列名: {list(df_raw.columns)}")
        
        # 显示前几行
        print("\n前10行数据:")
        print(df_raw.head(10))
        
        # 检查空行情况
        print(f"\n完全空的行数: {df_raw.isnull().all(axis=1).sum()}")
        print(f"有任何数据的行数: {(~df_raw.isnull().all(axis=1)).sum()}")
        
        # 检查每列的非空值数量
        print("\n每列非空值统计:")
        for col in df_raw.columns:
            non_null_count = df_raw[col].notna().sum()
            print(f"  {col}: {non_null_count}/{len(df_raw)} 非空")
        
        # 使用我们的ExcelReader处理
        print("\n使用ExcelReader处理:")
        reader = ExcelReader()
        records = reader.process_excel_file(Path(file_path))
        print(f"处理后记录数: {len(records)}")
        
        # 显示前几条记录
        print("\n前5条处理后的记录:")
        for i, record in enumerate(records[:5]):
            print(f"记录 {i+1}: {record}")
            
    except Exception as e:
        print(f"解析错误: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    # 测试文件路径 - 你需要提供实际的文件路径
    test_files = [
        "../原始订单导出-2025-08-20.xls",  # 上级目录中的原始文件
        "/tmp/ordersys_uploads/manual_20250820_221355_原始订单导出-2025-08-20.xls",
        "原始订单导出-2025-08-20.xls",  # 如果在当前目录
        "test_order_sample.csv",  # 测试CSV文件
    ]
    
    for file_path in test_files:
        if Path(file_path).exists():
            analyze_excel_file(file_path)
            break
    else:
        print("请提供正确的Excel文件路径")
        print("可能的文件位置:")
        for p in Path("/tmp").glob("**/原始订单导出-2025-08-20.xls"):
            print(f"  {p}")