#!/usr/bin/env python3
"""
Test downloading Alibaba Cloud (Aliyun) hosted images to local disk.
"""

import asyncio
import aiohttp
import aiofiles
import hashlib
import os
import sys
import time
from pathlib import Path
from urllib.parse import urlparse

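# Add the directory containing this script to sys.path so that the `app`
# package can be imported (assumes this directory is the project root - TODO confirm).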
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from app.core.database import get_db
from sqlalchemy import text

# Directory where downloaded images are stored: "<grandparent of this file>/images".
# resolve() makes the path absolute first; with a relative __file__ (a script
# started as `python script.py` on Python < 3.9) `.parent.parent` would otherwise
# collapse to "." and the images would land in the current working directory.
IMAGES_DIR = Path(__file__).resolve().parent.parent / "images"
IMAGES_DIR.mkdir(exist_ok=True)

def _first_image_url(raw_links):
    """Return the first image URL encoded in *raw_links*, or None if unusable.

    *raw_links* is expected to be JSON text holding a list of URLs. The value
    may also be double-encoded (a JSON string that itself contains the JSON
    list), so a second decode is attempted when the first one yields a string.
    """
    import json  # local import: json is not imported at module level in this file

    try:
        decoded = json.loads(raw_links)
        if isinstance(decoded, str):
            decoded = json.loads(decoded)
    except (json.JSONDecodeError, TypeError):
        return None

    if not isinstance(decoded, list) or not decoded:
        return None

    first = decoded[0]
    # Only a non-empty string can be hashed into a filename and requested
    # later; anything else would fail further down the pipeline.
    if not isinstance(first, str) or not first.strip():
        return None
    return first


async def get_sample_image_urls(limit=10):
    """Fetch sample image URLs together with their product information.

    Args:
        limit: Maximum number of rows requested from the database.

    Returns:
        list[dict]: One dict per usable row with the keys ``'url'`` (first
        image of the row), ``'product_name'`` and ``'sales_attr'``. Rows whose
        image-link column cannot be decoded are skipped. If the query fails,
        the error is printed and whatever was collected so far is returned.
    """
    image_urls = []

    async for db in get_db():
        try:
            result = await db.execute(text("""
                SELECT DISTINCT norm.图片链接, po.线上宝贝名称, po.线上销售属性
                FROM procurement_orders po
                LEFT JOIN order_items_norm norm ON po.original_order_id = norm.id
                WHERE norm.图片链接 IS NOT NULL 
                AND norm.图片链接 != ''
                AND po.procurement_method = 'LA'
                LIMIT :limit
            """), {"limit": limit})

            for row in result.fetchall():
                url = _first_image_url(row.图片链接)
                if url is None:
                    continue
                image_urls.append({
                    'url': url,  # only the first image of each row is used
                    'product_name': row.线上宝贝名称,
                    'sales_attr': row.线上销售属性
                })

        except Exception as e:
            # Best effort: report the failure and return what we have.
            print(f"获取图片URL失败: {e}")

        # Only the first object yielded by get_db() is needed.
        break

    return image_urls

def _clean_name_part(value, max_len):
    """Reduce *value* to filename-safe characters, capped at *max_len* chars."""
    # A missing value (None) is treated as empty text instead of raising
    # TypeError when iterated.
    raw = '' if value is None else str(value)
    kept = "".join(c for c in raw if c.isalnum() or c in (' ', '-', '_'))
    return kept.strip()[:max_len]


def get_image_filename(url, product_name, sales_attr):
    """Build a file name for an image from its URL and product information.

    The name has the form ``<hash>_<product>_<attr><ext>`` where ``<hash>`` is
    the first 12 hex digits of the MD5 of *url*, ``<product>`` / ``<attr>`` are
    the sanitized product name (max 20 chars) and sales attribute (max 15
    chars), and ``<ext>`` is the suffix of the URL path (``.jpg`` if the path
    has none). Spaces are replaced by underscores.

    Args:
        url: Image URL (string).
        product_name: Product name; ``None`` is treated as empty.
        sales_attr: Sales attribute; ``None`` is treated as empty.

    Returns:
        str: The generated file name (no directory component).
    """
    # Extension comes from the URL path only, so query strings are ignored.
    ext = Path(urlparse(url).path).suffix or '.jpg'

    # MD5 is used purely to derive a short, stable identifier from the URL.
    url_hash = hashlib.md5(url.encode()).hexdigest()[:12]

    safe_product = _clean_name_part(product_name, 20)
    safe_attr = _clean_name_part(sales_attr, 15)

    return f"{url_hash}_{safe_product}_{safe_attr}{ext}".replace(' ', '_')

async def download_image(session, image_info, max_size_mb=5):
    """Download a single image into IMAGES_DIR and report the outcome.

    The response is streamed into a temporary ``<filename>.part`` file that is
    renamed to its final name only after the transfer completed, so a failed
    or interrupted download never leaves a truncated file that a later run
    would mistake for an already-downloaded image.

    Args:
        session: aiohttp client session used to issue the GET request.
        image_info: dict with the keys ``'url'``, ``'product_name'`` and
            ``'sales_attr'``.
        max_size_mb: Maximum accepted image size in megabytes. Enforced both
            against the Content-Length header (when present) and against the
            number of bytes actually received.

    Returns:
        dict: Always contains ``'success'`` and ``'filename'``. On success it
        also contains ``'size'`` (bytes) and ``'status'`` (``'exists'`` or
        ``'downloaded'``), plus ``'download_time'`` (seconds) for a fresh
        download. On failure it contains ``'error'`` with a short description.
    """
    url = image_info['url']
    product_name = image_info['product_name']
    sales_attr = image_info['sales_attr']

    filename = get_image_filename(url, product_name, sales_attr)
    filepath = IMAGES_DIR / filename

    # Skip the download when the file is already present.
    if filepath.exists():
        file_size = filepath.stat().st_size
        print(f"✓ 文件已存在: {filename} ({file_size:,} bytes)")
        return {
            'success': True,
            'filename': filename,
            'size': file_size,
            'status': 'exists'
        }

    partial_path = IMAGES_DIR / (filename + '.part')
    max_bytes = max_size_mb * 1024 * 1024

    try:
        start_time = time.monotonic()

        # Browser-like request headers.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': 'https://www.taobao.com/',
            'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        }

        # Per-request timeouts are expressed as ClientTimeout; a bare number
        # is only accepted for backward compatibility.
        request_timeout = aiohttp.ClientTimeout(total=30)

        async with session.get(url, headers=headers, timeout=request_timeout) as response:
            if response.status != 200:
                print(f"✗ 下载失败: {filename} (HTTP {response.status})")
                return {
                    'success': False,
                    'error': f'HTTP {response.status}',
                    'filename': filename
                }

            # Early rejection based on the advertised size, if any.
            content_length = response.headers.get('content-length')
            if content_length:
                size_mb = int(content_length) / (1024 * 1024)
                if size_mb > max_size_mb:
                    print(f"✗ 图片过大: {filename} ({size_mb:.1f}MB > {max_size_mb}MB)")
                    return {
                        'success': False,
                        'error': f'文件过大: {size_mb:.1f}MB',
                        'filename': filename
                    }

            # Stream to the temporary file, enforcing the limit on the bytes
            # actually received (the header may be missing or wrong).
            received = 0
            async with aiofiles.open(partial_path, 'wb') as f:
                async for chunk in response.content.iter_chunked(8192):
                    received += len(chunk)
                    if received > max_bytes:
                        print(f"✗ 图片过大: {filename} (> {max_size_mb}MB)")
                        return {
                            'success': False,
                            'error': f'文件过大: 超过{max_size_mb}MB',
                            'filename': filename
                        }
                    await f.write(chunk)

            # Publish the file under its final name only once it is complete.
            partial_path.replace(filepath)

            download_time = time.monotonic() - start_time
            file_size = filepath.stat().st_size

            print(f"✓ 下载成功: {filename}")
            print(f"  商品: {(product_name or '')[:30]}...")
            print(f"  属性: {sales_attr}")
            print(f"  大小: {file_size:,} bytes ({file_size/1024:.1f}KB)")
            print(f"  用时: {download_time:.2f}秒")
            if download_time > 0:  # avoid ZeroDivisionError on coarse clocks
                print(f"  速度: {file_size/1024/download_time:.1f}KB/s")

            return {
                'success': True,
                'filename': filename,
                'size': file_size,
                'download_time': download_time,
                'status': 'downloaded'
            }

    except asyncio.TimeoutError:
        print(f"✗ 下载超时: {filename}")
        return {
            'success': False,
            'error': '下载超时',
            'filename': filename
        }
    except Exception as e:
        print(f"✗ 下载异常: {filename} - {e}")
        return {
            'success': False,
            'error': str(e),
            'filename': filename
        }
    finally:
        # Remove any leftover partial file (failure, oversize, cancellation).
        # After a successful rename the path no longer exists.
        if partial_path.exists():
            partial_path.unlink()

async def test_image_download():
    """Run the end-to-end check: fetch sample URLs, download them, print a summary.

    Steps, each announced on stdout:
      1. load up to 10 sample image records,
      2. list what is about to be downloaded,
      3. download with at most 3 transfers in flight,
      4. print success/failure counts, total size and aggregate speed,
      5. list every entry currently present in IMAGES_DIR.
    """
    print("=== 测试阿里云图片下载到本地 ===\n")
    print(f"图片存储目录: {IMAGES_DIR}")

    # Step 1: sample records from the database.
    print("1. 获取样例图片URL...")
    samples = await get_sample_image_urls(10)
    if not samples:
        print("未找到图片URL")
        return
    print(f"   获取到 {len(samples)} 个图片URL")

    # Step 2: preview of the download list.
    print("\n2. 图片下载列表:")
    for position, sample in enumerate(samples, start=1):
        print(f"   {position:2d}. {sample['product_name'][:40]}...")
        print(f"       属性: {sample['sales_attr']}")
        print(f"       URL: {sample['url'][:60]}...")
        print()

    # Step 3: the downloads themselves.
    print("3. 开始下载图片...")

    pool = aiohttp.TCPConnector(limit=5, limit_per_host=3)
    session_timeout = aiohttp.ClientTimeout(total=60, connect=10)

    async with aiohttp.ClientSession(connector=pool, timeout=session_timeout) as session:
        # At most three downloads run at the same time.
        gate = asyncio.Semaphore(3)

        async def guarded_download(info):
            async with gate:
                return await download_image(session, info)

        outcomes = await asyncio.gather(
            *map(guarded_download, samples),
            return_exceptions=True,
        )

        # Step 4: aggregate the per-image outcomes.
        print("\n4. 下载结果统计:")
        ok_count = 0
        fail_count = 0
        bytes_total = 0
        seconds_total = 0

        for outcome in outcomes:
            if isinstance(outcome, Exception):
                print(f"   异常: {outcome}")
                fail_count += 1
                continue
            if not outcome['success']:
                fail_count += 1
                continue
            ok_count += 1
            bytes_total += outcome['size']
            elapsed = outcome.get('download_time')
            if elapsed:
                seconds_total += elapsed

        print(f"   成功下载: {ok_count} 个")
        print(f"   下载失败: {fail_count} 个")
        print(f"   总大小: {bytes_total:,} bytes ({bytes_total/1024:.1f}KB)")
        if seconds_total > 0:
            print(f"   总用时: {seconds_total:.2f}秒")
            print(f"   平均速度: {bytes_total/1024/seconds_total:.1f}KB/s")

        # Step 5: everything currently in the image directory.
        print("\n5. 下载的文件列表:")
        for entry in list(IMAGES_DIR.glob("*")):
            entry_size = entry.stat().st_size
            print(f"   {entry.name} ({entry_size:,} bytes)")

async def main():
    """Entry coroutine: run the download test and report any failure.

    Any ordinary exception raised by the test is printed together with its
    traceback instead of propagating, so the coroutine returns ``None`` in
    that case as well.
    """
    try:
        await test_image_download()
    except KeyboardInterrupt:
        # Only reached if the interrupt is raised while this coroutine's own
        # synchronous code is executing; kept for that case.
        print("\n用户中断下载")
    except Exception as e:
        print(f"测试过程中发生错误: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C normally surfaces here rather than inside main(): asyncio.run()
        # cancels the main task and then raises KeyboardInterrupt itself, so
        # the handler must wrap the asyncio.run() call to avoid a traceback.
        print("\n用户中断下载")