#!/usr/bin/env python3
"""Visualize batch performance test results.

Loads the newest JSON results file produced by run_batch_performance_test.py
and renders batch-size vs. performance-metric comparison charts plus a plain
text summary report.
"""
import json
import matplotlib.pyplot as plt
import numpy as np
import os
from datetime import datetime

# Font setup so CJK axis titles/labels render; DejaVu Sans is the fallback.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False


def load_results(json_file):
    """Load a test-results JSON file and return the parsed dict."""
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)


def _successful_tests(results):
    """Return the list of per-batch test records that completed successfully."""
    return [t for t in results['batch_tests'] if t['success']]


def create_throughput_chart(results, output_dir):
    """Plot throughput and latency versus batch size, side by side.

    Saves batch_throughput_latency.png into *output_dir* and shows the figure.
    Prints a warning and returns early when no test succeeded.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    successful = _successful_tests(results)
    batch_sizes = [t['batch_size'] for t in successful]
    throughputs = [t['avg_throughput'] for t in successful]
    latencies = [t['avg_latency_ms'] for t in successful]

    if not batch_sizes:
        print("⚠️ 没有成功的测试数据")
        return

    # --- Throughput subplot ---
    ax1.plot(batch_sizes, throughputs, 'o-', color='#4ECDC4',
             linewidth=2, markersize=10, label='吞吐量')
    ax1.set_title('批次大小 vs 吞吐量', fontsize=14, fontweight='bold')
    ax1.set_xlabel('批次大小', fontsize=12)
    ax1.set_ylabel('吞吐量 (FPS)', fontsize=12)
    ax1.grid(True, alpha=0.3)

    # Value labels above each point; offset hoisted out of the loop.
    tp_offset = max(throughputs) * 0.02
    for x, y in zip(batch_sizes, throughputs):
        ax1.text(x, y + tp_offset, f'{y:.1f}',
                 ha='center', va='bottom', fontweight='bold')

    # Star-mark the batch size with the best throughput.
    best_idx = int(np.argmax(throughputs))
    ax1.scatter([batch_sizes[best_idx]], [throughputs[best_idx]],
                color='red', s=200, marker='*', zorder=5,
                label=f'最佳: Batch {batch_sizes[best_idx]}')
    ax1.legend()

    # --- Latency subplot ---
    ax2.plot(batch_sizes, latencies, 'o-', color='#FF6B6B',
             linewidth=2, markersize=10, label='延迟')
    ax2.set_title('批次大小 vs 延迟', fontsize=14, fontweight='bold')
    ax2.set_xlabel('批次大小', fontsize=12)
    ax2.set_ylabel('延迟 (ms)', fontsize=12)
    ax2.grid(True, alpha=0.3)

    lat_offset = max(latencies) * 0.02
    for x, y in zip(batch_sizes, latencies):
        ax2.text(x, y + lat_offset, f'{y:.1f}',
                 ha='center', va='bottom', fontweight='bold')

    # Star-mark the batch size with the lowest latency.
    best_latency_idx = int(np.argmin(latencies))
    ax2.scatter([batch_sizes[best_latency_idx]], [latencies[best_latency_idx]],
                color='green', s=200, marker='*', zorder=5,
                label=f'最低: Batch {batch_sizes[best_latency_idx]}')
    ax2.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'batch_throughput_latency.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 生成图表: batch_throughput_latency.png")


def create_gpu_utilization_chart(results, output_dir):
    """Plot average GPU utilization and GPU memory usage as bar charts.

    Saves batch_gpu_metrics.png into *output_dir*. Silently returns when
    there is no successful test data.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    successful = _successful_tests(results)
    batch_sizes = [t['batch_size'] for t in successful]
    gpu_utils = [t['avg_gpu_util'] for t in successful]
    gpu_memories = [t['avg_gpu_memory_mb'] for t in successful]

    if not batch_sizes:
        return

    # --- GPU utilization subplot ---
    ax1.bar(batch_sizes, gpu_utils, color='#95E1D3', alpha=0.8, edgecolor='black')
    ax1.set_title('批次大小 vs GPU 利用率', fontsize=14, fontweight='bold')
    ax1.set_xlabel('批次大小', fontsize=12)
    ax1.set_ylabel('GPU 利用率 (%)', fontsize=12)
    ax1.grid(True, alpha=0.3, axis='y')

    util_offset = max(gpu_utils) * 0.02
    for x, y in zip(batch_sizes, gpu_utils):
        ax1.text(x, y + util_offset, f'{y:.1f}%',
                 ha='center', va='bottom', fontweight='bold')

    # --- GPU memory subplot ---
    ax2.bar(batch_sizes, gpu_memories, color='#F38181', alpha=0.8, edgecolor='black')
    ax2.set_title('批次大小 vs GPU 内存使用', fontsize=14, fontweight='bold')
    ax2.set_xlabel('批次大小', fontsize=12)
    ax2.set_ylabel('GPU 内存 (MB)', fontsize=12)
    ax2.grid(True, alpha=0.3, axis='y')

    mem_offset = max(gpu_memories) * 0.02
    for x, y in zip(batch_sizes, gpu_memories):
        ax2.text(x, y + mem_offset, f'{y:.0f}',
                 ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'batch_gpu_metrics.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 生成图表: batch_gpu_metrics.png")


def create_efficiency_chart(results, output_dir):
    """Plot performance efficiency (FPS per GPU-utilization %) vs batch size.

    Saves batch_efficiency.png into *output_dir*. Silently returns when no
    successful test has a positive GPU-utilization sample.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))

    batch_sizes = []
    efficiencies = []  # FPS per GPU-utilization percent
    for test in results['batch_tests']:
        # avg_gpu_util > 0 guards the division below against idle-GPU samples.
        if test['success'] and test['avg_gpu_util'] > 0:
            batch_sizes.append(test['batch_size'])
            efficiencies.append(test['avg_throughput'] / test['avg_gpu_util'])

    if not batch_sizes:
        return

    ax.plot(batch_sizes, efficiencies, 'o-', color='#AA96DA',
            linewidth=2, markersize=10, label='效率 (FPS/GPU%)')
    ax.set_title('批次大小 vs 性能效率', fontsize=14, fontweight='bold')
    ax.set_xlabel('批次大小', fontsize=12)
    ax.set_ylabel('效率 (FPS / GPU利用率%)', fontsize=12)
    ax.grid(True, alpha=0.3)

    eff_offset = max(efficiencies) * 0.02
    for x, y in zip(batch_sizes, efficiencies):
        ax.text(x, y + eff_offset, f'{y:.2f}',
                ha='center', va='bottom', fontweight='bold')

    # Star-mark the most efficient batch size.
    best_idx = int(np.argmax(efficiencies))
    ax.scatter([batch_sizes[best_idx]], [efficiencies[best_idx]],
               color='gold', s=200, marker='*', zorder=5,
               label=f'最高效率: Batch {batch_sizes[best_idx]}')
    ax.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'batch_efficiency.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 生成图表: batch_efficiency.png")


def create_comprehensive_table(results, output_dir):
    """Render every test (including failures) as an image table.

    Saves batch_performance_table.png into *output_dir*.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    ax.axis('tight')
    ax.axis('off')

    headers = ['批次大小', '吞吐量\n(FPS)', '延迟\n(ms)', 'GPU利用率\n(%)',
               'GPU内存\n(MB)', '测试时长\n(s)', '总帧数']
    table_data = []
    for test in results['batch_tests']:
        if test['success']:
            table_data.append([
                test['batch_size'],
                f"{test['avg_throughput']:.1f}",
                f"{test['avg_latency_ms']:.1f}",
                f"{test['avg_gpu_util']:.1f}",
                f"{test['avg_gpu_memory_mb']:.0f}",
                f"{test['test_duration']:.1f}",
                test['total_frames'],
            ])
        else:
            # Failed runs keep a row so gaps in the batch sweep stay visible.
            table_data.append([test['batch_size'], '失败', '-', '-', '-', '-', '-'])

    table = ax.table(cellText=table_data, colLabels=headers,
                     cellLoc='center', loc='center',
                     colWidths=[0.12, 0.15, 0.12, 0.15, 0.15, 0.15, 0.16])
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 2)

    # Header row styling.
    for i in range(len(headers)):
        table[(0, i)].set_facecolor('#4ECDC4')
        table[(0, i)].set_text_props(weight='bold', color='white')

    # Zebra striping for data rows (row 0 is the header).
    for i in range(1, len(table_data) + 1):
        for j in range(len(headers)):
            table[(i, j)].set_facecolor('#F0F0F0' if i % 2 == 0 else 'white')

    plt.title('批次性能测试综合对比表', fontsize=16, fontweight='bold', pad=20)
    plt.savefig(os.path.join(output_dir, 'batch_performance_table.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 生成图表: batch_performance_table.png")


def generate_summary_report(results, output_dir):
    """Build the plain-text summary report, print it, and save it.

    Writes batch_performance_summary.txt into *output_dir*.
    """
    report = f"""
动态批次 TensorRT 性能测试总结报告
{'='*60}
测试时间: {results['timestamp']}
引擎路径: {results['engine_path']}

"""

    # Overview section — only when the producer included a summary dict.
    if 'summary' in results and results['summary']:
        summary = results['summary']
        report += f"""测试概况:
{'='*60}
总测试数: {summary['total_tests']}
成功测试: {summary['successful_tests']}
失败测试: {summary['failed_tests']}

"""
        if 'best_throughput' in summary:
            report += f"""最佳吞吐量配置:
  批次大小: {summary['best_throughput']['batch_size']}
  吞吐量: {summary['best_throughput']['fps']:.1f} FPS

"""
        if 'best_latency' in summary:
            report += f"""最低延迟配置:
  批次大小: {summary['best_latency']['batch_size']}
  延迟: {summary['best_latency']['latency_ms']:.1f}ms

"""

    # Per-batch detail section.
    report += f"""详细测试结果:
{'='*60}
"""
    for test in results['batch_tests']:
        if test['success']:
            report += f"""
批次大小: {test['batch_size']}
  吞吐量: {test['avg_throughput']:.1f} FPS
  延迟: {test['avg_latency_ms']:.1f}ms
  GPU 利用率: {test['avg_gpu_util']:.1f}%
  GPU 内存: {test['avg_gpu_memory_mb']:.0f}MB (峰值: {test['max_gpu_memory_mb']:.0f}MB)
  测试时长: {test['test_duration']:.1f}s
  总帧数: {test['total_frames']}
"""
        else:
            report += f"""
批次大小: {test['batch_size']} - 测试失败
  错误信息: {test.get('error_message', '未知错误')}
"""

    # Recommendation section, derived from the successful runs.
    report += f"""
推荐配置:
{'='*60}
"""
    successful_tests = _successful_tests(results)
    if successful_tests:
        best_throughput = max(successful_tests, key=lambda x: x['avg_throughput'])
        best_latency = min(successful_tests, key=lambda x: x['avg_latency_ms'])
        report += f"""
✅ 追求最大吞吐量: 使用批次大小 {best_throughput['batch_size']} ({best_throughput['avg_throughput']:.1f} FPS)
✅ 追求最低延迟: 使用批次大小 {best_latency['batch_size']} ({best_latency['avg_latency_ms']:.1f}ms)
✅ 平衡性能与延迟: 建议使用批次大小 4-8

注意事项:
⚠️ 批次大小越大,吞吐量越高,但单帧延迟也会增加
⚠️ 实际部署时需要根据业务需求选择合适的批次大小
⚠️ GPU 内存占用随批次大小增加而增加,需要确保显存充足
"""

    report_file = os.path.join(output_dir, 'batch_performance_summary.txt')
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)

    print(report)
    print(f"\n📁 总结报告已保存: {report_file}")


def main():
    """Locate the newest results JSON and generate all charts and the report."""
    results_dir = "batch_test_results"
    if not os.path.exists(results_dir):
        print("❌ 未找到测试结果目录")
        print("请先运行 run_batch_performance_test.py")
        return

    json_files = [f for f in os.listdir(results_dir)
                  if f.startswith('batch_test_results_') and f.endswith('.json')]
    if not json_files:
        print("❌ 未找到测试结果文件")
        return

    # Filenames embed a timestamp, so the lexicographic max is the newest run.
    latest_file = max(json_files)
    json_path = os.path.join(results_dir, latest_file)
    print(f"📊 加载测试结果: {json_path}")
    results = load_results(json_path)

    viz_dir = os.path.join(results_dir, "visualizations")
    os.makedirs(viz_dir, exist_ok=True)

    print("\n🎨 生成可视化图表...")
    create_throughput_chart(results, viz_dir)
    create_gpu_utilization_chart(results, viz_dir)
    create_efficiency_chart(results, viz_dir)
    create_comprehensive_table(results, viz_dir)
    generate_summary_report(results, viz_dir)

    print("\n✅ 所有可视化图表已生成完成!")
    print(f"📁 输出目录: {viz_dir}")


if __name__ == "__main__":
    main()