#!/usr/bin/env python3
"""
PyTorch vs TensorRT batch performance comparison test.

Benchmarks a TensorRT engine against pre-recorded PyTorch FPS numbers
and generates comparison charts plus a text report.
"""

import os
import time
import json
import numpy as np
import torch
import matplotlib.pyplot as plt
from datetime import datetime
from ultralytics import YOLO

# Configure fonts so the Chinese chart labels render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
# Keep the minus sign rendering correctly with non-ASCII font families.
plt.rcParams['axes.unicode_minus'] = False
# Pre-recorded PyTorch FPS results keyed by batch size.
# Values for batches 1-8 were extracted from an earlier chart; 16 and 32
# are newly measured. Downstream code treats a value of None as "not tested".
PYTORCH_DATA = {
    1: 64.4,
    2: 91.2,
    4: 122.8,
    8: 131.4,
    16: 145.9,  # newly measured
    32: 147.8   # newly measured
}
def test_tensorrt_batch_performance(engine_path, batch_sizes, test_duration=20):
    """Benchmark TensorRT engine throughput and latency per batch size.

    Args:
        engine_path: Path to the exported TensorRT ``.engine`` file.
        batch_sizes: Iterable of batch sizes to benchmark.
        test_duration: Seconds to run the timed loop for each batch size.

    Returns:
        dict mapping batch size -> result dict with keys ``avg_fps``,
        ``avg_latency_ms``, ``total_frames``, ``test_duration`` and
        ``success`` (plus ``error`` when a run failed).
    """
    print("🚀 开始测试 TensorRT 批次性能")
    print("=" * 60)

    # Load the TensorRT engine through the Ultralytics wrapper.
    print(f"📦 加载 TensorRT 引擎: {engine_path}")
    model = YOLO(engine_path)
    print("✅ 引擎加载成功")

    results = {}

    for batch_size in batch_sizes:
        print(f"\n🔄 测试批次大小: {batch_size} (测试时长: {test_duration}秒)")

        try:
            # Pre-generate one fixed random batch and reuse it for both the
            # warmup and the timed loop. Previously a fresh batch of random
            # 640x640 images was built on every iteration; that generation
            # cost was counted inside the FPS wall-clock interval and
            # deflated the measured inference throughput.
            test_images = [np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
                           for _ in range(batch_size)]

            # Warmup (engine/context initialization, allocator warm paths).
            print("🔥 预热中...")
            for _ in range(5):
                model(test_images, verbose=False)

            # Timed benchmark loop.
            fps_list = []
            latency_list = []
            batch_count = 0

            start_time = time.time()
            last_fps_time = start_time
            fps_batch_count = 0

            while time.time() - start_time < test_duration:
                # Latency is measured around the inference call only.
                infer_start = time.time()
                model(test_images, verbose=False)
                infer_end = time.time()

                latency_ms = (infer_end - infer_start) * 1000
                latency_list.append(latency_ms)

                batch_count += 1
                fps_batch_count += 1

                # Emit one FPS sample roughly every second.
                current_time = time.time()
                if current_time - last_fps_time >= 1.0:
                    fps = (fps_batch_count * batch_size) / (current_time - last_fps_time)
                    fps_list.append(fps)
                    fps_batch_count = 0
                    last_fps_time = current_time

                    # Progress output.
                    elapsed = current_time - start_time
                    print(f" 进度: {elapsed:.1f}s/{test_duration}s, "
                          f"当前FPS: {fps:.1f}, 延迟: {latency_ms:.1f}ms")

            # Aggregate the run.
            total_time = time.time() - start_time
            total_frames = batch_count * batch_size

            avg_fps = np.mean(fps_list) if fps_list else 0
            avg_latency_ms = np.mean(latency_list)

            results[batch_size] = {
                'avg_fps': avg_fps,
                'avg_latency_ms': avg_latency_ms,
                'total_frames': total_frames,
                'test_duration': total_time,
                'success': True
            }

            print(f"✅ 批次 {batch_size} 测试完成:")
            print(f" 平均FPS: {avg_fps:.1f}")
            print(f" 平均延迟: {avg_latency_ms:.1f}ms")

        except Exception as e:
            # Record the failure (e.g. out-of-memory at large batches)
            # and keep benchmarking the remaining batch sizes.
            print(f"❌ 批次 {batch_size} 测试失败: {e}")
            results[batch_size] = {
                'avg_fps': 0,
                'avg_latency_ms': 0,
                'success': False,
                'error': str(e)
            }

    return results
def _annotate_fps_bars(ax, bars):
    """Write each non-zero bar's height (FPS) just above the bar."""
    for bar in bars:
        height = bar.get_height()
        if height > 0:
            ax.text(bar.get_x() + bar.get_width()/2., height + 1,
                    f'{height:.1f}', ha='center', va='bottom', fontweight='bold')


def create_comparison_chart(pytorch_data, tensorrt_data, output_dir):
    """Render the PyTorch vs TensorRT comparison figure.

    Left panel: grouped FPS bars per batch size. Right panel: TensorRT's
    percentage improvement over PyTorch for batches measured on both sides.
    Saves ``pytorch_vs_tensorrt_comparison.png`` into *output_dir*.

    Args:
        pytorch_data: dict {batch_size: fps or None}.
        tensorrt_data: dict {batch_size: benchmark result dict}.
        output_dir: output directory (created if missing).
    """
    print("\n🎨 生成对比图表...")

    os.makedirs(output_dir, exist_ok=True)

    # Extract aligned FPS series; 0 stands in for missing/failed entries.
    batch_sizes = sorted(pytorch_data.keys())
    pytorch_fps = [pytorch_data[bs] if pytorch_data[bs] is not None else 0 for bs in batch_sizes]
    tensorrt_fps = [tensorrt_data[bs]['avg_fps'] if tensorrt_data[bs]['success'] else 0
                    for bs in batch_sizes]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # Panel 1: grouped FPS bars.
    x = np.arange(len(batch_sizes))
    width = 0.35

    # Only draw PyTorch bars for batches that actually have PyTorch data.
    pytorch_mask = [pytorch_data[bs] is not None for bs in batch_sizes]
    pytorch_x = x[pytorch_mask]
    pytorch_values = [pytorch_fps[i] for i, m in enumerate(pytorch_mask) if m]

    bars1 = ax1.bar(pytorch_x - width/2, pytorch_values, width, label='PyTorch',
                    color='#FF6B6B', alpha=0.8)
    bars2 = ax1.bar(x + width/2, tensorrt_fps, width, label='TensorRT',
                    color='#4ECDC4', alpha=0.8)

    ax1.set_xlabel('批次大小', fontsize=12)
    ax1.set_ylabel('FPS (帧/秒)', fontsize=12)
    ax1.set_title('PyTorch vs TensorRT 批量推理性对比'.replace('性对比', '性能对比'), fontsize=14, fontweight='bold') if False else ax1.set_title('PyTorch vs TensorRT 批量推理性能对比', fontsize=14, fontweight='bold')
    ax1.set_xticks(x)
    ax1.set_xticklabels(batch_sizes)
    ax1.legend()
    ax1.grid(True, alpha=0.3, axis='y')

    # Value labels (previously two duplicated loops — now a shared helper).
    _annotate_fps_bars(ax1, bars1)
    _annotate_fps_bars(ax1, bars2)

    # Panel 2: percentage improvement, only where PyTorch data exists.
    improvements = []
    improvement_labels = []
    for bs in batch_sizes:
        if pytorch_data[bs] is not None and tensorrt_data[bs]['success'] and pytorch_data[bs] > 0:
            improvement = (tensorrt_data[bs]['avg_fps'] - pytorch_data[bs]) / pytorch_data[bs] * 100
            improvements.append(improvement)
            improvement_labels.append(bs)

    if improvements:
        # Green = faster than PyTorch, red = slower.
        colors = ['green' if imp > 0 else 'red' for imp in improvements]
        bars3 = ax2.bar(improvement_labels, improvements, color=colors, alpha=0.8, edgecolor='black')
        ax2.set_xlabel('批次大小', fontsize=12)
        ax2.set_ylabel('性能提升 (%)', fontsize=12)
        ax2.set_title('TensorRT 相对 PyTorch 的性能提升', fontsize=14, fontweight='bold')
        ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        ax2.grid(True, alpha=0.3, axis='y')

        # Signed percentage labels, placed above positive / below negative bars.
        for bar, imp in zip(bars3, improvements):
            height = bar.get_height()
            ax2.text(bar.get_x() + bar.get_width()/2., height + (2 if height > 0 else -2),
                     f'{imp:+.1f}%', ha='center', va='bottom' if height > 0 else 'top',
                     fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'pytorch_vs_tensorrt_comparison.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 对比图表已保存: pytorch_vs_tensorrt_comparison.png")
def create_combined_line_chart(pytorch_data, tensorrt_data, output_dir, tensorrt_single_fps=140.1):
    """Render a combined FPS-vs-batch-size line chart.

    Saves ``batch_performance_line_chart.png`` into *output_dir*.

    Args:
        pytorch_data: dict {batch_size: fps or None}.
        tensorrt_data: dict {batch_size: benchmark result dict}.
        output_dir: output directory for the image.
        tensorrt_single_fps: single-frame TensorRT reference FPS drawn as a
            dashed horizontal line. Default 140.1 comes from an earlier
            measured run (was previously a hard-coded constant).
    """
    print("🎨 生成组合折线图...")

    batch_sizes = sorted(pytorch_data.keys())
    pytorch_fps = [pytorch_data[bs] if pytorch_data[bs] is not None else None for bs in batch_sizes]
    tensorrt_fps = [tensorrt_data[bs]['avg_fps'] if tensorrt_data[bs]['success'] else 0
                    for bs in batch_sizes]

    fig, ax = plt.subplots(figsize=(12, 7))

    # PyTorch line — plot only the batches that have data.
    pytorch_valid_x = [bs for bs, fps in zip(batch_sizes, pytorch_fps) if fps is not None]
    pytorch_valid_y = [fps for fps in pytorch_fps if fps is not None]

    if pytorch_valid_x:
        ax.plot(pytorch_valid_x, pytorch_valid_y, 'o-', color='#FF6B6B',
                linewidth=3, markersize=12, label='PyTorch', markeredgecolor='white', markeredgewidth=2)

    # TensorRT line — all batch sizes (failed entries appear as 0).
    ax.plot(batch_sizes, tensorrt_fps, 's-', color='#4ECDC4',
            linewidth=3, markersize=12, label='TensorRT', markeredgecolor='white', markeredgewidth=2)

    # Single-frame TensorRT reference line.
    ax.axhline(y=tensorrt_single_fps, color='#4ECDC4', linestyle='--',
               linewidth=2, alpha=0.5, label='TensorRT (单帧参考)')

    ax.set_xlabel('批次大小', fontsize=14, fontweight='bold')
    ax.set_ylabel('FPS (帧/秒)', fontsize=14, fontweight='bold')
    ax.set_title('批量推理性能对比 (PyTorch vs TensorRT)', fontsize=16, fontweight='bold', pad=20)
    ax.grid(True, alpha=0.3, linestyle='--')
    ax.legend(fontsize=12, loc='upper left')

    # Per-point value labels (PyTorch above the marker, TensorRT below).
    for bs, pt_fps, trt_fps in zip(batch_sizes, pytorch_fps, tensorrt_fps):
        if pt_fps is not None:
            ax.text(bs, pt_fps + 3, f'{pt_fps:.1f}', ha='center', va='bottom',
                    fontweight='bold', fontsize=10, color='#FF6B6B')
        if trt_fps > 0:
            ax.text(bs, trt_fps - 3, f'{trt_fps:.1f}', ha='center', va='top',
                    fontweight='bold', fontsize=10, color='#4ECDC4')

    # One x tick per batch size.
    ax.set_xticks(batch_sizes)
    ax.set_xticklabels(batch_sizes, fontsize=12)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'batch_performance_line_chart.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 折线图已保存: batch_performance_line_chart.png")
def generate_comparison_report(pytorch_data, tensorrt_data, output_dir):
    """Build, print and save the PyTorch-vs-TensorRT text report.

    Writes ``comparison_report.txt`` into *output_dir* and echoes the
    full report to stdout.
    """
    print("\n📝 生成对比报告...")

    # Report header (timestamped).
    report = f"""
PyTorch vs TensorRT 批量推理性能对比报告
{'='*60}

测试时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

详细对比数据:
{'='*60}
"""

    batch_sizes = sorted(pytorch_data.keys())

    # Per-batch section: four cases depending on which side has data
    # (TensorRT success x PyTorch availability).
    for bs in batch_sizes:
        pt_fps = pytorch_data[bs]
        trt_result = tensorrt_data[bs]

        if trt_result['success']:
            trt_fps = trt_result['avg_fps']

            if pt_fps is not None:
                improvement = (trt_fps - pt_fps) / pt_fps * 100
                report += f"""
批次大小: {bs}
PyTorch FPS: {pt_fps:.1f}
TensorRT FPS: {trt_fps:.1f}
性能提升: {improvement:+.1f}%
TensorRT 延迟: {trt_result['avg_latency_ms']:.1f}ms
"""
            else:
                report += f"""
批次大小: {bs}
PyTorch FPS: 未测试
TensorRT FPS: {trt_fps:.1f}
TensorRT 延迟: {trt_result['avg_latency_ms']:.1f}ms
"""
        else:
            if pt_fps is not None:
                report += f"""
批次大小: {bs}
PyTorch FPS: {pt_fps:.1f}
TensorRT: 测试失败 - {trt_result.get('error', '未知错误')}
"""
            else:
                report += f"""
批次大小: {bs}
PyTorch: 未测试
TensorRT: 测试失败 - {trt_result.get('error', '未知错误')}
"""

    # Overall statistics over the successful TensorRT runs.
    successful_tests = [bs for bs in batch_sizes if tensorrt_data[bs]['success']]
    if successful_tests:
        # Average improvement only over batches that also have PyTorch data.
        comparable_tests = [bs for bs in successful_tests if pytorch_data[bs] is not None]

        if comparable_tests:
            avg_improvement = np.mean([
                (tensorrt_data[bs]['avg_fps'] - pytorch_data[bs]) / pytorch_data[bs] * 100
                for bs in comparable_tests
            ])
        else:
            avg_improvement = None

        # Best TensorRT configuration by average FPS.
        best_bs = max(successful_tests, key=lambda bs: tensorrt_data[bs]['avg_fps'])
        best_fps = tensorrt_data[best_bs]['avg_fps']

        report += f"""

总体统计:
{'='*60}
成功测试: {len(successful_tests)}/{len(batch_sizes)}
"""

        if avg_improvement is not None:
            report += f"平均性能提升 (相对PyTorch): {avg_improvement:+.1f}%\n"

        report += f"""最佳配置: 批次大小 {best_bs} ({best_fps:.1f} FPS)

推荐配置:
{'='*60}
✅ 实时场景 (低延迟): 批次大小 1-2
✅ 平衡场景: 批次大小 4-8
✅ 高吞吐量场景: 批次大小 16-32

关键发现:
{'='*60}
"""

        # Analyze the throughput trend across batch sizes.
        if len(successful_tests) >= 2:
            fps_values = [tensorrt_data[bs]['avg_fps'] for bs in successful_tests]
            if fps_values[-1] > fps_values[0] * 1.5:
                report += "🚀 TensorRT 在大批次下表现优异,吞吐量显著提升\n"

            if comparable_tests and all(tensorrt_data[bs]['avg_fps'] > pytorch_data[bs] for bs in comparable_tests):
                report += "✅ TensorRT 在所有可对比批次下均优于 PyTorch\n"

        # Compare batch-16 vs batch-32 scaling when both succeeded.
        if 16 in successful_tests and 32 in successful_tests:
            fps_16 = tensorrt_data[16]['avg_fps']
            fps_32 = tensorrt_data[32]['avg_fps']
            if fps_32 > fps_16 * 1.3:
                report += f"🎯 批次 32 相比批次 16 吞吐量提升 {(fps_32/fps_16-1)*100:.1f}%,GPU 利用率更高\n"
            elif fps_32 < fps_16 * 1.1:
                report += "⚠️ 批次 32 性能提升有限,可能受 GPU 显存或计算能力限制\n"

    # Persist the report.
    report_file = os.path.join(output_dir, 'comparison_report.txt')
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)

    print(report)
    print(f"\n📁 报告已保存: {report_file}")
def main(engine_path="C:/Users/16337/PycharmProjects/Security/yolo11n.engine",
         output_dir="comparison_results"):
    """Entry point: benchmark TensorRT and compare against PyTorch data.

    Args:
        engine_path: TensorRT engine file to benchmark. Was previously a
            hard-coded constant; now overridable while keeping the old
            value as the default.
        output_dir: Directory receiving the JSON data, charts and report.
    """
    print("PyTorch vs TensorRT 批量推理性能对比测试")
    print("=" * 60)

    # Bail out early when the engine file is missing.
    if not os.path.exists(engine_path):
        print(f"❌ TensorRT 引擎不存在: {engine_path}")
        return

    # TensorRT inference requires a CUDA device.
    if not torch.cuda.is_available():
        print("❌ CUDA 不可用")
        return

    print(f"✅ CUDA 可用,设备: {torch.cuda.get_device_name(0)}")
    print(f"✅ TensorRT 引擎: {engine_path}")

    # Batch sizes to benchmark and the timed duration per batch size.
    batch_sizes = [1, 2, 4, 8, 16, 32]
    test_duration = 20  # seconds per batch size

    print(f"\n📊 测试配置:")
    print(f" 批次大小: {batch_sizes}")
    print(f" 每批次测试时长: {test_duration}秒")
    print(f"\n📈 PyTorch 参考数据:")
    for bs, fps in PYTORCH_DATA.items():
        if fps is not None:
            print(f" 批次 {bs}: {fps:.1f} FPS")
        else:
            print(f" 批次 {bs}: 待测试")

    try:
        # Run the TensorRT benchmark.
        tensorrt_results = test_tensorrt_batch_performance(engine_path, batch_sizes, test_duration)

        os.makedirs(output_dir, exist_ok=True)

        # Persist raw results as timestamped JSON.
        results_data = {
            'pytorch': PYTORCH_DATA,
            'tensorrt': tensorrt_results,
            'timestamp': datetime.now().isoformat()
        }

        json_file = os.path.join(output_dir, f"comparison_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(results_data, f, indent=2, ensure_ascii=False)

        print(f"\n✅ 测试数据已保存: {json_file}")

        # Visualization.
        create_comparison_chart(PYTORCH_DATA, tensorrt_results, output_dir)
        create_combined_line_chart(PYTORCH_DATA, tensorrt_results, output_dir)

        # Text report.
        generate_comparison_report(PYTORCH_DATA, tensorrt_results, output_dir)

        print(f"\n🎉 测试完成!")
        print(f"📁 所有结果已保存到: {output_dir}/")

    except KeyboardInterrupt:
        print("\n\n⏹️ 测试被用户中断")
    except Exception as e:
        # Top-level boundary: report the error with a traceback, don't crash.
        print(f"\n❌ 测试过程中发生错误: {e}")
        import traceback
        traceback.print_exc()
# Script entry point: run the full benchmark + reporting pipeline.
if __name__ == "__main__":
    main()