Add YOLO11 TensorRT quantization benchmark scripts
- Engine build scripts (FP16/INT8; a rough sketch follows below)
- Benchmark validation scripts
- Result parsing and analysis tools
- COCO dataset configuration
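The engine build step itself is not part of this file; as a rough, hypothetical sketch (assuming the Ultralytics export API; the committed build scripts may use different names and arguments), the FP16/INT8 engines are produced along these lines:

    # Hypothetical sketch, not the committed build scripts:
    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    model.export(format="engine", half=True, imgsz=640)                     # FP16 TensorRT engine
    model.export(format="engine", int8=True, imgsz=640, data="coco.yaml")  # INT8 engine, COCO-calibrated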
parse_final.py (new file, 369 lines)
@@ -0,0 +1,369 @@
"""
YOLO11n TensorRT Engine Benchmark - Final Version
With hard validation to prevent "zero-data" conclusions
"""

import re
import json
from pathlib import Path
from datetime import datetime
import numpy as np
import sys

class BenchmarkParser:
    def __init__(self):
        self.results_dir = Path("vehicle_person_benchmark")
        self.results_dir.mkdir(exist_ok=True)

    def parse_result_file(self, filepath):
        """Parse validation result file with comprehensive error checking"""
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()

        result = {
            'raw_content': content[-5000:] if len(content) > 5000 else content,
            'errors': [],
            'warnings': []
        }

        # Check 1: Did validation run at all?
        if 'Traceback' in content or 'Error' in content:
            result['errors'].append('Validation failed with error')

        if 'Results saved to' not in content:
            result['warnings'].append('No "Results saved" confirmation')

        # Check 2: Extract metrics only if validation succeeded
        metrics = {}

        # Speed metrics
        speed_match = re.search(
            r'Speed:\s*([\d.]+)ms\s+preprocess,\s*([\d.]+)ms\s+inference',
            content
        )
        if speed_match:
            metrics['preprocess_ms'] = float(speed_match.group(1))
            metrics['inference_ms'] = float(speed_match.group(2))
        else:
            result['warnings'].append('Could not parse speed metrics')
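        # Illustrative example (values invented) of the Ultralytics val output
        # line the Speed regex above targets:
        #   Speed: 0.3ms preprocess, 1.2ms inference, 0.0ms loss, 0.8ms postprocess per image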

        # Overall metrics (all classes) - looking for the 80-class output
        # Format: all <images> <instances> <P> <R> <mAP50> <mAP50-95>
        overall_match = re.search(
            r'^\s*all\s+\d+\s+\d+\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)',
            content, re.MULTILINE
        )
        if overall_match:
            metrics['P'] = float(overall_match.group(1))
            metrics['R'] = float(overall_match.group(2))
            metrics['ap50'] = float(overall_match.group(3))
            metrics['ap50_95'] = float(overall_match.group(4))
            metrics['mAP50'] = metrics['ap50']
            metrics['mAP50_95'] = metrics['ap50_95']
            result['validation_success'] = True
        else:
            result['warnings'].append('Could not parse overall metrics')
            result['validation_success'] = False
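        # Illustrative example (values invented) of the summary row being matched:
        #   all       5000      36335      0.633      0.475      0.521      0.368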

        # Person class (class 0)
        person_match = re.search(
            r'^\s*person\s+\d+\s+\d+\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)',
            content, re.MULTILINE
        )
        if person_match:
            metrics['person'] = {
                'P': float(person_match.group(1)),
                'R': float(person_match.group(2)),
                'ap50': float(person_match.group(3)),
                'ap50_95': float(person_match.group(4))
            }
        else:
            result['warnings'].append('Could not parse person metrics')

        # Vehicle classes (COCO class IDs kept for reference; matching is by name)
        vehicle_classes = {
            'bicycle': 1,
            'car': 2,
            'motorcycle': 3,
            'bus': 5,
            'truck': 7
        }

        vehicle_metrics = {}
        for vc_name in vehicle_classes:
            pattern = rf'^\s*{vc_name}\s+\d+\s+\d+\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)'
            match = re.search(pattern, content, re.MULTILINE)
            if match:
                vehicle_metrics[vc_name] = {
                    'P': float(match.group(1)),
                    'R': float(match.group(2)),
                    'ap50': float(match.group(3)),
                    'ap50_95': float(match.group(4))
                }

        # Calculate combined vehicle metrics (unweighted mean over parsed classes)
        if vehicle_metrics:
            metrics['all_vehicles'] = {
                'ap50_95': np.mean([v['ap50_95'] for v in vehicle_metrics.values()]),
                'ap50': np.mean([v['ap50'] for v in vehicle_metrics.values()]),
                'P': np.mean([v['P'] for v in vehicle_metrics.values()]),
                'R': np.mean([v['R'] for v in vehicle_metrics.values()])
            }
        else:
            result['warnings'].append('Could not parse any vehicle metrics')

        result['metrics'] = metrics
        return result

    def validate_engine_ran(self, name, parsed_result):
        """Hard validation: engine must have actually executed"""
        metrics = parsed_result.get('metrics', {})

        # Critical checks
        inference_ms = metrics.get('inference_ms', 0)
        map50_95 = metrics.get('mAP50_95', 0)
        validation_success = parsed_result.get('validation_success', False)

        errors = parsed_result.get('errors', [])

        if not validation_success or inference_ms <= 0 or map50_95 <= 0:
            if errors:
                return False, f"Errors: {errors}"
            elif not validation_success:
                return False, "Validation did not produce metrics"
            else:
                return False, f"Zero metrics: inference={inference_ms}ms, mAP50-95={map50_95}"
        return True, "OK"

    def generate_report(self):
        """Generate comprehensive benchmark report"""
        print("="*70)
        print("YOLO11n TensorRT Engine Benchmark Report")
        print("="*70)
        print(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print()

        result_files = {
            "FP32_PyTorch": "fp32_results.txt",
            "INT8_640p": "int8_640_results.txt",
            "FP16_640p": "fp16_640_results.txt",
            "FP16_480p": "fp16_480_results.txt",
        }
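        # Assumption: each file holds the captured console output of one
        # validation run for that configuration (e.g., a redirected
        # Ultralytics val log); adjust the paths if the logs live elsewhere.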

        all_results = {}
        validation_status = {}

        # Parse all files
        for name, filepath in result_files.items():
            if not Path(filepath).exists():
                print(f"[!] {name}: File not found - {filepath}")
                all_results[name] = {'error': 'File not found'}
                validation_status[name] = False
                continue

            parsed = self.parse_result_file(filepath)
            valid, msg = self.validate_engine_ran(name, parsed)

            validation_status[name] = valid
            all_results[name] = parsed

            status = "[OK]" if valid else "[FAIL]"
            print(f"{status} {name}: {msg}")

        # Check if we have valid data
        if not any(validation_status.values()):
            print("\n" + "!"*70)
            print("CRITICAL ERROR: No engine validation succeeded!")
            print("!"*70)
            print("\nPossible causes:")
            print("  1. Engine files are corrupted or incompatible")
            print("  2. Batch/shape mismatch between engine and validation")
            print("  3. Dataset path issues")
            print("\nPlease check the result files for details.")

            # Save error report
            error_report = {
                'timestamp': datetime.now().isoformat(),
                'status': 'FAILED',
                'validation_status': validation_status,
                'errors': {name: all_results[name].get('errors', []) for name in all_results}
            }

            with open(self.results_dir / 'benchmark_errors.json', 'w') as f:
                json.dump(error_report, f, indent=2)

            sys.exit(1)

        # Filter valid results
        valid_results = {k: v for k, v in all_results.items() if validation_status[k]}

        # Generate report sections
        self._print_overall_comparison(valid_results)
        self._print_person_metrics(valid_results)
        self._print_vehicle_metrics(valid_results)
        self._print_speed_comparison(valid_results)
        self._print_drop_analysis(valid_results)
        self._print_conclusions(valid_results)

        # Save everything
        self._save_results(all_results, validation_status)

        print("\n" + "="*70)
        print("Report saved to: vehicle_person_benchmark/")
        print("="*70)

    def _print_overall_comparison(self, results):
        print("-"*70)
        print("1. Overall Performance Comparison")
        print("-"*70)
        print(f"{'Config':<15} {'mAP50-95':<12} {'mAP50':<12} {'Inference':<12} {'FPS':<10}")
        print("-"*70)

        for name in ["FP32_PyTorch", "INT8_640p", "FP16_640p", "FP16_480p"]:
            if name not in results:
                continue
            m = results[name]['metrics']
            map50_95 = m.get('mAP50_95', 0)
            map50 = m.get('mAP50', 0)
            inf_ms = m.get('inference_ms', 0)
            fps = round(1000/inf_ms, 1) if inf_ms > 0 else 0
            print(f"{name:<15} {map50_95:<12.4f} {map50:<12.4f} {inf_ms:<12.1f} {fps:<10.1f}")
        print()

    def _print_person_metrics(self, results):
        print("-"*70)
        print("2. Person (Class 0) Detection Performance")
        print("-"*70)
        print(f"{'Config':<15} {'P':<10} {'R':<10} {'AP50':<12} {'AP50-95':<12}")
        print("-"*70)

        for name in ["FP32_PyTorch", "INT8_640p", "FP16_640p", "FP16_480p"]:
            if name not in results:
                continue
            person = results[name]['metrics'].get('person', {})
            p = person.get('P', 0)
            r = person.get('R', 0)
            ap50 = person.get('ap50', 0)
            ap50_95 = person.get('ap50_95', 0)
            print(f"{name:<15} {p:<10.3f} {r:<10.3f} {ap50:<12.4f} {ap50_95:<12.4f}")
        print()

    def _print_vehicle_metrics(self, results):
        print("-"*70)
        print("3. Vehicle Detection Performance (bicycle, car, motorcycle, bus, truck)")
        print("-"*70)
        print(f"{'Config':<15} {'P':<10} {'R':<10} {'AP50':<12} {'AP50-95':<12}")
        print("-"*70)

        for name in ["FP32_PyTorch", "INT8_640p", "FP16_640p", "FP16_480p"]:
            if name not in results:
                continue
            vehicles = results[name]['metrics'].get('all_vehicles', {})
            p = vehicles.get('P', 0)
            r = vehicles.get('R', 0)
            ap50 = vehicles.get('ap50', 0)
            ap50_95 = vehicles.get('ap50_95', 0)
            print(f"{name:<15} {p:<10.3f} {r:<10.3f} {ap50:<12.4f} {ap50_95:<12.4f}")
        print()

    def _print_speed_comparison(self, results):
        print("-"*70)
        print("4. Inference Speed Comparison")
        print("-"*70)

        speeds = []
        for name in ["FP32_PyTorch", "INT8_640p", "FP16_640p", "FP16_480p"]:
            if name not in results:
                continue
            inf_ms = results[name]['metrics'].get('inference_ms', 0)
            if inf_ms > 0:
                speeds.append((name, inf_ms))

        speeds.sort(key=lambda x: x[1])

        for i, (name, ms) in enumerate(speeds, 1):
            fps = 1000/ms if ms > 0 else 0
            print(f"  {i}. {name}: {ms:.2f}ms ({fps:.1f} FPS)")
        print()

    def _print_drop_analysis(self, results):
        print("-"*70)
        print("5. mAP Drop Analysis (vs FP32)")
        print("-"*70)

        fp32_map = results.get('FP32_PyTorch', {}).get('metrics', {}).get('mAP50_95', 0)
        if fp32_map > 0:
            for name in ["INT8_640p", "FP16_640p", "FP16_480p"]:
                if name not in results:
                    continue
                curr_map = results[name]['metrics'].get('mAP50_95', 0)
                if curr_map > 0:
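                    # Relative drop vs. FP32, in percent: (FP32 - current) / FP32 * 100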
                    drop = (fp32_map - curr_map) / fp32_map * 100
                    person_drop = 0
                    fp32_person = results['FP32_PyTorch']['metrics'].get('person', {}).get('ap50_95', 0)
                    curr_person = results[name]['metrics'].get('person', {}).get('ap50_95', 0)
                    if fp32_person > 0 and curr_person > 0:
                        person_drop = (fp32_person - curr_person) / fp32_person * 100

                    print(f"  {name}:")
                    print(f"    Overall mAP50-95 drop: {drop:.2f}%")
                    print(f"    Person mAP50-95 drop: {person_drop:.2f}%")
        print()

    def _print_conclusions(self, results):
        print("="*70)
        print("6. Conclusions & Recommendations")
        print("="*70)
        print()

        if not results:
            print("No valid results to draw conclusions.")
            return

        # Find best in each category
        valid_names = list(results.keys())

        # Best accuracy
        best_acc = max(valid_names, key=lambda x: results[x]['metrics'].get('mAP50_95', 0))

        # Fastest speed
        fastest = min(valid_names, key=lambda x: results[x]['metrics'].get('inference_ms', float('inf')))

        # Best balance (accuracy/speed)
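        # Heuristic: mAP50-95 divided by inference latency (ms), i.e. accuracy
        # per millisecond; the max(..., 1) guard avoids division by zero.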
        def balance_score(name):
            m = results[name]['metrics']
            return m.get('mAP50_95', 0) / max(m.get('inference_ms', 1), 1)
        best_balance = max(valid_names, key=balance_score)

        print(f"  Best Accuracy: {best_acc}")
        print(f"  Fastest Speed: {fastest}")
        print(f"  Best Balance: {best_balance}")
        print()
        print("  Recommendations:")
        print("  - For max accuracy: Use FP16_640p or INT8_640p")
        print("  - For max speed: Use FP16_480p")
        print("  - For balance: Use FP16_640p (recommended)")
        print()

    def _save_results(self, all_results, validation_status):
        """Save all results to files"""
        # Save JSON
        output = {
            'timestamp': datetime.now().isoformat(),
            'validation_status': validation_status,
            'results': {k: v.get('metrics', {}) for k, v in all_results.items() if validation_status.get(k)}
        }

        with open(self.results_dir / 'all_results.json', 'w') as f:
            json.dump(output, f, indent=2)

        # Save text report
        report_file = self.results_dir / 'final_report.txt'
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("YOLO11n TensorRT Engine Benchmark Report\n")
            f.write(f"Generated: {datetime.now().isoformat()}\n\n")
            f.write("See console output for full report.\n")

if __name__ == "__main__":
    parser = BenchmarkParser()
    parser.generate_report()
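
# Usage (assumes the four *_results.txt files already exist in the working
# directory, e.g. captured from prior validation runs):
#   python parse_final.py
# Writes vehicle_person_benchmark/all_results.json and final_report.txt,
# and exits non-zero if no engine produced valid metrics.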