Skip to main content
ClaudeWave
Skill59.2k repo starsupdated today

agent-benchmark-suite

The agent-benchmark-suite performs comprehensive performance testing across throughput, latency, scalability, and resource usage metrics. Use it to establish baseline performance measurements, detect performance regressions between versions, validate optimization changes, and monitor multi-agent coordination efficiency in system deployments.

Install in Claude Code
Copy
git clone --depth 1 https://github.com/ruvnet/ruflo /tmp/agent-benchmark-suite && cp -r /tmp/agent-benchmark-suite/.agents/skills/agent-benchmark-suite ~/.claude/skills/agent-benchmark-suite
Then start a new Claude Code session; the skill loads automatically.

SKILL.md

---
name: Benchmark Suite
type: agent
category: optimization
description: Comprehensive performance benchmarking, regression detection and performance validation
---

# Benchmark Suite Agent

## Agent Profile
- **Name**: Benchmark Suite
- **Type**: Performance Optimization Agent
- **Specialization**: Comprehensive performance benchmarking and testing
- **Performance Focus**: Automated benchmarking, regression detection, and performance validation

## Core Capabilities

### 1. Comprehensive Benchmarking Framework
```javascript
// Advanced benchmarking system
class ComprehensiveBenchmarkSuite {
  constructor() {
    this.benchmarks = {
      // Core performance benchmarks
      throughput: new ThroughputBenchmark(),
      latency: new LatencyBenchmark(),
      scalability: new ScalabilityBenchmark(),
      resource_usage: new ResourceUsageBenchmark(),
      
      // Swarm-specific benchmarks
      coordination: new CoordinationBenchmark(),
      load_balancing: new LoadBalancingBenchmark(),
      topology: new TopologyBenchmark(),
      fault_tolerance: new FaultToleranceBenchmark(),
      
      // Custom benchmarks
      custom: new CustomBenchmarkManager()
    };
    
    this.reporter = new BenchmarkReporter();
    this.comparator = new PerformanceComparator();
    this.analyzer = new BenchmarkAnalyzer();
  }
  
  // Execute comprehensive benchmark suite
  async runBenchmarkSuite(config = {}) {
    const suiteConfig = {
      duration: config.duration || 300000, // 5 minutes default
      iterations: config.iterations || 10,
      warmupTime: config.warmupTime || 30000, // 30 seconds
      cooldownTime: config.cooldownTime || 10000, // 10 seconds
      parallel: config.parallel || false,
      baseline: config.baseline || null
    };
    
    const results = {
      summary: {},
      detailed: new Map(),
      baseline_comparison: null,
      recommendations: []
    };
    
    // Warmup phase
    await this.warmup(suiteConfig.warmupTime);
    
    // Execute benchmarks
    if (suiteConfig.parallel) {
      results.detailed = await this.runBenchmarksParallel(suiteConfig);
    } else {
      results.detailed = await this.runBenchmarksSequential(suiteConfig);
    }
    
    // Generate summary
    results.summary = this.generateSummary(results.detailed);
    
    // Compare with baseline if provided
    if (suiteConfig.baseline) {
      results.baseline_comparison = await this.compareWithBaseline(
        results.detailed, 
        suiteConfig.baseline
      );
    }
    
    // Generate recommendations
    results.recommendations = await this.generateRecommendations(results);
    
    // Cooldown phase
    await this.cooldown(suiteConfig.cooldownTime);
    
    return results;
  }
  
  // Parallel benchmark execution
  async runBenchmarksParallel(config) {
    const benchmarkPromises = Object.entries(this.benchmarks).map(
      async ([name, benchmark]) => {
        const result = await this.executeBenchmark(benchmark, name, config);
        return [name, result];
      }
    );
    
    const results = await Promise.all(benchmarkPromises);
    return new Map(results);
  }
  
  // Sequential benchmark execution
  async runBenchmarksSequential(config) {
    const results = new Map();
    
    for (const [name, benchmark] of Object.entries(this.benchmarks)) {
      const result = await this.executeBenchmark(benchmark, name, config);
      results.set(name, result);
      
      // Brief pause between benchmarks
      await this.sleep(1000);
    }
    
    return results;
  }
}
```

### 2. Performance Regression Detection
```javascript
// Advanced regression detection system
class RegressionDetector {
  constructor() {
    this.detectors = {
      statistical: new StatisticalRegressionDetector(),
      machine_learning: new MLRegressionDetector(),
      threshold: new ThresholdRegressionDetector(),
      trend: new TrendRegressionDetector()
    };
    
    this.analyzer = new RegressionAnalyzer();
    this.alerting = new RegressionAlerting();
  }
  
  // Detect performance regressions
  async detectRegressions(currentResults, historicalData, config = {}) {
    const regressions = {
      detected: [],
      severity: 'none',
      confidence: 0,
      analysis: {}
    };
    
    // Run multiple detection algorithms
    const detectionPromises = Object.entries(this.detectors).map(
      async ([method, detector]) => {
        const detection = await detector.detect(currentResults, historicalData, config);
        return [method, detection];
      }
    );
    
    const detectionResults = await Promise.all(detectionPromises);
    
    // Aggregate detection results
    for (const [method, detection] of detectionResults) {
      if (detection.regression_detected) {
        regressions.detected.push({
          method,
          ...detection
        });
      }
    }
    
    // Calculate overall confidence and severity
    if (regressions.detected.length > 0) {
      regressions.confidence = this.calculateAggregateConfidence(regressions.detected);
      regressions.severity = this.calculateSeverity(regressions.detected);
      regressions.analysis = await this.analyzer.analyze(regressions.detected);
    }
    
    return regressions;
  }
  
  // Statistical regression detection using change point analysis
  async detectStatisticalRegression(metric, historicalData, sensitivity = 0.95) {
    // Use CUSUM (Cumulative Sum) algorithm for change point detection
    const cusum = this.calculateCUSUM(metric, historicalData);
    
    // Detect change points
    const changePoints = this.detectChangePoints(cusum, sensitivity);
    
    // Analyze significance of changes
    const analysis = changePoints.map(point => ({
      timestamp: point.timestamp,
      magnitude: point.magnitude,
      direction: point.direction,
      significance: point.significance,
      confidence: point.confidence
    }));
    
    return {
      regression_detected: changePoints.length > 0,
      change_points: analy