agent-benchmark-suite
The agent-benchmark-suite performs comprehensive performance testing across throughput, latency, scalability, and resource usage metrics. Use it to establish baseline performance measurements, detect performance regressions between versions, validate optimization changes, and monitor multi-agent coordination efficiency in system deployments.
git clone --depth 1 https://github.com/ruvnet/ruflo /tmp/agent-benchmark-suite && cp -r /tmp/agent-benchmark-suite/.agents/skills/agent-benchmark-suite ~/.claude/skills/agent-benchmark-suite
SKILL.md
---
name: Benchmark Suite
type: agent
category: optimization
description: Comprehensive performance benchmarking, regression detection and performance validation
---
# Benchmark Suite Agent
## Agent Profile
- **Name**: Benchmark Suite
- **Type**: Performance Optimization Agent
- **Specialization**: Comprehensive performance benchmarking and testing
- **Performance Focus**: Automated benchmarking, regression detection, and performance validation
## Core Capabilities
### 1. Comprehensive Benchmarking Framework
```javascript
/**
 * Runs a fixed set of named benchmarks (core performance, swarm-specific and
 * custom) and assembles their results into a single report.
 *
 * NOTE(review): this snippet calls `warmup`, `cooldown`, `sleep`,
 * `executeBenchmark`, `generateSummary`, `compareWithBaseline` and
 * `generateRecommendations` on `this`, but none of them is defined here;
 * they are presumably supplied elsewhere — confirm before use.
 */
class ComprehensiveBenchmarkSuite {
  constructor() {
    // Insertion order is the order in which sequential runs execute.
    this.benchmarks = {
      // Core performance benchmarks
      throughput: new ThroughputBenchmark(),
      latency: new LatencyBenchmark(),
      scalability: new ScalabilityBenchmark(),
      resource_usage: new ResourceUsageBenchmark(),
      // Swarm-specific benchmarks
      coordination: new CoordinationBenchmark(),
      load_balancing: new LoadBalancingBenchmark(),
      topology: new TopologyBenchmark(),
      fault_tolerance: new FaultToleranceBenchmark(),
      // Custom benchmarks
      custom: new CustomBenchmarkManager()
    };
    this.reporter = new BenchmarkReporter();
    this.comparator = new PerformanceComparator();
    this.analyzer = new BenchmarkAnalyzer();
  }

  /**
   * Execute the whole suite: warmup, benchmarks, summary, optional baseline
   * comparison, recommendations, cooldown.
   *
   * @param {object} [config]
   * @param {number} [config.duration=300000] Passed through to each benchmark (5 minutes by default).
   * @param {number} [config.iterations=10] Passed through to each benchmark.
   * @param {number} [config.warmupTime=30000] Value handed to `warmup()`; `0` is honoured.
   * @param {number} [config.cooldownTime=10000] Value handed to `cooldown()`; `0` is honoured.
   * @param {boolean} [config.parallel=false] Run all benchmarks concurrently instead of one by one.
   * @param {*} [config.baseline=null] When truthy, forwarded to `compareWithBaseline()`.
   * @returns {Promise<{summary: *, detailed: Map<string, *>, baseline_comparison: *, recommendations: *}>}
   */
  async runBenchmarkSuite(config = {}) {
    // Only a missing (null/undefined) value falls back to the default, so
    // callers can pass 0 to skip the warmup or cooldown phase.
    const withDefault = (value, fallback) =>
      (value === undefined || value === null ? fallback : value);

    const suiteConfig = {
      // A zero duration or iteration count is not meaningful, so any falsy
      // value still falls back to the default here.
      duration: config.duration || 300000, // 5 minutes default
      iterations: config.iterations || 10,
      warmupTime: withDefault(config.warmupTime, 30000), // 30 seconds
      cooldownTime: withDefault(config.cooldownTime, 10000), // 10 seconds
      parallel: config.parallel || false,
      baseline: config.baseline || null
    };

    const results = {
      summary: {},
      detailed: new Map(),
      baseline_comparison: null,
      recommendations: []
    };

    // Warmup phase
    await this.warmup(suiteConfig.warmupTime);

    // Execute benchmarks
    if (suiteConfig.parallel) {
      results.detailed = await this.runBenchmarksParallel(suiteConfig);
    } else {
      results.detailed = await this.runBenchmarksSequential(suiteConfig);
    }

    // Generate summary
    results.summary = this.generateSummary(results.detailed);

    // Compare with baseline if provided
    if (suiteConfig.baseline) {
      results.baseline_comparison = await this.compareWithBaseline(
        results.detailed,
        suiteConfig.baseline
      );
    }

    // Generate recommendations
    results.recommendations = await this.generateRecommendations(results);

    // Cooldown phase
    await this.cooldown(suiteConfig.cooldownTime);

    return results;
  }

  /**
   * Start every benchmark at once and wait for all of them.
   * Uses `Promise.all`, so the first rejection rejects the whole run.
   *
   * @param {object} config Suite configuration forwarded to `executeBenchmark()`.
   * @returns {Promise<Map<string, *>>} Results keyed by benchmark name.
   */
  async runBenchmarksParallel(config) {
    const benchmarkPromises = Object.entries(this.benchmarks).map(
      async ([name, benchmark]) => {
        const result = await this.executeBenchmark(benchmark, name, config);
        return [name, result];
      }
    );
    const results = await Promise.all(benchmarkPromises);
    return new Map(results);
  }

  /**
   * Run the benchmarks one after another, pausing 1000 (as passed to
   * `sleep()`) between consecutive benchmarks.
   *
   * @param {object} config Suite configuration forwarded to `executeBenchmark()`.
   * @returns {Promise<Map<string, *>>} Results keyed by benchmark name, in execution order.
   */
  async runBenchmarksSequential(config) {
    const results = new Map();
    const entries = Object.entries(this.benchmarks);
    for (const [index, [name, benchmark]] of entries.entries()) {
      // Brief pause between benchmarks only: none before the first, and none
      // after the last (the suite's cooldown phase follows that one).
      if (index > 0) {
        await this.sleep(1000);
      }
      const result = await this.executeBenchmark(benchmark, name, config);
      results.set(name, result);
    }
    return results;
  }
}
```
### 2. Performance Regression Detection
```javascript
// Advanced regression detection system
class RegressionDetector {
constructor() {
this.detectors = {
statistical: new StatisticalRegressionDetector(),
machine_learning: new MLRegressionDetector(),
threshold: new ThresholdRegressionDetector(),
trend: new TrendRegressionDetector()
};
this.analyzer = new RegressionAnalyzer();
this.alerting = new RegressionAlerting();
}
// Detect performance regressions
async detectRegressions(currentResults, historicalData, config = {}) {
const regressions = {
detected: [],
severity: 'none',
confidence: 0,
analysis: {}
};
// Run multiple detection algorithms
const detectionPromises = Object.entries(this.detectors).map(
async ([method, detector]) => {
const detection = await detector.detect(currentResults, historicalData, config);
return [method, detection];
}
);
const detectionResults = await Promise.all(detectionPromises);
// Aggregate detection results
for (const [method, detection] of detectionResults) {
if (detection.regression_detected) {
regressions.detected.push({
method,
...detection
});
}
}
// Calculate overall confidence and severity
if (regressions.detected.length > 0) {
regressions.confidence = this.calculateAggregateConfidence(regressions.detected);
regressions.severity = this.calculateSeverity(regressions.detected);
regressions.analysis = await this.analyzer.analyze(regressions.detected);
}
return regressions;
}
// Statistical regression detection using change point analysis
async detectStatisticalRegression(metric, historicalData, sensitivity = 0.95) {
// Use CUSUM (Cumulative Sum) algorithm for change point detection
const cusum = this.calculateCUSUM(metric, historicalData);
// Detect change points
const changePoints = this.detectChangePoints(cusum, sensitivity);
// Analyze significance of changes
const analysis = changePoints.map(point => ({
timestamp: point.timestamp,
magnitude: point.magnitude,
direction: point.direction,
significance: point.significance,
confidence: point.confidence
}));
return {
regression_detected: changePoints.length > 0,
change_points: analyAgent skill for adaptive-coordinator - invoke with $agent-adaptive-coordinator
Agent skill for agent - invoke with $agent-agent
Agent skill for agentic-payments - invoke with $agent-agentic-payments
Agent skill for analyze-code-quality - invoke with $agent-analyze-code-quality
Agent skill for app-store - invoke with $agent-app-store
Agent skill for arch-system-design - invoke with $agent-arch-system-design
Agent skill for architecture - invoke with $agent-architecture
Agent skill for authentication - invoke with $agent-authentication