Skip to main content
ClaudeWave
Skill59.2k estrellas del repoactualizado today

agent-performance-monitor

The Performance Monitor agent collects real-time metrics across system, agent, task, resource, and network dimensions, then analyzes them to identify bottlenecks, detect anomalies, and track service-level agreement compliance. Use this skill when you need continuous performance visibility, want to identify optimization opportunities, or require automated SLA violation alerts across distributed agent systems.

Instalar en Claude Code
Copiar
git clone --depth 1 https://github.com/ruvnet/ruflo /tmp/agent-performance-monitor && cp -r /tmp/agent-performance-monitor/.agents/skills/agent-performance-monitor ~/.claude/skills/agent-performance-monitor
Después abre una sesión nueva de Claude Code; el skill carga automáticamente.

SKILL.md

---
name: Performance Monitor
type: agent
category: optimization
description: Real-time metrics collection, bottleneck analysis, SLA monitoring and anomaly detection
---

# Performance Monitor Agent

## Agent Profile
- **Name**: Performance Monitor
- **Type**: Performance Optimization Agent
- **Specialization**: Real-time metrics collection and bottleneck analysis
- **Performance Focus**: SLA monitoring, resource tracking, and anomaly detection

## Core Capabilities

### 1. Real-Time Metrics Collection
```javascript
// Advanced metrics collection system
class MetricsCollector {
  constructor() {
    this.collectors = new Map();
    this.aggregators = new Map();
    this.streams = new Map();
    this.alertThresholds = new Map();
  }
  
  // Multi-dimensional metrics collection
  async collectMetrics() {
    const metrics = {
      // System metrics
      system: await this.collectSystemMetrics(),
      
      // Agent-specific metrics
      agents: await this.collectAgentMetrics(),
      
      // Swarm coordination metrics
      coordination: await this.collectCoordinationMetrics(),
      
      // Task execution metrics
      tasks: await this.collectTaskMetrics(),
      
      // Resource utilization metrics
      resources: await this.collectResourceMetrics(),
      
      // Network and communication metrics
      network: await this.collectNetworkMetrics()
    };
    
    // Real-time processing and analysis
    await this.processMetrics(metrics);
    return metrics;
  }
  
  // System-level metrics
  async collectSystemMetrics() {
    return {
      cpu: {
        usage: await this.getCPUUsage(),
        loadAverage: await this.getLoadAverage(),
        coreUtilization: await this.getCoreUtilization()
      },
      memory: {
        usage: await this.getMemoryUsage(),
        available: await this.getAvailableMemory(),
        pressure: await this.getMemoryPressure()
      },
      io: {
        diskUsage: await this.getDiskUsage(),
        diskIO: await this.getDiskIOStats(),
        networkIO: await this.getNetworkIOStats()
      },
      processes: {
        count: await this.getProcessCount(),
        threads: await this.getThreadCount(),
        handles: await this.getHandleCount()
      }
    };
  }
  
  // Agent performance metrics
  async collectAgentMetrics() {
    const agents = await mcp.agent_list({});
    const agentMetrics = new Map();
    
    for (const agent of agents) {
      const metrics = await mcp.agent_metrics({ agentId: agent.id });
      agentMetrics.set(agent.id, {
        ...metrics,
        efficiency: this.calculateEfficiency(metrics),
        responsiveness: this.calculateResponsiveness(metrics),
        reliability: this.calculateReliability(metrics)
      });
    }
    
    return agentMetrics;
  }
}
```

### 2. Bottleneck Detection & Analysis
```javascript
// Intelligent bottleneck detection
class BottleneckAnalyzer {
  constructor() {
    this.detectors = [
      new CPUBottleneckDetector(),
      new MemoryBottleneckDetector(),
      new IOBottleneckDetector(),
      new NetworkBottleneckDetector(),
      new CoordinationBottleneckDetector(),
      new TaskQueueBottleneckDetector()
    ];
    
    this.patterns = new Map();
    this.history = new CircularBuffer(1000);
  }
  
  // Multi-layer bottleneck analysis
  async analyzeBottlenecks(metrics) {
    const bottlenecks = [];
    
    // Parallel detection across all layers
    const detectionPromises = this.detectors.map(detector => 
      detector.detect(metrics)
    );
    
    const results = await Promise.all(detectionPromises);
    
    // Correlate and prioritize bottlenecks
    for (const result of results) {
      if (result.detected) {
        bottlenecks.push({
          type: result.type,
          severity: result.severity,
          component: result.component,
          rootCause: result.rootCause,
          impact: result.impact,
          recommendations: result.recommendations,
          timestamp: Date.now()
        });
      }
    }
    
    // Pattern recognition for recurring bottlenecks
    await this.updatePatterns(bottlenecks);
    
    return this.prioritizeBottlenecks(bottlenecks);
  }
  
  // Advanced pattern recognition
  async updatePatterns(bottlenecks) {
    for (const bottleneck of bottlenecks) {
      const signature = this.createBottleneckSignature(bottleneck);
      
      if (this.patterns.has(signature)) {
        const pattern = this.patterns.get(signature);
        pattern.frequency++;
        pattern.lastOccurrence = Date.now();
        pattern.averageInterval = this.calculateAverageInterval(pattern);
      } else {
        this.patterns.set(signature, {
          signature,
          frequency: 1,
          firstOccurrence: Date.now(),
          lastOccurrence: Date.now(),
          averageInterval: 0,
          predictedNext: null
        });
      }
    }
  }
}
```

### 3. SLA Monitoring & Alerting
```javascript
// Service Level Agreement monitoring
class SLAMonitor {
  constructor() {
    this.slaDefinitions = new Map();
    this.violations = new Map();
    this.alertChannels = new Set();
    this.escalationRules = new Map();
  }
  
  // Define SLA metrics and thresholds
  defineSLA(service, slaConfig) {
    this.slaDefinitions.set(service, {
      availability: slaConfig.availability || 99.9, // percentage
      responseTime: slaConfig.responseTime || 1000, // milliseconds
      throughput: slaConfig.throughput || 100, // requests per second
      errorRate: slaConfig.errorRate || 0.1, // percentage
      recoveryTime: slaConfig.recoveryTime || 300, // seconds
      
      // Time windows for measurements
      measurementWindow: slaConfig.measurementWindow || 300, // seconds
      evaluationInterval: slaConfig.evaluationInterval || 60, // seconds
      
      // Alerting configuration
      alertThresholds: slaConfig.alertThresholds || {
        warning: 0.8, // 80% of SLA threshold
        critical: 0.9, // 90% of SLA threshold
        breach: 1.0