Skip to main content
ClaudeWave
Skill71 estrellas del repoactualizado yesterday

ai-video-generator

>-

Instalar en Claude Code
Copiar
git clone --depth 1 https://github.com/TerminalSkills/skills /tmp/ai-video-generator && cp -r /tmp/ai-video-generator/skills/ai-video-generator ~/.claude/skills/ai-video-generator
Después abre una sesión nueva de Claude Code; el skill carga automáticamente.

SKILL.md

# AI Video Generator — Short-Form Content Pipeline

## Overview

Automate creation of short-form videos (TikTok, YouTube Shorts, Instagram Reels) using AI for every step: topic research, script writing, text-to-speech narration, stock footage matching, subtitle generation, and final assembly. Inspired by [MoneyPrinterTurbo](https://github.com/harry0703/MoneyPrinterTurbo) (53k+ stars).

## Instructions

### Step 1: Set Up the Environment

```bash
pip install anthropic openai requests moviepy pydub whisperx srt
sudo apt install ffmpeg  # Linux — or: brew install ffmpeg (macOS)
```

**API keys needed:** Anthropic or OpenAI (scripts), ElevenLabs or OpenAI TTS (voice), Pexels (free stock footage).

### Step 2: AI Script Writing

```python
import anthropic

def generate_script(topic, duration_seconds=45):
    """Generate a video script optimized for short-form content."""
    client = anthropic.Anthropic()
    prompt = f"""Write a {duration_seconds}-second video script about: {topic}
    Format:
    HOOK (first 3 seconds): A shocking statement or question that stops scrolling
    BODY (main content): 3-5 punchy facts or points, each 1-2 sentences
    CTA (last 5 seconds): Call to action — follow, like, comment
    Rules:
    - Conversational, no complex sentences
    - Each sentence on its own line
    - ~{duration_seconds * 2.5:.0f} words ({duration_seconds}s at 150wpm)
    - Use power words: secret, shocking, nobody tells you, actually
    - No emojis or hashtags — this is a voiceover script
    """
    response = client.messages.create(
        model="claude-sonnet-4-20250514", max_tokens=500,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.content[0].text
```

### Step 3: Text-to-Speech Narration

```python
import requests, os

def generate_voice_elevenlabs(text, output_path='narration.mp3'):
    """Generate voiceover using ElevenLabs."""
    url = "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM"
    headers = {"xi-api-key": os.environ["ELEVENLABS_API_KEY"], "Content-Type": "application/json"}
    data = {"text": text, "model_id": "eleven_turbo_v2_5",
            "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}}
    response = requests.post(url, json=data, headers=headers)
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path

def generate_voice_openai(text, output_path='narration.mp3'):
    """Generate voiceover using OpenAI TTS (cheaper alternative)."""
    from openai import OpenAI
    client = OpenAI()
    response = client.audio.speech.create(model="tts-1-hd", voice="onyx", input=text)
    response.stream_to_file(output_path)
    return output_path
```

### Step 4: Stock Footage Selection

```python
def search_pexels_videos(query, count=5):
    """Search Pexels for portrait-oriented stock video clips."""
    url = "https://api.pexels.com/videos/search"
    headers = {"Authorization": os.environ["PEXELS_API_KEY"]}
    params = {"query": query, "per_page": count, "orientation": "portrait", "size": "medium"}
    response = requests.get(url, headers=headers, params=params)
    videos = response.json().get('videos', [])
    results = []
    for v in videos:
        files = sorted(v['video_files'], key=lambda x: x.get('height', 0), reverse=True)
        hd = next((f for f in files if f.get('height', 0) >= 720), files[0])
        results.append({'id': v['id'], 'url': hd['link'], 'duration': v['duration']})
    return results
```

### Step 5: Subtitle Generation

```python
def generate_subtitles(audio_path, output_srt='subtitles.srt'):
    """Generate word-level subtitles using WhisperX."""
    import whisperx, srt
    from datetime import timedelta
    model = whisperx.load_model("base", device="cpu")
    audio = whisperx.load_audio(audio_path)
    result = model.transcribe(audio)
    align_model, metadata = whisperx.load_align_model(language_code="en")
    aligned = whisperx.align(result["segments"], align_model, metadata, audio)
    subs = []
    words = [w for seg in aligned["segments"] for w in seg.get("words", [])]
    for i in range(0, len(words), 4):
        group = words[i:i + 4]
        if not group: continue
        start = timedelta(seconds=group[0].get('start', 0))
        end = timedelta(seconds=group[-1].get('end', 0))
        text = ' '.join(w['word'] for w in group)
        subs.append(srt.Subtitle(index=len(subs)+1, start=start, end=end, content=text))
    with open(output_srt, 'w') as f:
        f.write(srt.compose(subs))
    return output_srt
```

### Step 6: Video Assembly with FFmpeg

```python
import subprocess

def assemble_video(clips, narration, subtitles, output='final.mp4'):
    """Assemble final video: concatenate clips, add narration and subtitles."""
    concat_list = 'concat_list.txt'
    with open(concat_list, 'w') as f:
        for clip in clips:
            f.write(f"file '{clip}'\n")
    subprocess.run([
        'ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', concat_list,
        '-vf', 'scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920',
        '-c:v', 'libx264', '-preset', 'fast', '-an', 'temp_video.mp4'
    ], check=True)
    subtitle_filter = (f"subtitles={subtitles}:force_style='"
        "FontName=Arial,FontSize=18,PrimaryColour=&H00FFFFFF,"
        "OutlineColour=&H00000000,Outline=2,Bold=1,Alignment=2'")
    subprocess.run([
        'ffmpeg', '-y', '-i', 'temp_video.mp4', '-i', narration,
        '-vf', subtitle_filter, '-c:v', 'libx264', '-c:a', 'aac',
        '-shortest', output
    ], check=True)
    return output
```

### Step 7: Full Pipeline

```python
def generate_video(topic, output_dir='./output'):
    """Complete pipeline: topic -> finished video."""
    import os
    os.makedirs(output_dir, exist_ok=True)
    script = generate_script(topic)
    narration = generate_voice_elevenlabs(script, f'{output_dir}/narration.mp3')
    keywords = topic.split()[:3]
    videos = search_pexels_videos(' '.join(keywords), coun