Build an AI Content Idea Generator with GPT + Social Media Data
Here's the problem with AI content tools:
They generate ideas based on generic training data. "10 Tips for Small Business Owners." "How to Boost Your Productivity."
Boring. Unoriginal. Won't perform.
Here's the solution:
Feed GPT real social media data. Show it what's actually working right now—the videos getting millions of views, the posts with crazy engagement, the trends that are emerging.
Then ask it to generate ideas based on that data.
The result? Content ideas grounded in reality. Ideas that have proof of concept. Ideas with actual viral potential.
Let me show you how to build this.
Need social media data? Get started free at sociavault.com/free/social-media-api.
The Architecture
┌─────────────────────────────────────────────┐
│             Your Prompt / Niche             │
└─────────────────┬───────────────────────────┘
                  │
                  ▼
┌─────────────────────────────────────────────┐
│    SociaVault API - Fetch Trending Data     │
│                                             │
│  • Top posts in your niche                  │
│  • Trending hashtags                        │
│  • Viral video patterns                     │
│  • Engagement metrics                       │
└─────────────────┬───────────────────────────┘
                  │
                  ▼
┌─────────────────────────────────────────────┐
│         GPT-4 - Analyze & Generate          │
│                                             │
│  • Pattern recognition                      │
│  • Content angle extraction                 │
│  • Idea generation                          │
│  • Optimization suggestions                 │
└─────────────────┬───────────────────────────┘
                  │
                  ▼
┌─────────────────────────────────────────────┐
│       Output: Content Ideas + Scripts       │
└─────────────────────────────────────────────┘
Project Setup
# The JavaScript examples call the global fetch(), which needs Node.js 18 or newer.
mkdir ai-content-generator
cd ai-content-generator
npm init -y
# readline is built into Node.js, so only the third-party packages are installed.
npm install openai dotenv
Create .env:
SOCIAVAULT_API_KEY=your_sociavault_key
OPENAI_API_KEY=your_openai_key
Step 1: Fetch Trending Content
First, we need to get real data from social platforms. The code below uses Node's built-in `fetch`, so run it on Node.js 18 or newer:
// data-fetcher.js
require('dotenv').config();
// Bearer token sent with every SociaVault request, read from the environment.
const API_KEY = process.env.SOCIAVAULT_API_KEY;
// Common URL prefix shared by all platform endpoints used in this file.
const API_BASE = 'https://api.sociavault.com/v1/scrape';
/**
 * Search TikTok through the SociaVault API.
 *
 * @param {string} keyword - Search phrase; URL-encoded before it is sent.
 * @param {number} [count=20] - Value of the `count` query parameter.
 * @returns {Promise<Array<object>>} The `data.videos` array of the response, or `[]` when it is missing.
 * @throws {Error} When the API answers with a non-2xx status.
 */
async function fetchTikTokTrending(keyword, count = 20) {
  const response = await fetch(
    `${API_BASE}/tiktok/search?query=${encodeURIComponent(keyword)}&count=${count}`,
    { headers: { 'Authorization': `Bearer ${API_KEY}` } }
  );

  // fetch() only rejects on network failures. Without this check an auth or
  // rate-limit error would be indistinguishable from "no results".
  if (!response.ok) {
    throw new Error(`TikTok search failed: HTTP ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  const videos = data?.data?.videos;
  return Array.isArray(videos) ? videos : [];
}
/**
 * Fetch Instagram posts for a hashtag through the SociaVault API.
 *
 * @param {string} hashtag - Hashtag name; URL-encoded and sent as the `name` parameter.
 * @param {number} [count=20] - Value of the `count` query parameter.
 * @returns {Promise<Array<object>>} The `data.posts` array of the response, or `[]` when it is missing.
 * @throws {Error} When the API answers with a non-2xx status.
 */
async function fetchInstagramHashtag(hashtag, count = 20) {
  const response = await fetch(
    `${API_BASE}/instagram/hashtag?name=${encodeURIComponent(hashtag)}&count=${count}`,
    { headers: { 'Authorization': `Bearer ${API_KEY}` } }
  );

  // fetch() only rejects on network failures. Without this check an auth or
  // rate-limit error would be indistinguishable from "no results".
  if (!response.ok) {
    throw new Error(`Instagram hashtag lookup failed: HTTP ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  const posts = data?.data?.posts;
  return Array.isArray(posts) ? posts : [];
}
/**
 * Search YouTube through the SociaVault API.
 *
 * @param {string} keyword - Search phrase; URL-encoded before it is sent.
 * @param {number} [count=10] - Value of the `count` query parameter.
 * @returns {Promise<Array<object>>} The `data.videos` array of the response, or `[]` when it is missing.
 * @throws {Error} When the API answers with a non-2xx status.
 */
async function fetchYouTubeSearch(keyword, count = 10) {
  const response = await fetch(
    `${API_BASE}/youtube/search?query=${encodeURIComponent(keyword)}&count=${count}`,
    { headers: { 'Authorization': `Bearer ${API_KEY}` } }
  );

  // fetch() only rejects on network failures. Without this check an auth or
  // rate-limit error would be indistinguishable from "no results".
  if (!response.ok) {
    throw new Error(`YouTube search failed: HTTP ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  const videos = data?.data?.videos;
  return Array.isArray(videos) ? videos : [];
}
/**
 * Search Twitter through the SociaVault API.
 *
 * @param {string} keyword - Search phrase; URL-encoded before it is sent.
 * @param {number} [count=30] - Value of the `count` query parameter.
 * @returns {Promise<Array<object>>} The `data.tweets` array of the response, or `[]` when it is missing.
 * @throws {Error} When the API answers with a non-2xx status.
 */
async function fetchTwitterTrending(keyword, count = 30) {
  const response = await fetch(
    `${API_BASE}/twitter/search?query=${encodeURIComponent(keyword)}&count=${count}`,
    { headers: { 'Authorization': `Bearer ${API_KEY}` } }
  );

  // fetch() only rejects on network failures. Without this check an auth or
  // rate-limit error would be indistinguishable from "no results".
  if (!response.ok) {
    throw new Error(`Twitter search failed: HTTP ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  const tweets = data?.data?.tweets;
  return Array.isArray(tweets) ? tweets : [];
}
/**
 * Rank items by engagement and reduce the ten best to a common summary shape.
 *
 * Engagement is `like_count + 2 * comment_count`, with missing counts treated as 0.
 * The input array is not modified.
 *
 * @param {Array<object>} content - Raw items as returned by the fetch helpers.
 * @returns {Array<object>} Up to ten summaries with description, views, likes,
 *   comments, shares, duration, hashtags and author, highest engagement first.
 */
function analyzeContent(content) {
  const items = Array.isArray(content) ? content : [];
  const engagement = (item) => (item.like_count || 0) + (item.comment_count || 0) * 2;

  // Array.prototype.sort works in place; rank a copy so the caller's array keeps its order.
  const ranked = [...items].sort((a, b) => engagement(b) - engagement(a));

  return ranked.slice(0, 10).map((item) => {
    // The post text is read from whichever of these keys is populated.
    const text = item.description || item.text || item.caption;
    return {
      description: text,
      views: item.play_count || item.view_count,
      likes: item.like_count,
      comments: item.comment_count,
      shares: item.share_count,
      duration: item.duration,
      hashtags: extractHashtags(text || ''),
      author: item.author?.username
    };
  });
}
/**
 * Collect the hashtags that appear in a piece of text.
 *
 * A hashtag is `#` followed by letters, combining marks, digits or underscores
 * in any script, so tags such as `#café` or `#日本語` are kept whole.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} At most the first ten hashtags, in order of appearance.
 */
function extractHashtags(text) {
  // \w only covers ASCII, which cuts non-English hashtags short; use Unicode classes instead.
  const matches = text.match(/#[\p{L}\p{M}\p{N}_]+/gu);
  return matches ? matches.slice(0, 10) : [];
}
/**
 * Collect and rank content for a niche across TikTok, Instagram, YouTube and Twitter.
 *
 * Every search term (the niche plus each keyword) is queried on every platform,
 * one request at a time with a 500 ms pause after each. A failing request is
 * logged and skipped without affecting the other platforms or terms.
 *
 * @param {string} niche - Main search term.
 * @param {string[]} [keywords=[]] - Additional search terms.
 * @returns {Promise<{tiktok: object[], instagram: object[], youtube: object[], twitter: object[]}>}
 *   Per-platform results after analyzeContent().
 */
async function gatherNicheData(niche, keywords = []) {
  console.log(`\n📊 Gathering data for niche: ${niche}`);

  const sources = [
    { platform: 'tiktok', load: (term) => fetchTikTokTrending(term, 15) },
    // Whitespace is stripped before the term is used as a hashtag name.
    { platform: 'instagram', load: (term) => fetchInstagramHashtag(term.replace(/\s+/g, ''), 15) },
    { platform: 'youtube', load: (term) => fetchYouTubeSearch(term, 5) },
    { platform: 'twitter', load: (term) => fetchTwitterTrending(term, 20) }
  ];
  const allData = {
    tiktok: [],
    instagram: [],
    youtube: [],
    twitter: []
  };

  for (const term of [niche, ...keywords]) {
    console.log(` Searching: ${term}`);
    for (const { platform, load } of sources) {
      // Each request gets its own try/catch so that one platform failing does
      // not skip the remaining platforms for this term.
      try {
        const items = await load(term);
        allData[platform].push(...items);
      } catch (error) {
        console.log(` Error: ${error.message}`);
      }
      await sleep(500);
    }
  }

  return {
    tiktok: analyzeContent(allData.tiktok),
    instagram: analyzeContent(allData.instagram),
    youtube: analyzeContent(allData.youtube),
    twitter: analyzeContent(allData.twitter)
  };
}
/**
 * Pause for a given time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
module.exports = { gatherNicheData, fetchTikTokTrending, fetchInstagramHashtag };
Step 2: GPT Integration
Now let's connect GPT-4 to analyze this data:
// gpt-analyzer.js
require('dotenv').config();
const OpenAI = require('openai');
// OpenAI client used by the functions in this file; the API key is read from the environment.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY
});
// System message sent with every chat completion request in this file.
const SYSTEM_PROMPT = `You are an expert content strategist and social media analyst.
You analyze viral content patterns and generate content ideas based on real data.
When given social media data, you:
1. Identify patterns in what's performing well
2. Extract hooks, formats, and angles that work
3. Generate original content ideas based on these patterns
4. Provide specific, actionable scripts and outlines
Be specific. Don't give generic advice. Base everything on the data provided.
Format your output clearly with headers and bullet points.`;
/**
 * Ask the chat model to describe patterns in the top-performing content.
 *
 * The first five entries of each platform list are embedded in the prompt as
 * pretty-printed JSON. A platform that is missing from `platformData` is sent
 * as an empty list instead of causing a crash.
 *
 * @param {{tiktok?: object[], instagram?: object[], youtube?: object[], twitter?: object[]}} platformData
 *   Per-platform content lists.
 * @returns {Promise<string>} Text of the first completion choice.
 */
async function analyzePatterns(platformData) {
  const topFive = (platform) =>
    JSON.stringify((platformData?.[platform] ?? []).slice(0, 5), null, 2);

  const prompt = `Analyze this social media data and identify patterns in top-performing content:
TIKTOK TOP PERFORMERS:
${topFive('tiktok')}
INSTAGRAM TOP PERFORMERS:
${topFive('instagram')}
YOUTUBE TOP PERFORMERS:
${topFive('youtube')}
TWITTER TOP PERFORMERS:
${topFive('twitter')}
Identify:
1. Common hooks used in top content
2. Content formats that perform well
3. Topics/angles getting engagement
4. Patterns in hashtag usage
5. Optimal content length/duration`;

  const response = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'system', content: SYSTEM_PROMPT },
      { role: 'user', content: prompt }
    ],
    temperature: 0.7
  });
  return response.choices[0].message.content;
}
/**
 * Produce content ideas for a niche, based on a fresh pattern analysis.
 *
 * Runs analyzePatterns() on the data first, then asks the chat model for
 * `count` ideas built on that analysis.
 *
 * @param {object} platformData - Per-platform content lists, forwarded to analyzePatterns().
 * @param {string} niche - Niche name quoted in the prompt.
 * @param {number} [count=10] - Number of ideas requested in the prompt.
 * @returns {Promise<string>} Text of the first completion choice.
 */
async function generateContentIdeas(platformData, niche, count = 10) {
  const trendAnalysis = await analyzePatterns(platformData);

  const userMessage = `Based on this analysis of trending content in the "${niche}" niche:
${trendAnalysis}
Generate ${count} unique content ideas that:
1. Use proven hooks and formats from the data
2. Have a unique angle or twist
3. Are specific and actionable
4. Include platform recommendations (TikTok, Instagram, YouTube, Twitter)
For each idea, provide:
- Title/hook
- Brief description (2-3 sentences)
- Best platform(s)
- Key hashtags
- Estimated engagement potential (Low/Medium/High/Viral)`;

  const conversation = [
    { role: 'system', content: SYSTEM_PROMPT },
    { role: 'user', content: userMessage }
  ];
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: conversation,
    temperature: 0.8
  });

  const reply = completion.choices[0].message;
  return reply.content;
}
/**
 * Ask the chat model for a complete script for one content idea.
 *
 * @param {string} idea - Idea text, quoted verbatim in the prompt.
 * @param {string} platform - Platform name used in the prompt.
 * @param {string} [duration='60 seconds'] - Target length. A missing, empty or
 *   whitespace-only value falls back to the default.
 * @returns {Promise<string>} Text of the first completion choice.
 */
async function generateScript(idea, platform, duration = '60 seconds') {
  // A default parameter only replaces `undefined`. An empty answer typed at a
  // prompt would otherwise produce a "Duration:" line with no value.
  const isBlank = duration == null || String(duration).trim() === '';
  const targetDuration = isBlank ? '60 seconds' : duration;

  const prompt = `Create a complete ${platform} script for this content idea:
"${idea}"
Requirements:
- Duration: ${targetDuration}
- Hook in first 3 seconds
- Clear structure (hook, body, CTA)
- Platform-native style
- Include visual/transition suggestions in [brackets]
Format:
1. HOOK (first 3 seconds)
2. BODY (main content)
3. CTA (call to action)
4. Caption with hashtags
5. Posting tips`;

  const response = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'system', content: SYSTEM_PROMPT },
      { role: 'user', content: prompt }
    ],
    temperature: 0.7
  });
  return response.choices[0].message.content;
}
/**
 * Build a multi-day content calendar.
 *
 * Requests two ideas per day from generateContentIdeas(), then asks the chat
 * model to arrange them into a schedule.
 *
 * @param {object} platformData - Per-platform content lists, forwarded to generateContentIdeas().
 * @param {string} niche - Niche name, forwarded to generateContentIdeas().
 * @param {number} [days=7] - Number of days to plan.
 * @returns {Promise<string>} Text of the first completion choice.
 */
async function generateContentCalendar(platformData, niche, days = 7) {
  const ideasPerDay = 2;
  const ideaPool = await generateContentIdeas(platformData, niche, days * ideasPerDay);

  const userMessage = `Create a ${days}-day content calendar using these ideas:
${ideaPool}
For each day, assign:
- 1 primary post (most effort)
- 1 secondary post (quick/reactive)
- Best posting time
- Platform priority
Include:
- Content batching suggestions
- Theme days if appropriate
- Engagement strategy for each post`;

  const conversation = [
    { role: 'system', content: SYSTEM_PROMPT },
    { role: 'user', content: userMessage }
  ];
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: conversation,
    temperature: 0.6
  });

  const reply = completion.choices[0].message;
  return reply.content;
}
// Public API of gpt-analyzer.js: the four GPT-backed helpers defined above.
module.exports = {
analyzePatterns,
generateContentIdeas,
generateScript,
generateContentCalendar
};
Step 3: Main Application
// index.js
require('dotenv').config();
const readline = require('readline');
const { gatherNicheData } = require('./data-fetcher');
const {
analyzePatterns,
generateContentIdeas,
generateScript,
generateContentCalendar
} = require('./gpt-analyzer');
// Line-based interface on stdin/stdout, used by ask() and closed at the end of main().
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout
});
/**
 * Promise wrapper around rl.question().
 *
 * @param {string} question - Text shown to the user.
 * @returns {Promise<string>} Resolves with whatever rl.question() passes to its callback.
 */
function ask(question) {
  return new Promise((resolve) => {
    rl.question(question, (answer) => resolve(answer));
  });
}
/**
 * Interactive entry point.
 *
 * Asks for a niche and optional keywords, gathers platform data once, then
 * loops over a menu until the user chooses to exit. A failure inside a menu
 * action is reported and the menu is shown again; the readline interface is
 * always closed on the way out, including when an error escapes.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Parse a positive integer from user input, falling back when it is missing or invalid.
  const positiveIntOr = (text, fallback) => {
    const parsed = Number.parseInt(text, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  console.log('\n🚀 AI Content Idea Generator');
  console.log('============================\n');

  try {
    // Get user input
    const niche = await ask('What niche are you creating content for? ');
    const keywordsInput = await ask('Additional keywords (comma-separated, optional): ');
    // Drop empty entries ("a,,b" or a trailing comma) so no blank searches are sent.
    const keywords = (keywordsInput || '')
      .split(',')
      .map(k => k.trim())
      .filter(k => k !== '');

    console.log('\n⏳ Gathering social media data...');
    const platformData = await gatherNicheData(niche, keywords);
    console.log('\n📈 Data collected:');
    console.log(` TikTok: ${platformData.tiktok.length} top posts`);
    console.log(` Instagram: ${platformData.instagram.length} top posts`);
    console.log(` YouTube: ${platformData.youtube.length} top videos`);
    console.log(` Twitter: ${platformData.twitter.length} top tweets`);

    let running = true;
    while (running) {
      console.log('\n📝 Options:');
      console.log(' 1. Analyze patterns in trending content');
      console.log(' 2. Generate content ideas');
      console.log(' 3. Generate a script for an idea');
      console.log(' 4. Generate a content calendar');
      console.log(' 5. Exit');
      const choice = (await ask('\nChoice (1-5): ')).trim();

      // One failed API call should not end the session, so report it and show the menu again.
      try {
        switch (choice) {
          case '1': {
            console.log('\n🔍 Analyzing patterns...\n');
            const patterns = await analyzePatterns(platformData);
            console.log(patterns);
            break;
          }
          case '2': {
            const count = await ask('How many ideas? (default: 10): ');
            console.log('\n💡 Generating ideas...\n');
            const ideas = await generateContentIdeas(
              platformData,
              niche,
              positiveIntOr(count, 10)
            );
            console.log(ideas);
            break;
          }
          case '3': {
            const idea = await ask('Paste the content idea: ');
            const platform = await ask('Platform (tiktok/instagram/youtube): ');
            const duration = await ask('Duration (e.g., 30 seconds, 2 minutes): ');
            console.log('\n🎬 Generating script...\n');
            // An empty answer becomes undefined so generateScript's default duration applies.
            const script = await generateScript(idea, platform, duration.trim() || undefined);
            console.log(script);
            break;
          }
          case '4': {
            const days = await ask('How many days? (default: 7): ');
            console.log('\n📅 Generating calendar...\n');
            const calendar = await generateContentCalendar(
              platformData,
              niche,
              positiveIntOr(days, 7)
            );
            console.log(calendar);
            break;
          }
          case '5':
            running = false;
            console.log('\nGoodbye! 👋\n');
            break;
          default:
            console.log('Invalid choice. Try again.');
        }
      } catch (error) {
        console.error(`\n❌ Request failed: ${error.message}`);
      }
    }
  } finally {
    // Leaving the interface open would keep stdin alive and the process running.
    rl.close();
  }
}
// Start the CLI; a rejection from main() is printed with console.error.
main().catch(console.error);
Python Version
# content_generator.py
import os
import json
import requests
from openai import OpenAI
from typing import List, Dict, Optional
# Initialize clients
# Both keys are read from the process environment. Nothing visible in this script
# loads a .env file, so export the variables before running it.
SOCIAVAULT_KEY = os.getenv('SOCIAVAULT_API_KEY')
OPENAI_KEY = os.getenv('OPENAI_API_KEY')
openai_client = OpenAI(api_key=OPENAI_KEY)
# Common URL prefix for every SociaVault endpoint used below.
API_BASE = 'https://api.sociavault.com/v1/scrape'
# System message sent with every chat completion request made by ContentGenerator.
SYSTEM_PROMPT = """You are an expert content strategist and social media analyst.
You analyze viral content patterns and generate content ideas based on real data.
When given social media data, you:
1. Identify patterns in what's performing well
2. Extract hooks, formats, and angles that work
3. Generate original content ideas based on these patterns
4. Provide specific, actionable scripts and outlines
Be specific. Don't give generic advice. Base everything on the data provided."""
class ContentGenerator:
    """Fetch top-performing social posts from SociaVault and ask GPT for content ideas.

    The ``fetch_*`` methods query SociaVault, ``gather_niche_data`` merges and
    ranks their results, and the ``analyze_patterns`` / ``generate_*`` methods
    send prompts built from that data to the module-level OpenAI client.
    """

    # Seconds to wait for SociaVault; requests waits forever unless a timeout is given.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.headers = {'Authorization': f'Bearer {SOCIAVAULT_KEY}'}

    def _fetch_items(self, path: str, params: Dict, key: str) -> List[Dict]:
        """GET ``API_BASE/path`` and return the list stored under ``data[key]``.

        Returns an empty list when the payload has no such list.
        Raises ``requests.HTTPError`` on a non-2xx response.
        """
        response = requests.get(
            f'{API_BASE}/{path}',
            params=params,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        payload = response.json()
        # A JSON null for "data" or for the list would otherwise raise AttributeError.
        data = payload.get('data') if isinstance(payload, dict) else None
        items = data.get(key) if isinstance(data, dict) else None
        return items if isinstance(items, list) else []

    def fetch_tiktok(self, keyword: str, count: int = 20) -> List[Dict]:
        """Fetch trending TikTok videos for a keyword."""
        return self._fetch_items(
            'tiktok/search', {'query': keyword, 'count': count}, 'videos'
        )

    def fetch_instagram(self, hashtag: str, count: int = 20) -> List[Dict]:
        """Fetch Instagram posts for a hashtag (spaces are removed from the name)."""
        return self._fetch_items(
            'instagram/hashtag',
            {'name': hashtag.replace(' ', ''), 'count': count},
            'posts'
        )

    def fetch_youtube(self, keyword: str, count: int = 10) -> List[Dict]:
        """Fetch YouTube videos for a keyword."""
        return self._fetch_items(
            'youtube/search', {'query': keyword, 'count': count}, 'videos'
        )

    def gather_niche_data(self, niche: str, keywords: Optional[List[str]] = None) -> Dict:
        """Gather data from all platforms for a niche.

        Every search term (the niche plus each keyword) is queried on every
        platform. A failing request is printed and skipped without affecting
        the other platforms or terms. Returns a dict mapping platform name to
        the analyzed top performers.
        """
        search_terms = [niche] + list(keywords or [])
        fetchers = (
            ('tiktok', self.fetch_tiktok, 15),
            ('instagram', self.fetch_instagram, 15),
            ('youtube', self.fetch_youtube, 5),
        )
        all_data = {platform: [] for platform, _, _ in fetchers}

        for term in search_terms:
            print(f' Searching: {term}')
            for platform, fetch, count in fetchers:
                # Each request has its own try so one platform failing does
                # not skip the remaining platforms for this term.
                try:
                    all_data[platform].extend(fetch(term, count))
                except Exception as e:
                    print(f' Error: {e}')

        # Analyze and keep top performers
        return {
            platform: self._analyze_content(data)
            for platform, data in all_data.items()
        }

    def _analyze_content(self, content: List[Dict]) -> List[Dict]:
        """Sort content by engagement and summarize the ten best items.

        Engagement is ``like_count + 2 * comment_count``. Missing or null
        counts are treated as 0, and a missing or null author yields ''.
        """
        def count_of(item: Dict, key: str):
            # `or 0` also covers keys that are present but set to None.
            return item.get(key) or 0

        ranked = sorted(
            content,
            key=lambda item: count_of(item, 'like_count') + count_of(item, 'comment_count') * 2,
            reverse=True
        )

        summaries = []
        for item in ranked[:10]:
            author = item.get('author')
            username = (author.get('username') or '') if isinstance(author, dict) else ''
            summaries.append({
                'description': item.get('description') or item.get('caption') or '',
                'views': item.get('play_count') or item.get('view_count') or 0,
                'likes': count_of(item, 'like_count'),
                'comments': count_of(item, 'comment_count'),
                'author': username
            })
        return summaries

    def _complete(self, prompt: str, temperature: float) -> str:
        """Send the system prompt plus one user prompt to the chat model and return the reply text."""
        response = openai_client.chat.completions.create(
            model='gpt-4',
            messages=[
                {'role': 'system', 'content': SYSTEM_PROMPT},
                {'role': 'user', 'content': prompt}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content

    def analyze_patterns(self, platform_data: Dict) -> str:
        """Use GPT to analyze patterns in the data.

        The first five entries per platform are embedded in the prompt as JSON.
        """
        prompt = f"""Analyze this social media data and identify patterns:
TIKTOK TOP PERFORMERS:
{json.dumps(platform_data.get('tiktok', [])[:5], indent=2)}
INSTAGRAM TOP PERFORMERS:
{json.dumps(platform_data.get('instagram', [])[:5], indent=2)}
YOUTUBE TOP PERFORMERS:
{json.dumps(platform_data.get('youtube', [])[:5], indent=2)}
Identify:
1. Common hooks used in top content
2. Content formats that perform well
3. Topics/angles getting engagement
4. Patterns in hashtag usage"""
        return self._complete(prompt, 0.7)

    def generate_ideas(self, platform_data: Dict, niche: str, count: int = 10) -> str:
        """Generate content ideas based on trending data.

        Runs analyze_patterns() first and feeds its output into the idea prompt.
        """
        patterns = self.analyze_patterns(platform_data)
        prompt = f"""Based on this analysis of trending content in "{niche}":
{patterns}
Generate {count} unique content ideas that:
1. Use proven hooks and formats from the data
2. Have a unique angle or twist
3. Are specific and actionable
4. Include platform recommendations
For each idea, provide:
- Title/hook
- Brief description
- Best platform(s)
- Key hashtags
- Engagement potential (Low/Medium/High/Viral)"""
        return self._complete(prompt, 0.8)

    def generate_script(self, idea: str, platform: str, duration: str = '60 seconds') -> str:
        """Generate a full script for a content idea."""
        prompt = f"""Create a complete {platform} script for:
"{idea}"
Requirements:
- Duration: {duration}
- Hook in first 3 seconds
- Clear structure (hook, body, CTA)
- Platform-native style
- Include visual suggestions in [brackets]
Format:
1. HOOK (first 3 seconds)
2. BODY (main content)
3. CTA (call to action)
4. Caption with hashtags"""
        return self._complete(prompt, 0.7)
# Example usage
if __name__ == '__main__':
    # Define your niche
    niche = 'small business tips'
    keywords = ['entrepreneur', 'side hustle', 'business advice']
    generator = ContentGenerator()

    # Step 1: collect and rank content for the niche and its keywords.
    print(f'\n📊 Gathering data for: {niche}')
    data = generator.gather_niche_data(niche, keywords)

    print('\n📈 Collected:')
    collected_rows = (
        ('TikTok', 'tiktok', 'posts'),
        ('Instagram', 'instagram', 'posts'),
        ('YouTube', 'youtube', 'videos'),
    )
    for label, platform, noun in collected_rows:
        print(f' {label}: {len(data[platform])} {noun}')

    # Step 2: let GPT describe what the top performers have in common.
    print('\n🔍 Analyzing patterns...')
    patterns = generator.analyze_patterns(data)
    print(patterns)

    # Step 3: turn the data into concrete ideas.
    print('\n💡 Generating content ideas...')
    ideas = generator.generate_ideas(data, niche, count=5)
    print(ideas)
Real Example: Fitness Niche
Let me show you what this generates in practice.
Input:
- Niche: "fitness"
- Keywords: ["workout", "gym motivation", "home workout"]
Output from pattern analysis:
## PATTERNS IDENTIFIED
### Hooks That Work:
1. "POV: You're at the gym and..." (relatable scenarios)
2. "Stop doing [common exercise] like this" (correction content)
3. "This one exercise changed my [body part]" (transformation hooks)
4. Numbers in hooks: "3 exercises for...", "The only 5 moves you need"
### Formats Performing Well:
- Quick tutorials (15-30 seconds)
- Before/after splits
- "Watch this if..." qualification hooks
- Workout POVs with trending audio
### Topics Getting Engagement:
- Home workout alternatives to gym exercises
- "Beginner-friendly" positioning
- Time-efficient workouts (10 min, 5 min)
- Debunking fitness myths
Generated Ideas:
1. "The gym bros don't want you to know this chest exercise"
- Platform: TikTok, Instagram Reels
- Format: Tutorial with demonstration
- Hashtags: #fitness #chestworkout #gymtips
- Potential: High
2. "POV: You finally stopped ego lifting"
- Platform: TikTok
- Format: Transformation/relatable content
- Hashtags: #gymtok #fitnesstips #gains
- Potential: Viral
3. "Replace your 1-hour workout with this 12-minute routine"
- Platform: YouTube Shorts, TikTok
- Format: Full workout demonstration
- Hashtags: #homeworkout #quickworkout #fitness
- Potential: High
Integrating with Your Workflow
Save Ideas to Database
// Save generated ideas for later
// better-sqlite3 is a third-party package; the npm install step earlier in this tutorial does not include it.
const Database = require('better-sqlite3');
// Opens the database file content-ideas.db.
const db = new Database('content-ideas.db');
// Create the ideas table on first run. hashtags is a TEXT column (saveIdea stores a JSON string in it),
// status defaults to 'draft' and created_at to the insertion time.
db.exec(`
CREATE TABLE IF NOT EXISTS ideas (
id INTEGER PRIMARY KEY,
niche TEXT,
title TEXT,
description TEXT,
platform TEXT,
hashtags TEXT,
potential TEXT,
status TEXT DEFAULT 'draft',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
)
`);
/**
 * Insert one idea into the `ideas` table.
 *
 * @param {{niche: string, title: string, description: string, platform: string,
 *   hashtags: *, potential: string}} idea - Idea to store; `hashtags` is
 *   serialized with JSON.stringify before it is bound.
 * @returns {void}
 */
function saveIdea(idea) {
  const insertIdea = db.prepare(`
INSERT INTO ideas (niche, title, description, platform, hashtags, potential)
VALUES (?, ?, ?, ?, ?, ?)
`);

  // Bound values, in the same order as the column list above.
  const row = [
    idea.niche,
    idea.title,
    idea.description,
    idea.platform,
    JSON.stringify(idea.hashtags),
    idea.potential
  ];
  insertIdea.run(...row);
}
Export to Notion or Trello
/**
 * Create one Notion page per idea in the given database.
 *
 * Pages are created one after another. `notion` must be a Notion client that
 * is already in scope; this snippet does not create it.
 *
 * @param {Array<{title: string, platform: string, potential: string}>} ideas - Ideas to export.
 * @param {string} databaseId - Target Notion database id.
 * @returns {Promise<void>}
 */
async function exportToNotion(ideas, databaseId) {
  // Integrate with Notion API
  const toPageRequest = (idea) => {
    const properties = {
      'Title': { title: [{ text: { content: idea.title } }] },
      'Platform': { select: { name: idea.platform } },
      'Status': { select: { name: 'To Create' } },
      'Potential': { select: { name: idea.potential } }
    };
    return { parent: { database_id: databaseId }, properties };
  };

  for (const idea of ideas) {
    await notion.pages.create(toPageRequest(idea));
  }
}
Tips for Better Results
1. Be Specific with Your Niche
❌ "Business"
✅ "SaaS startup founders"
❌ "Fitness"
✅ "Calisthenics for beginners over 30"
2. Add Competitor Keywords
// Placeholder values: replace each entry with a real competitor or industry search term.
const keywords = [
'yourcompetitor1',
'yourcompetitor2',
'industryleader'
];
GPT will analyze what's working for them.
3. Iterate on Ideas
Take GPT's ideas and ask for variations:
// Follow-up prompt text asking for five variations of one idea.
// `idea` must already be in scope; this snippet does not define it.
const refinementPrompt = `
Take this content idea and give me 5 variations:
- Different hook
- Different angle
- Different platform optimization
- More controversial version
- Beginner-friendly version
Original: "${idea}"
`;
4. Validate Before Creating
Use the data to validate ideas:
/**
 * Sketch of an idea-validation step: searches TikTok for content matching the idea text.
 *
 * NOTE: as written, the search result is neither inspected nor returned, so the
 * function always resolves to `undefined`. The decision rules listed in the
 * comments below still have to be implemented.
 *
 * @param {string} idea - Idea text, passed to fetchTikTokTrending as the search keyword.
 * @returns {Promise<void>}
 */
async function validateIdea(idea) {
// Search for similar content
const existing = await fetchTikTokTrending(idea, 20);
// If similar content exists and performs well = validated
// If nothing similar exists = risky but could be unique
// If similar content exists but performs poorly = avoid
}
The Bottom Line
Generic AI content tools give you generic ideas.
Feed GPT real data and you get ideas grounded in what's actually working.
This approach:
- ✅ Uses proof-of-concept from viral content
- ✅ Adapts to current trends automatically
- ✅ Creates platform-specific content
- ✅ Saves hours of manual research
Ready to generate data-driven content ideas?
Get social media data at sociavault.com and start building your AI content engine.
More tutorials:
Found this helpful?
Share it with others who might benefit
Ready to Try SociaVault?
Start extracting social media data with our powerful API. No credit card required.