Build a Social Listening Tool From Scratch: Multi-Platform Keyword Monitoring
Social listening tools like Brandwatch and Sprout Social charge $300-3,000/month. Most of what they do is search social platforms for keywords and aggregate the results.
You can build your own for under $50/month. Here's exactly how.
Architecture Overview
Here's what we're building:
Keywords → Search APIs (Twitter, Threads, Reddit) → Normalize → Store → Alert
The system will:
- Search multiple platforms for your keywords
- Normalize results into a standard format
- Score sentiment
- Store results in a local JSON database
- Send alerts for high-priority mentions
Step 1: Define Your Keywords
Start with a keyword configuration file:
// config.js
//
// Central configuration for the listener. Each keyword entry carries:
//   term     - literal search string sent to every platform
//   category - grouping for reporting ('brand' | 'competitor' | 'industry')
//   priority - alerting hint ('high' keywords always alert; see runner)
const config = {
  keywords: [
    // Your brand — include common misspellings/alternate spacings.
    { term: 'sociavault', category: 'brand', priority: 'high' },
    { term: 'socia vault', category: 'brand', priority: 'high' },
    // Competitors
    { term: 'brandwatch', category: 'competitor', priority: 'medium' },
    { term: 'sprout social', category: 'competitor', priority: 'medium' },
    { term: 'hootsuite', category: 'competitor', priority: 'medium' },
    // Industry terms
    { term: 'social media analytics', category: 'industry', priority: 'low' },
    { term: 'influencer marketing tool', category: 'industry', priority: 'low' },
    { term: 'social media scraping', category: 'industry', priority: 'low' },
  ],
  // NOTE(review): `platforms` is not read anywhere in this file —
  // searchAllPlatforms() hardcodes all three platforms. Confirm intent.
  platforms: ['twitter', 'threads', 'reddit'],
  alerts: {
    // Slack incoming-webhook URL; alerts are skipped entirely when unset.
    slackWebhook: process.env.SLACK_WEBHOOK_URL,
    // NOTE(review): `highPriorityOnly` is not consulted by the runner's
    // shouldAlert logic — confirm whether it should be wired in.
    highPriorityOnly: false
  }
};
module.exports = config;
Step 2: Multi-Platform Search Engine
The core of the tool — searching across platforms:
// SociaVault API client settings. The key is read from the environment so
// it never lands in source control; every request authenticates via the
// X-API-Key header.
const API_KEY = process.env.SOCIAVAULT_API_KEY;
const BASE = 'https://api.sociavault.com/v1/scrape';
const headers = { 'X-API-Key': API_KEY };
/**
 * Search Twitter/X for a keyword via the SociaVault API and normalize
 * each tweet into the tool's standard mention shape.
 *
 * @param {string} keyword - Search term (URL-encoded before sending).
 * @returns {Promise<Array<object>>} Normalized mentions.
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function searchTwitter(keyword) {
  const res = await fetch(
    `${BASE}/twitter/search?query=${encodeURIComponent(keyword)}`,
    { headers }
  );
  // Fail loudly on HTTP errors instead of trying to parse an error body;
  // searchAllPlatforms() catches this and degrades gracefully.
  if (!res.ok) {
    throw new Error(`Twitter search failed: HTTP ${res.status}`);
  }
  const results = (await res.json()).data || [];
  // The API may return either the raw GraphQL shape (nested under `legacy`)
  // or a flattened shape, so each field probes both.
  return results.map(tweet => ({
    platform: 'twitter',
    text: tweet.legacy?.full_text || tweet.full_text || tweet.text || '',
    author: tweet.core?.user_results?.result?.legacy?.screen_name
      || tweet.user?.screen_name || tweet.author || 'unknown',
    // Fix: `??` instead of `||` for counts — a legitimate 0 is a real
    // value and must not fall through to the next candidate field.
    likes: tweet.legacy?.favorite_count ?? tweet.favorite_count ?? 0,
    shares: tweet.legacy?.retweet_count ?? tweet.retweet_count ?? 0,
    comments: tweet.legacy?.reply_count ?? tweet.reply_count ?? 0,
    url: tweet.url || null,
    timestamp: tweet.legacy?.created_at || tweet.created_at || null,
    id: tweet.rest_id || tweet.id || null
  }));
}
/**
 * Search Threads for a keyword via the SociaVault API and normalize each
 * post into the tool's standard mention shape.
 *
 * @param {string} keyword - Search term (URL-encoded before sending).
 * @returns {Promise<Array<object>>} Normalized mentions.
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function searchThreads(keyword) {
  const res = await fetch(
    `${BASE}/threads/search?query=${encodeURIComponent(keyword)}`,
    { headers }
  );
  // Fail loudly on HTTP errors; searchAllPlatforms() degrades gracefully.
  if (!res.ok) {
    throw new Error(`Threads search failed: HTTP ${res.status}`);
  }
  const results = (await res.json()).data || [];
  return results.map(post => ({
    platform: 'threads',
    text: post.caption?.text || post.text || '',
    author: post.user?.username || post.username || 'unknown',
    // Fix: `??` instead of `||` for counts so a legitimate 0 is kept
    // rather than falling through to the next candidate field.
    likes: post.like_count ?? 0,
    shares: post.text_post_app_info?.reshare_count ?? post.reshare_count ?? 0,
    comments: post.text_post_app_info?.direct_reply_count ?? post.reply_count ?? 0,
    url: post.url || null,
    timestamp: post.taken_at || post.created_at || null,
    id: post.id || null
  }));
}
/**
 * Search Reddit for a keyword via the SociaVault API and normalize each
 * post into the tool's standard mention shape. Reddit exposes no
 * share/repost metric, so `shares` is always 0.
 *
 * @param {string} keyword - Search term (URL-encoded before sending).
 * @returns {Promise<Array<object>>} Normalized mentions.
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function searchReddit(keyword) {
  const res = await fetch(
    `${BASE}/reddit/search?query=${encodeURIComponent(keyword)}`,
    { headers }
  );
  if (!res.ok) {
    throw new Error(`Reddit search failed: HTTP ${res.status}`);
  }
  const results = (await res.json()).data || [];
  return results.map(post => {
    // Fix: `title` can be absent (text was "undefined…") and `selftext`
    // can be null on link posts (`.substring` would throw); guard both.
    const title = post.title ?? '';
    const body = post.selftext ? ` — ${post.selftext.substring(0, 300)}` : '';
    return {
      platform: 'reddit',
      text: title + body,
      author: post.author || 'unknown',
      // Fix: `??` keeps a legitimate score of 0 instead of probing `ups`.
      likes: post.score ?? post.ups ?? 0,
      shares: 0,
      comments: post.num_comments ?? 0,
      subreddit: post.subreddit || 'unknown',
      url: post.url || null,
      timestamp: post.created_utc || null,
      id: post.id || null
    };
  });
}
/**
 * Search every supported platform for one keyword in parallel.
 * A failure on one platform is logged and treated as zero results so the
 * remaining platforms still contribute to the combined list.
 *
 * @param {string} keyword - Search term forwarded to each platform.
 * @returns {Promise<Array<object>>} Combined normalized mentions.
 */
async function searchAllPlatforms(keyword) {
  // Fix: don't swallow failures silently — surface them for debugging,
  // but keep the scan running with whatever data we do have.
  const logAndSkip = (platform) => (err) => {
    console.error(`[${platform}] search failed for "${keyword}":`, err.message);
    return [];
  };
  const [twitter, threads, reddit] = await Promise.all([
    searchTwitter(keyword).catch(logAndSkip('twitter')),
    searchThreads(keyword).catch(logAndSkip('threads')),
    searchReddit(keyword).catch(logAndSkip('reddit'))
  ]);
  return [...twitter, ...threads, ...reddit];
}
Step 3: Sentiment Analysis
You don't need an ML model for basic sentiment analysis. Keyword-based scoring works surprisingly well:
// Sentiment lexicons (alphabetized for easy maintenance). Deliberately
// small and transparent: set lookups are O(1) and the lists are trivial
// to tune per product. All entries are lowercase, letters only, to match
// the token cleaning done in analyzeSentiment().
const positiveWords = new Set([
  'amazing', 'awesome', 'beautiful', 'best', 'easy', 'excellent',
  'fantastic', 'fast', 'favorite', 'great', 'helpful', 'impressive',
  'incredible', 'love', 'perfect', 'recommend', 'reliable', 'winner'
]);
const negativeWords = new Set([
  'annoying', 'awful', 'broken', 'bug', 'disappointed', 'expensive',
  'frustrating', 'hate', 'horrible', 'overpriced', 'scam', 'slow',
  'sucks', 'terrible', 'trash', 'useless', 'waste', 'worst'
]);
/**
 * Lexicon-based sentiment scorer: counts positive vs negative words and
 * derives a score in [-1, 1] plus a coarse label.
 *
 * @param {string} text - Raw mention text.
 * @returns {{score: number, label: string, positive: number, negative: number}}
 *   score is rounded to 2 decimals; label is 'positive' (> 0.3),
 *   'negative' (< -0.3), or 'neutral'.
 */
function analyzeSentiment(text) {
  const words = text.toLowerCase().split(/\s+/);
  let positive = 0;
  let negative = 0;
  for (const word of words) {
    // Strip punctuation/digits so "love!" still matches "love".
    const clean = word.replace(/[^a-z]/g, '');
    if (positiveWords.has(clean)) positive++;
    if (negativeWords.has(clean)) negative++;
  }
  const total = positive + negative;
  // Fix: include the raw counts on the no-match path too, so the return
  // shape is consistent regardless of input.
  if (total === 0) return { score: 0, label: 'neutral', positive, negative };
  const score = (positive - negative) / total;
  let label = 'neutral';
  if (score > 0.3) label = 'positive';
  else if (score < -0.3) label = 'negative';
  return { score: Math.round(score * 100) / 100, label, positive, negative };
}
// Attach sentiment fields to a normalized mention, returning a new object
// rather than mutating the input.
function enrichMention(mention) {
  const { label, score } = analyzeSentiment(mention.text);
  return { ...mention, sentiment: label, sentimentScore: score };
}
Step 4: Storage and Deduplication
Store results locally with dedup to avoid processing the same mention twice:
const fs = require('fs');
const path = require('path');
// Flat-file JSON store kept next to this script; shape is
// { mentions: [...], seenIds: [...] } (see loadMentions/addMentions).
const DATA_FILE = path.join(__dirname, 'mentions.json');
/**
 * Read the mention database from disk.
 * Falls back to an empty database when the file is missing (first run)
 * or unparsable, instead of crashing.
 *
 * @returns {{mentions: Array<object>, seenIds: Array<string>}}
 */
function loadMentions() {
  try {
    const raw = fs.readFileSync(DATA_FILE, 'utf8');
    return JSON.parse(raw);
  } catch {
    // Missing or corrupt file — start fresh.
    return { mentions: [], seenIds: [] };
  }
}
/**
 * Persist the mention database to disk as pretty-printed JSON
 * (2-space indent keeps the file diffable/inspectable).
 *
 * @param {{mentions: Array<object>, seenIds: Array<string>}} data
 */
function saveMentions(data) {
  const serialized = JSON.stringify(data, null, 2);
  fs.writeFileSync(DATA_FILE, serialized);
}
/**
 * Merge freshly scraped mentions into the on-disk database, skipping any
 * we have already seen.
 *
 * @param {Array<object>} newMentions - Normalized mentions from a scan.
 * @returns {number} How many mentions were actually stored.
 */
function addMentions(newMentions) {
  const db = loadMentions();
  const seen = new Set(db.seenIds);
  // Prefer the platform-native ID; fall back to a text prefix so even
  // ID-less results can still be deduplicated.
  const keyFor = (m) => `${m.platform}-${m.id || m.text.substring(0, 50)}`;
  let stored = 0;
  for (const mention of newMentions) {
    const key = keyFor(mention);
    if (seen.has(key)) continue;
    seen.add(key);
    db.seenIds.push(key);
    db.mentions.push({
      ...enrichMention(mention),
      collectedAt: new Date().toISOString()
    });
    stored += 1;
  }
  saveMentions(db);
  return stored;
}
Step 5: Alert System
Send Slack alerts for important mentions:
/**
 * Post a formatted mention alert to Slack via an incoming webhook.
 * No-ops silently when SLACK_WEBHOOK_URL is not configured.
 *
 * @param {object} mention - Enriched mention (sentiment fields populated).
 * @param {object} keyword - Keyword config entry ({ term, category, priority }).
 * @returns {Promise<void>}
 */
async function sendSlackAlert(mention, keyword) {
  const webhookUrl = process.env.SLACK_WEBHOOK_URL;
  if (!webhookUrl) return;
  const emoji = mention.sentiment === 'positive' ? '🟢'
    : mention.sentiment === 'negative' ? '🔴'
    : '⚪';
  // Fix: default missing engagement counts to 0 so the total never renders
  // as NaN (e.g. a mention object without a `shares` field).
  const engagement = (mention.likes ?? 0) + (mention.comments ?? 0) + (mention.shares ?? 0);
  const payload = {
    blocks: [
      {
        type: 'header',
        text: {
          type: 'plain_text',
          text: `${emoji} New mention: "${keyword.term}"`
        }
      },
      {
        type: 'section',
        fields: [
          { type: 'mrkdwn', text: `*Platform:* ${mention.platform}` },
          { type: 'mrkdwn', text: `*Author:* ${mention.author}` },
          { type: 'mrkdwn', text: `*Sentiment:* ${mention.sentiment}` },
          { type: 'mrkdwn', text: `*Engagement:* ${engagement}` }
        ]
      },
      {
        type: 'section',
        text: {
          type: 'mrkdwn',
          // Slack section text is capped at 3000 chars; 500 keeps alerts scannable.
          text: `> ${mention.text.substring(0, 500)}`
        }
      }
    ]
  };
  // Only attach a button when we actually have a permalink.
  if (mention.url) {
    payload.blocks.push({
      type: 'actions',
      elements: [{
        type: 'button',
        text: { type: 'plain_text', text: 'View Post' },
        url: mention.url
      }]
    });
  }
  const res = await fetch(webhookUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload)
  });
  // Fix: surface webhook failures instead of ignoring them silently
  // (log rather than throw so one bad alert can't abort a whole scan).
  if (!res.ok) {
    console.error(`Slack alert failed: HTTP ${res.status}`);
  }
}
Step 6: Put It All Together
The main runner that ties everything together:
const config = require('./config');

/**
 * Run one complete listening cycle: for every configured keyword, search
 * all platforms, persist new mentions, tally sentiment, and send Slack
 * alerts for high-priority / negative / high-engagement mentions.
 *
 * @returns {Promise<{totalNew: number, sentimentSummary: {positive: number, negative: number, neutral: number}}>}
 */
async function runListeningCycle() {
  console.log(`\n${'='.repeat(60)}`);
  console.log(` Social Listening Scan — ${new Date().toISOString()}`);
  console.log('='.repeat(60));
  let totalNew = 0;
  const sentimentSummary = { positive: 0, negative: 0, neutral: 0 };
  for (const keyword of config.keywords) {
    console.log(`\n Searching: "${keyword.term}" (${keyword.category})`);
    const mentions = await searchAllPlatforms(keyword.term);
    console.log(` Found ${mentions.length} mentions across all platforms`);
    const added = addMentions(mentions);
    totalNew += added;
    console.log(` ${added} new mentions stored`);
    // Process new mentions
    // NOTE(review): this loop iterates over ALL fetched mentions, not just
    // the `added` new ones — so the sentiment summary counts previously
    // seen mentions too, and alerts can re-fire for old mentions whenever
    // at least one new mention was stored (`added > 0` below is a
    // batch-level check, not a per-mention "is new" check). Worth fixing.
    for (const mention of mentions) {
      // NOTE(review): addMentions() already ran enrichMention() on the
      // stored copies; this re-runs sentiment analysis on the raw copy.
      const enriched = enrichMention(mention);
      sentimentSummary[enriched.sentiment]++;
      // Send alerts for high-priority or negative mentions
      const shouldAlert = keyword.priority === 'high'
        || enriched.sentiment === 'negative'
        || (enriched.likes + enriched.comments) > 50;
      if (shouldAlert && added > 0) {
        await sendSlackAlert(enriched, keyword);
      }
    }
    // Rate limit between keywords
    await new Promise(r => setTimeout(r, 2000));
  }
  console.log(`\n Summary:`);
  console.log(`   New mentions: ${totalNew}`);
  console.log(`   Positive: ${sentimentSummary.positive}`);
  console.log(`   Negative: ${sentimentSummary.negative}`);
  console.log(`   Neutral: ${sentimentSummary.neutral}`);
  return { totalNew, sentimentSummary };
}
// Kick off a single scan; recurring runs are scheduled externally via cron.
runListeningCycle();
Schedule it with cron:
# Run every 4 hours
0 */4 * * * cd /path/to/social-listener && node index.js >> logs/listener.log 2>&1
Full Python Version
Here's the complete tool in Python:
import os
import json
import time
import requests
from datetime import datetime
# --- Configuration -----------------------------------------------------------
# os.environ[...] (rather than .get) fails fast with a KeyError at import
# time if the API key is missing.
API_KEY = os.environ["SOCIAVAULT_API_KEY"]
BASE = "https://api.sociavault.com/v1/scrape"
HEADERS = {"X-API-Key": API_KEY}
# Flat-file JSON database written by run_scan().
DATA_FILE = "mentions.json"
# Keywords to monitor: term is the search string; category groups results.
# NOTE(review): "priority" is stored but never acted on in run_scan().
KEYWORDS = [
    {"term": "your brand", "category": "brand", "priority": "high"},
    {"term": "competitor name", "category": "competitor", "priority": "medium"},
    {"term": "industry term", "category": "industry", "priority": "low"},
]
# Sentiment lexicons used by get_sentiment() — lowercase, letters only.
POSITIVE = {"love", "great", "amazing", "awesome", "best", "excellent", "recommend", "helpful", "fantastic"}
NEGATIVE = {"hate", "terrible", "awful", "worst", "broken", "slow", "scam", "useless", "disappointed", "frustrating"}
def search_platform(platform, keyword):
    """Search a single platform for a keyword.

    Args:
        platform: One of "twitter", "threads", "reddit".
        keyword: Search term sent as the ``query`` parameter.

    Returns:
        list: Raw result dicts from the API. Empty list for unknown
        platforms or on any request/HTTP failure.
    """
    endpoint_map = {
        "twitter": "twitter/search",
        "threads": "threads/search",
        "reddit": "reddit/search",
    }
    endpoint = endpoint_map.get(platform)
    if not endpoint:
        return []
    try:
        # Fix: requests has NO default timeout and can hang forever; also
        # raise on HTTP errors instead of parsing an error body as data.
        r = requests.get(
            f"{BASE}/{endpoint}",
            headers=HEADERS,
            params={"query": keyword},
            timeout=30,
        )
        r.raise_for_status()
        return r.json().get("data", [])
    except Exception as exc:
        # Fix: log the failure instead of swallowing it silently, then
        # degrade to "no results" so the scan keeps going.
        print(f"[{platform}] search failed: {exc}")
        return []
def normalize(platform, raw_results):
    """Normalize raw platform results into the standard mention format.

    Args:
        platform: Source platform name ("twitter", "threads", "reddit").
        raw_results: List of raw result dicts from search_platform().

    Returns:
        list[dict]: Mentions with platform/text/author/likes/id keys.
        An unknown platform yields an empty list.
    """
    mentions = []
    for item in raw_results:
        if platform == "twitter":
            # Fix: item["legacy"] may be present but None; `or {}` guards
            # every access (the original only guarded the likes lookup,
            # so text/author could raise AttributeError).
            legacy = item.get("legacy") or {}
            text = legacy.get("full_text") or item.get("text", "")
            author = (
                ((((item.get("core") or {}).get("user_results") or {})
                  .get("result") or {}).get("legacy") or {}).get("screen_name")
                or item.get("author", "unknown")
            )
            likes = legacy.get("favorite_count", 0)
        elif platform == "threads":
            text = (item.get("caption") or {}).get("text", "") or item.get("text", "")
            author = (item.get("user") or {}).get("username", "unknown")
            likes = item.get("like_count", 0)
        elif platform == "reddit":
            # Fix: title can be missing and selftext can be None on link
            # posts (slicing None raised TypeError); coerce both to "".
            title = item.get("title") or ""
            body = (item.get("selftext") or "")[:200]
            text = title + " " + body
            author = item.get("author", "unknown")
            likes = item.get("score", 0)
        else:
            continue
        mentions.append({
            "platform": platform,
            "text": text.strip(),
            "author": author,
            "likes": likes,
            "id": str(item.get("id") or item.get("rest_id", "")),
        })
    return mentions
def get_sentiment(text, positive=None, negative=None):
    """Classify text as positive/negative/neutral by lexicon word counts.

    Args:
        text: Raw mention text.
        positive: Optional positive-word set; defaults to module POSITIVE.
        negative: Optional negative-word set; defaults to module NEGATIVE.

    Returns:
        str: "positive", "negative", or "neutral" (ties are neutral).
    """
    if positive is None:
        positive = POSITIVE
    if negative is None:
        negative = NEGATIVE
    # Fix: strip non-letters from each token so "love!" or "terrible."
    # still match the lexicons (the whitespace-only split of the original
    # silently missed punctuated words, unlike the JS version).
    words = {"".join(ch for ch in w if "a" <= ch <= "z") for w in text.lower().split()}
    pos = len(words & positive)
    neg = len(words & negative)
    if pos > neg:
        return "positive"
    if neg > pos:
        return "negative"
    return "neutral"
def run_scan():
    """Run one listening pass over every keyword and platform.

    Loads the JSON database, searches each platform, dedupes by
    platform+id, tags each new mention with sentiment, and writes the
    updated database back to DATA_FILE. Prints a summary to stdout.
    """
    # Load seen IDs (and previously stored mentions); start fresh on the
    # first run or when the file is corrupt.
    try:
        with open(DATA_FILE) as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        data = {"mentions": [], "seen": []}
    # NOTE(review): a file written under an older schema without a "seen"
    # key would raise KeyError here — consider data.get("seen", []).
    seen = set(data["seen"])
    new_count = 0
    summary = {"positive": 0, "negative": 0, "neutral": 0}
    for kw in KEYWORDS:
        print(f"\nSearching: \"{kw['term']}\"")
        for platform in ["twitter", "threads", "reddit"]:
            raw = search_platform(platform, kw["term"])
            mentions = normalize(platform, raw)
            for m in mentions:
                # Dedupe on platform + native ID.
                # NOTE(review): mentions whose id normalized to "" all
                # share the key "<platform>-" and later ones are silently
                # dropped — verify IDs are always present upstream.
                uid = f"{m['platform']}-{m['id']}"
                if uid in seen:
                    continue
                seen.add(uid)
                sentiment = get_sentiment(m["text"])
                summary[sentiment] += 1
                data["mentions"].append({
                    **m,
                    "keyword": kw["term"],
                    "category": kw["category"],
                    "sentiment": sentiment,
                    "collected": datetime.now().isoformat()
                })
                new_count += 1
        # Be polite to the API: brief pause between keywords.
        time.sleep(1)
    data["seen"] = list(seen)
    # Persist everything in a single write at the end of the scan.
    with open(DATA_FILE, "w") as f:
        json.dump(data, f, indent=2)
    print(f"\n{'='*50}")
    print(f" Scan Complete: {new_count} new mentions")
    print(f" Positive: {summary['positive']} | Negative: {summary['negative']} | Neutral: {summary['neutral']}")
    print(f" Total stored: {len(data['mentions'])}")

run_scan()
Cost Estimate
| Component | Credits/Run | Runs/Day | Daily Cost |
|---|---|---|---|
| 8 keywords × 3 platforms | 24 credits | 6 | 144 credits |
| Monthly total | — | — | ~4,320 credits |
| Estimated cost | — | — | $10-20/month |
Compare that to Brandwatch ($800+/month) or Sprout Social ($299+/month).
What Real Social Listening Tools Charge
| Tool | Monthly Cost | What You Get |
|---|---|---|
| Brandwatch | $800+ | Full listening, sentiment, analytics |
| Sprout Social | $299+ | Listening + management |
| Mention | $49+ | Basic keyword monitoring |
| Brand24 | $79+ | Keyword tracking, basic sentiment |
| DIY with SociaVault | $10-20 | Same keyword monitoring, you control the data |
Get Started
Sign up free — build your own social listening tool today.
Related Reading
Found this helpful?
Share it with others who might benefit
Ready to Try SociaVault?
Start extracting social media data with our powerful API. No credit card required.