How to Build a Trend Forecasting Tool with Social Scraping
Trends emerge on social media before they hit the mainstream. By scraping platforms systematically, you can detect rising trends days before they become obvious.
Data Collection
import sqlite3
import time
from datetime import datetime, timedelta
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
class TrendCollector:
    """Scrapes social platforms for keyword mentions and stores them in SQLite."""

    def __init__(self, db_path='trends.db', api_key=None):
        """Open the database, prepare the HTTP session, and ensure the schema.

        Args:
            db_path: Path to the SQLite database file.
            api_key: Optional ScraperAPI key; when set, all fetches are proxied.
        """
        self.db = sqlite3.connect(db_path)
        self.api_key = api_key
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'TrendResearch/1.0'})
        self._init_db()

    def _init_db(self):
        """Create the mentions table and its keyword/timestamp index if missing."""
        self.db.executescript('''
            CREATE TABLE IF NOT EXISTS mentions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                platform TEXT, keyword TEXT, content TEXT,
                engagement INTEGER, timestamp DATETIME, url TEXT);
            CREATE INDEX IF NOT EXISTS idx_kw ON mentions(keyword, timestamp);
        ''')

    def _fetch(self, url):
        """GET *url*, optionally through the ScraperAPI proxy.

        Fix: the target URL is now passed via ``params`` so requests
        percent-encodes it (a raw '&' or '?' in *url* previously leaked into
        the proxy query string and corrupted the request), and the proxy
        endpoint uses HTTPS so the API key is not sent in cleartext.
        """
        if self.api_key:
            return self.session.get(
                'https://api.scraperapi.com/',
                params={'api_key': self.api_key, 'url': url, 'render': 'true'})
        return self.session.get(url)

    def collect_reddit(self, keyword, subs=None):
        """Scrape the past week of Reddit search results for *keyword*.

        Args:
            keyword: Search term; percent-encoded before being placed in the URL.
            subs: Subreddit names to search; defaults to ['all'].

        Returns:
            List of mention dicts (platform, keyword, content, engagement,
            url, timestamp); each is also persisted to the mentions table.
        """
        subs = subs or ['all']
        mentions = []
        for sub in subs:
            # quote_plus: spaces/&/# in the keyword would otherwise break the query.
            resp = self._fetch(
                f"https://old.reddit.com/r/{sub}/search?q={quote_plus(keyword)}&sort=new&t=week")
            soup = BeautifulSoup(resp.text, 'html.parser')
            for post in soup.select('.thing.link'):
                title = post.select_one('a.title')
                score = post.select_one('.score.unvoted')
                if not title:
                    continue
                eng = 0
                if score:
                    try:
                        eng = int(score.get_text(strip=True))
                    except ValueError:
                        # Hidden/placeholder scores (non-numeric text) count as 0;
                        # narrowed from a bare except that swallowed everything.
                        pass
                mentions.append({
                    'platform': 'reddit', 'keyword': keyword,
                    'content': title.get_text(strip=True),
                    'engagement': eng, 'url': title.get('href', ''),
                    'timestamp': datetime.now().isoformat()
                })
            time.sleep(2)  # polite per-subreddit rate limit
        # executemany batches the inserts instead of one execute() per row.
        self.db.executemany(
            'INSERT INTO mentions (platform,keyword,content,engagement,timestamp,url) VALUES (?,?,?,?,?,?)',
            [(m['platform'], m['keyword'], m['content'], m['engagement'],
              m['timestamp'], m['url']) for m in mentions])
        self.db.commit()
        return mentions
Velocity Engine
The key insight: acceleration matters more than volume.
class VelocityEngine:
    """Computes period-over-period growth of mention counts and engagement."""

    def __init__(self, db):
        # db: open sqlite3 connection holding the `mentions` table.
        self.db = db

    def velocity(self, keyword, hours=24):
        """Growth rate of mention *count* vs. the preceding window of equal size."""
        return self._window_growth(keyword, hours, self._count)

    def engagement_velocity(self, keyword, hours=24):
        """Growth rate of summed *engagement* vs. the preceding window."""
        return self._window_growth(keyword, hours, self._engagement)

    def _window_growth(self, keyword, hours, measure):
        """Shared window comparison: (current - previous) / previous.

        Extracted from velocity()/engagement_velocity(), which were duplicates
        differing only in the aggregate. Returns inf when the previous window
        is empty but the current is not, 0 when both are empty.
        NOTE(review): uses naive local time via datetime.now() and compares
        ISO strings lexicographically — assumes stored timestamps are naive
        local ISO strings too; confirm before mixing time zones.
        """
        now = datetime.now()
        cur_start = now - timedelta(hours=hours)
        prev_start = cur_start - timedelta(hours=hours)
        cur = measure(keyword, cur_start, now)
        prev = measure(keyword, prev_start, cur_start)
        if prev == 0:
            return float('inf') if cur > 0 else 0
        return round((cur - prev) / prev, 4)

    def _count(self, kw, start, end):
        """Number of mentions of *kw* with timestamp in [start, end]."""
        c = self.db.execute(
            'SELECT COUNT(*) FROM mentions WHERE keyword=? AND timestamp BETWEEN ? AND ?',
            (kw, start.isoformat(), end.isoformat()))
        return c.fetchone()[0]

    def _engagement(self, kw, start, end):
        """Total engagement for *kw* in [start, end]; 0 when there are no rows."""
        c = self.db.execute(
            'SELECT COALESCE(SUM(engagement),0) FROM mentions WHERE keyword=? AND timestamp BETWEEN ? AND ?',
            (kw, start.isoformat(), end.isoformat()))
        return c.fetchone()[0]
Breakout Detection
class BreakoutDetector:
    """Flags keywords whose weighted velocity score crosses a breakout threshold."""

    def __init__(self, engine):
        # engine: provides velocity() and engagement_velocity() per keyword.
        self.engine = engine

    def detect(self, keywords, threshold=0.5):
        """Score each keyword (40% mention velocity, 60% engagement velocity)
        and return those above *threshold*, highest score first.
        """
        hot = []
        for term in keywords:
            mention_v = self.engine.velocity(term)
            engage_v = self.engine.engagement_velocity(term)
            combined = mention_v * 0.4 + engage_v * 0.6
            if combined <= threshold:
                continue
            hot.append({
                'keyword': term,
                'mention_vel': mention_v,
                'engagement_vel': engage_v,
                'score': round(combined, 3),
            })
        hot.sort(key=lambda row: row['score'], reverse=True)
        return hot
Trend Forecasting
class Forecaster:
    """Classifies a keyword's trajectory from its daily mention counts."""

    def predict(self, keyword, db, points=7):
        """Fit a least-squares slope to the last *points* days of mention
        counts and label the trend.

        Args:
            keyword: Keyword to look up in the mentions table.
            db: Open sqlite3 connection.
            points: Maximum number of most recent days to fit.

        Returns:
            Dict with 'keyword', 'prediction' (accelerating/growing/stable/
            declining or insufficient_data), and 'growth_rate' when fitted.
        """
        cursor = db.execute('''
            SELECT DATE(timestamp), COUNT(*) FROM mentions
            WHERE keyword=? GROUP BY DATE(timestamp)
            ORDER BY DATE(timestamp) DESC LIMIT ?
        ''', (keyword, points))
        data = cursor.fetchall()
        if len(data) < 3:
            # Fix: include 'keyword' so this result has the same shape as the
            # success path and callers can attribute it to its query.
            return {'keyword': keyword, 'prediction': 'insufficient_data'}
        # Rows arrive newest-first; reverse into chronological order.
        vols = [count for _, count in reversed(data)]
        n = len(vols)
        xm = (n - 1) / 2          # mean of x = 0..n-1
        ym = sum(vols) / n
        slope = sum((i - xm) * (v - ym) for i, v in enumerate(vols))
        slope /= sum((i - xm) ** 2 for i in range(n))
        # Normalize the slope by mean volume so the thresholds are scale-free.
        rate = slope / ym if ym > 0 else 0
        if rate > 0.15:
            label = 'accelerating'
        elif rate > 0.05:
            label = 'growing'
        elif rate > -0.05:
            label = 'stable'
        else:
            label = 'declining'
        return {'keyword': keyword, 'prediction': label, 'growth_rate': round(rate, 4)}
Social platforms are hard to scrape. ScraperAPI handles anti-bot protections. ThorData provides residential proxy rotation. Track request success rates with ScrapeOps.
Follow for more Python data science tutorials.