Track product or business reviews across platforms, detect sentiment shifts, and receive alerts when negative reviews spike.
Architecture
Scheduler (cron)
  └──> ReviewScraper (per source)
    └──> CAPTCHA Solver (CaptchaAI)
      └──> Sentiment Analyzer
        └──> Alerter (email/Slack)
Review Data Model
# models.py
import sqlite3
from dataclasses import dataclass
from datetime import datetime, timezone
@dataclass
class Review:
    """A single product review collected from one source platform.

    `scraped_at` is stored as UTC in SQLite's canonical datetime format
    ("YYYY-MM-DD HH:MM:SS") so that lexical string comparison against
    `datetime('now', ...)` in SQL (see ReviewDB.recent_sentiment) equals
    chronological comparison.
    """
    source: str               # platform name, e.g. "Platform A"
    author: str               # reviewer display name ("Anonymous" when absent)
    rating: float             # star rating; 0.0 when it could not be parsed
    text: str                 # review body text
    date: str                 # review date as displayed on the page (free-form)
    url: str                  # page URL the review was scraped from
    sentiment: float = 0.0    # filled in later by analyze_sentiment
    scraped_at: str = ""      # auto-populated in __post_init__ when empty

    def __post_init__(self):
        if not self.scraped_at:
            # BUG FIX: the original used datetime.now().isoformat() — local
            # time with a "T" separator — which does not compare correctly
            # against SQLite's datetime('now'), which is UTC with a space
            # separator. Store UTC in SQLite's exact format instead.
            self.scraped_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
class ReviewDB:
    """SQLite-backed store for scraped reviews with de-duplication."""

    def __init__(self, path="reviews.db"):
        self.conn = sqlite3.connect(path)
        self._init()

    def _init(self):
        # UNIQUE(source, author, date, text) lets INSERT OR IGNORE silently
        # drop re-scraped duplicates.
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS reviews (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source TEXT, author TEXT, rating REAL,
                text TEXT, date TEXT, url TEXT,
                sentiment REAL, scraped_at TEXT,
                UNIQUE(source, author, date, text)
            )
        """)
        self.conn.commit()

    def save(self, review: Review) -> bool:
        """Insert a review; return True only when a new row was actually added.

        BUG FIX: the original caught sqlite3.IntegrityError to detect
        duplicates, but with INSERT OR IGNORE a UNIQUE collision never
        raises — the row is silently skipped and the method returned True
        for duplicates. Check cursor.rowcount instead (0 when ignored).
        """
        cursor = self.conn.execute(
            """INSERT OR IGNORE INTO reviews
            (source, author, rating, text, date, url, sentiment, scraped_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            (review.source, review.author, review.rating,
             review.text, review.date, review.url,
             review.sentiment, review.scraped_at),
        )
        self.conn.commit()
        return cursor.rowcount > 0

    def recent_sentiment(self, source, days=7):
        """Return (avg_sentiment, count) for `source` over the past `days` days.

        avg_sentiment is None (and count is 0) when no rows match. The
        comparison relies on scraped_at being stored in SQLite's UTC
        "YYYY-MM-DD HH:MM:SS" format so that string order equals time order.
        """
        cursor = self.conn.execute(
            """SELECT AVG(sentiment), COUNT(*)
            FROM reviews WHERE source = ?
            AND scraped_at >= datetime('now', ?)""",
            (source, f"-{days} days"),
        )
        return cursor.fetchone()
CAPTCHA Solver
# solver.py
import requests
import time
import os
def solve_recaptcha(sitekey, pageurl):
    """Solve a reCAPTCHA v2 via the CaptchaAI API and return the response token.

    Reads the API key from the CAPTCHAAI_API_KEY environment variable
    (raises KeyError when unset). Raises RuntimeError on any API error and
    TimeoutError after ~2 minutes of polling.
    """
    api_key = os.environ["CAPTCHAAI_API_KEY"]
    resp = requests.post("https://ocr.captchaai.com/in.php", data={
        "key": api_key,
        "method": "userrecaptcha",
        "googlekey": sitekey,
        "pageurl": pageurl,
        "json": 1,
    }, timeout=30)
    submit = resp.json()
    # BUG FIX: the original used submit["request"] as the task id without
    # checking status; on a failed submission (e.g. ERROR_WRONG_USER_KEY)
    # "request" holds an error string and the loop polled a bogus id.
    if submit.get("status") != 1:
        raise RuntimeError(f"CaptchaAI submit failed: {submit.get('request')}")
    task_id = submit["request"]
    time.sleep(15)  # typical minimum solve time before the first poll
    for _ in range(24):  # poll up to ~2 minutes (24 * 5 s)
        resp = requests.get("https://ocr.captchaai.com/res.php", params={
            "key": api_key, "action": "get",
            "id": task_id, "json": 1,
        }, timeout=15)
        data = resp.json()
        if data.get("status") == 1:
            return data["request"]  # the g-recaptcha-response token
        if data["request"] != "CAPCHA_NOT_READY":
            raise RuntimeError(data["request"])
        time.sleep(5)
    raise TimeoutError("Solve timed out")
Review Scraper
# scraper.py
import requests
import re
from bs4 import BeautifulSoup
from solver import solve_recaptcha
from models import Review
class ReviewScraper:
    """Scrapes paginated review listings for one platform using CSS selectors.

    `selectors` maps field names ("card", "author", "text", "rating",
    "date") to CSS selectors; sensible defaults are used for every field
    except "card", which is required.
    """

    def __init__(self, source, url_template, selectors):
        self.source = source
        self.url_template = url_template
        self.selectors = selectors
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/125.0.0.0 Safari/537.36"
        )

    def scrape(self, product_id, max_pages=5):
        """Collect reviews page by page, stopping at the first empty page."""
        collected = []
        page = 1
        while page <= max_pages:
            page_url = self.url_template.format(product_id=product_id, page=page)
            found = self._parse(self._fetch(page_url), page_url)
            if not found:
                break
            collected.extend(found)
            page += 1
        return collected

    def _fetch(self, url):
        """GET a page; if a reCAPTCHA challenge appears, solve and re-submit."""
        response = self.session.get(url, timeout=20)
        if "data-sitekey" in response.text:
            sitekey_match = re.search(r'data-sitekey="([^"]+)"', response.text)
            if sitekey_match:
                captcha_token = solve_recaptcha(sitekey_match.group(1), url)
                response = self.session.post(
                    url,
                    data={"g-recaptcha-response": captcha_token},
                    timeout=30,
                )
        return response.text

    def _parse(self, html, url):
        """Extract Review objects from one page of review-listing HTML."""
        soup = BeautifulSoup(html, "html.parser")
        sel = self.selectors
        results = []
        for card in soup.select(sel["card"]):
            body_el = card.select_one(sel.get("text", ".review-text"))
            if not body_el:
                continue  # a card without body text is not a usable review
            name_el = card.select_one(sel.get("author", ".author"))
            stars_el = card.select_one(sel.get("rating", ".stars"))
            when_el = card.select_one(sel.get("date", ".review-date"))
            results.append(Review(
                source=self.source,
                author=name_el.get_text(strip=True) if name_el else "Anonymous",
                rating=self._parse_rating(stars_el),
                text=body_el.get_text(strip=True),
                date=when_el.get_text(strip=True) if when_el else "",
                url=url,
            ))
        return results

    def _parse_rating(self, el):
        """Pull a numeric rating from aria-label, falling back to element text.

        Returns 0.0 when the element is missing/empty or contains no number.
        """
        if not el:
            return 0.0
        hit = re.search(r'([\d.]+)', el.get("aria-label", ""))
        if hit is None:
            hit = re.search(r'([\d.]+)', el.get_text(strip=True))
        return float(hit.group(1)) if hit else 0.0
Sentiment Analyzer
# sentiment.py
def analyze_sentiment(text):
    """Simple keyword-based sentiment score in [-1.0, 1.0].

    Counts positive vs. negative keyword hits and returns their normalized
    difference, rounded to 2 decimals; 0.0 when no keyword matches.

    BUG FIX: the original tokenized with text.split(), so any keyword
    followed by punctuation ("great!", "slow.") was never matched. Tokens
    are now stripped of surrounding punctuation. Keyword lists are
    frozensets for O(1) membership instead of O(n) list scans.
    """
    positive = frozenset({
        "great", "excellent", "amazing", "love", "perfect",
        "fast", "easy", "reliable", "recommend", "best",
    })
    negative = frozenset({
        "terrible", "awful", "slow", "broken", "worst",
        "hate", "useless", "scam", "waste", "horrible",
    })
    # Lowercase, split on whitespace, then strip surrounding punctuation
    # so "Great!" and "slow." match their keywords.
    words = [w.strip(".,!?;:()[]\"'-") for w in text.lower().split()]
    pos = sum(1 for w in words if w in positive)
    neg = sum(1 for w in words if w in negative)
    total = pos + neg
    if total == 0:
        return 0.0
    return round((pos - neg) / total, 2)
Alert System
# alerter.py
import smtplib
from email.mime.text import MIMEText
def send_alert(subject, body, to_email, smtp_config):
    """Deliver a plain-text alert email over SMTP with STARTTLS.

    smtp_config must provide "host", "port", "user", "password", and "from".
    """
    message = MIMEText(body)
    message["From"] = smtp_config["from"]
    message["To"] = to_email
    message["Subject"] = subject
    with smtplib.SMTP(smtp_config["host"], smtp_config["port"]) as server:
        server.starttls()
        server.login(smtp_config["user"], smtp_config["password"])
        server.send_message(message)
def check_and_alert(db, source, threshold=-0.3, to_email="team@example.com"):
    """Email an alert when a source's 7-day average sentiment drops below threshold.

    db: a ReviewDB-like object exposing recent_sentiment(source, days).
    Returns None; it is a no-op when there are no recent reviews or the
    average sentiment is at/above the threshold.
    """
    import os  # local import: keeps the module's top-level deps unchanged

    avg_sentiment, count = db.recent_sentiment(source, days=7)
    # Guard clause: nothing to report (None means zero matching rows).
    if avg_sentiment is None or avg_sentiment >= threshold:
        return
    send_alert(
        subject=f"Review Alert: {source} sentiment dropped to {avg_sentiment:.2f}",
        body=f"Average sentiment over {count} reviews in the past 7 days: {avg_sentiment:.2f}\n"
             f"Threshold: {threshold}",
        to_email=to_email,
        # SECURITY FIX: the original hard-coded the SMTP password in source.
        # Credentials now come from the environment; host/user/from defaults
        # preserve the old values.
        smtp_config={
            "host": os.environ.get("SMTP_HOST", "smtp.example.com"),
            "port": int(os.environ.get("SMTP_PORT", "587")),
            "user": os.environ.get("SMTP_USER", "alerts@example.com"),
            "password": os.environ.get("SMTP_PASSWORD", ""),
            "from": os.environ.get("SMTP_FROM", "alerts@example.com"),
        },
    )
Main Runner
# main.py
import time
from models import ReviewDB
from scraper import ReviewScraper
from sentiment import analyze_sentiment
from alerter import check_and_alert
# Platform configurations. Each entry tells ReviewScraper where to fetch
# pages and which CSS selectors locate the review fields in the HTML.
SOURCES = [
    {
        "source": "Platform A",
        # {product_id} and {page} are filled in by ReviewScraper.scrape().
        "url_template": "https://platform-a.example.com/product/{product_id}/reviews?page={page}",
        "selectors": {
            "card": ".review-card",       # one element per review (required)
            "author": ".reviewer-name",
            "text": ".review-body",
            "rating": ".star-rating",
            "date": ".review-date",
        },
        # Product identifiers to monitor on this platform.
        "products": ["product-123", "product-456"],
    },
]
def main():
    """Scrape every configured source/product, score sentiment, and alert."""
    db = ReviewDB()
    for config in SOURCES:
        scraper = ReviewScraper(config["source"], config["url_template"], config["selectors"])
        for product_id in config["products"]:
            print(f"Scraping {config['source']} — {product_id}")
            for review in scraper.scrape(product_id):
                review.sentiment = analyze_sentiment(review.text)
                if db.save(review):
                    print(f" New: {review.rating}★ sentiment={review.sentiment}")
            time.sleep(5)  # polite pause between products
        check_and_alert(db, config["source"])
    print("Done. Check alerts.")


if __name__ == "__main__":
    main()
FAQ
How accurate is keyword-based sentiment?
Good enough for trend detection. For production, swap in a model like transformers or a sentiment API for higher accuracy.
Can I monitor reviews hourly?
You can, but most review sites update slowly. Running every 4-12 hours is usually sufficient and reduces CAPTCHA solve costs.
How do I add a new review platform?
Add a new entry in SOURCES with the URL template and CSS selectors matching the platform's review page HTML.
Related Guides
Monitor reviews at scale — start with CaptchaAI.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In · No comments yet.