Tutorials

Build a Price Comparison Bot with Python and CaptchaAI

Build a bot that scrapes prices from multiple websites, handles CAPTCHAs automatically, and outputs a comparison report.


Project Structure

price_bot/
├── config.py        # API keys and store configs
├── solver.py        # CaptchaAI integration
├── scraper.py       # Per-store scrapers
├── compare.py       # Price comparison logic
└── main.py          # Entry point

CAPTCHA Solver Module

# solver.py
import requests
import time


class CaptchaSolver:
    """Client for the CaptchaAI submit-then-poll HTTP API.

    A task is submitted to ``in.php`` and its result is polled from
    ``res.php`` until it is ready, fails, or the poll budget is exhausted.

    Args:
        api_key: Account key sent as ``key`` with every request.
        initial_delay: Seconds to wait after submitting before the first poll.
        poll_interval: Seconds to wait between consecutive polls.
        max_polls: Maximum number of result polls before giving up.
    """

    SUBMIT_URL = "https://ocr.captchaai.com/in.php"
    RESULT_URL = "https://ocr.captchaai.com/res.php"

    def __init__(self, api_key, *, initial_delay=15, poll_interval=5,
                 max_polls=24):
        self.api_key = api_key
        self.initial_delay = initial_delay
        self.poll_interval = poll_interval
        self.max_polls = max_polls

    def solve_recaptcha(self, sitekey, pageurl):
        """Submit a reCAPTCHA task and block until its token is available.

        Args:
            sitekey: Value of the page's ``data-sitekey`` attribute.
            pageurl: URL of the page that shows the CAPTCHA.

        Returns:
            The ``request`` field of the first poll response whose
            ``status`` is 1 (the solved token).

        Raises:
            RuntimeError: The API rejected the submission or reported an
                error while polling; the message is the API's ``request``
                field.
            TimeoutError: The task was still not ready after ``max_polls``
                polls.
        """
        resp = requests.post(self.SUBMIT_URL, data={
            "key": self.api_key,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": pageurl,
            "json": 1,
        }, timeout=30)
        result = resp.json()
        if result.get("status") != 1:
            raise RuntimeError(result.get("request"))

        task_id = result["request"]
        time.sleep(self.initial_delay)

        for attempt in range(self.max_polls):
            resp = requests.get(self.RESULT_URL, params={
                "key": self.api_key, "action": "get",
                "id": task_id, "json": 1,
            }, timeout=15)
            data = resp.json()
            if data.get("status") == 1:
                return data["request"]
            # The status string is compared verbatim (note its spelling);
            # anything else is treated as a terminal error.
            if data.get("request") != "CAPCHA_NOT_READY":
                raise RuntimeError(data.get("request"))
            # No point sleeping after the final poll; fail right away.
            if attempt < self.max_polls - 1:
                time.sleep(self.poll_interval)

        raise TimeoutError("Solve timeout")

Store Scraper

# scraper.py
import requests
import re
import time
from bs4 import BeautifulSoup


class StoreScraper:
    """Generic store scraper with CAPTCHA handling.

    Args:
        solver: Object exposing ``solve_recaptcha(sitekey, pageurl)`` that
            returns a response token.
    """

    # Accepts both data-sitekey="..." and data-sitekey='...'.
    _SITEKEY_RE = re.compile(r'''data-sitekey\s*=\s*["']([^"']+)["']''')

    # CSS selectors tried in order when looking for the price element.
    _PRICE_SELECTORS = (
        ".price", ".product-price", "[data-price]",
        ".price-current", ".offer-price", "#priceblock_ourprice",
    )

    def __init__(self, solver):
        self.solver = solver
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 Chrome/125.0.0.0 Safari/537.36",
        })

    def scrape_price(self, url):
        """Fetch a page, handle a CAPTCHA if one is present, extract the price.

        Args:
            url: Product page URL.

        Returns:
            Dict with ``price`` (float or None), ``currency`` (str or None)
            and ``url``.
        """
        resp = self.session.get(url, timeout=15)

        # Check for CAPTCHA
        if 'data-sitekey' in resp.text:
            retried = self._solve_and_retry(url, resp.text)
            # If no usable sitekey could be extracted there is nothing to
            # retry with; keep the original response instead of crashing
            # on ``None.text``.
            if retried is not None:
                resp = retried

        return self._extract_price(resp.text, url)

    @staticmethod
    def _find_sitekey(html):
        """Return the first ``data-sitekey`` attribute value, or None."""
        match = StoreScraper._SITEKEY_RE.search(html)
        return match.group(1) if match else None

    def _solve_and_retry(self, url, html):
        """Solve the page's CAPTCHA and re-submit the request with the token.

        Returns:
            The response of the POST carrying ``g-recaptcha-response``, or
            None when no sitekey could be found in ``html``.
        """
        sitekey = self._find_sitekey(html)
        if sitekey is None:
            return None

        token = self.solver.solve_recaptcha(sitekey, url)

        # Submit with token
        return self.session.post(url, data={
            "g-recaptcha-response": token,
        }, timeout=30)

    @staticmethod
    def _parse_amount(price_text):
        """Return the first number in ``price_text`` as a float, or None.

        Commas are removed first, so ``"$1,299.99"`` parses as ``1299.99``.
        """
        match = re.search(r'\d+\.?\d*', price_text.replace(",", ""))
        return float(match.group()) if match else None

    def _extract_price(self, html, url):
        """Extract the price from ``html`` using common price selectors.

        The first selector whose element text contains a number wins.

        Returns:
            Dict with ``price``, ``currency`` and ``url``; ``price`` and
            ``currency`` are None when no selector yields a number.
        """
        soup = BeautifulSoup(html, "html.parser")

        for selector in self._PRICE_SELECTORS:
            el = soup.select_one(selector)
            if not el:
                continue
            price_text = el.get_text(strip=True)
            amount = self._parse_amount(price_text)
            if amount is not None:
                return {
                    "price": amount,
                    "currency": self._detect_currency(price_text),
                    "url": url,
                }

        return {"price": None, "currency": None, "url": url}

    def _detect_currency(self, text):
        """Map a currency symbol in ``text`` to its code.

        Falls back to ``"USD"`` when none of ``$``, ``€``, ``£`` is present.
        """
        if "$" in text:
            return "USD"
        if "€" in text:
            return "EUR"
        if "£" in text:
            return "GBP"
        return "USD"

Price Comparison Engine

# compare.py
from datetime import datetime


def compare_prices(product_name, price_data):
    """Compare prices collected from multiple sources.

    Entries whose ``price`` is None are ignored. Prices are compared as raw
    numbers; the ``currency`` field is not taken into account.

    Args:
        product_name: Label copied into the result's ``product`` field.
        price_data: Iterable of dicts with at least ``price`` and ``url``.

    Returns:
        ``{"product", "error"}`` when no entry has a price; otherwise a dict
        with the best/worst price, absolute and percentage savings, all
        valid entries sorted by ascending price, and a ``checked_at``
        ISO timestamp.
    """
    valid = [p for p in price_data if p.get("price") is not None]

    if not valid:
        return {"product": product_name, "error": "No prices found"}

    sorted_prices = sorted(valid, key=lambda x: x["price"])
    best = sorted_prices[0]
    worst = sorted_prices[-1]
    low, high = best["price"], worst["price"]

    # A highest price of 0 would divide by zero; report no percentage saving.
    savings_pct = round((1 - low / high) * 100, 1) if high else 0.0

    return {
        "product": product_name,
        "best_price": low,
        "best_source": best["url"],
        "worst_price": high,
        "savings": round(high - low, 2),
        "savings_pct": savings_pct,
        "all_prices": sorted_prices,
        "checked_at": datetime.now().isoformat(),
    }


def format_report(comparisons):
    """Render a list of comparison results as a plain-text report.

    Entries containing an ``error`` key are printed on a single line; all
    other entries get a summary followed by one line per collected price.
    """
    rule = "=" * 60
    out = [rule, "Price Comparison Report", rule, ""]

    for entry in comparisons:
        if "error" in entry:
            out.append(f"{entry['product']}: {entry['error']}")
        else:
            out.extend([
                f"Product: {entry['product']}",
                f"  Best:    ${entry['best_price']:.2f}",
                f"  Source:  {entry['best_source']}",
                f"  Savings: ${entry['savings']:.2f} ({entry['savings_pct']}%)",
            ])
            out.extend(
                f"    ${item['price']:.2f} — {item['url']}"
                for item in entry["all_prices"]
            )
            # Blank separator line after each product section.
            out.append("")

    return "\n".join(out)

Main Runner

# main.py
import os
import time
from solver import CaptchaSolver
from scraper import StoreScraper
from compare import compare_prices, format_report

# Products to check. Each entry has a display "name" and the list of store
# page "urls" that main() scrapes and compares for that product.
PRODUCTS = [
    {
        "name": "Wireless Headphones",
        "urls": [
            "https://store-a.example.com/headphones-xyz",
            "https://store-b.example.com/product/headphones-xyz",
            "https://store-c.example.com/electronics/headphones-xyz",
        ],
    },
]


def main():
    """Scrape every URL of every product, print and save a comparison report.

    Reads the API key from the ``CAPTCHAAI_API_KEY`` environment variable
    (raises ``KeyError`` if it is unset) and writes the report to
    ``price_report.txt`` in the current working directory.
    """
    api_key = os.environ["CAPTCHAAI_API_KEY"]
    solver = CaptchaSolver(api_key)
    scraper = StoreScraper(solver)

    comparisons = []

    for product in PRODUCTS:
        print(f"Checking prices for: {product['name']}")
        prices = []

        for url in product["urls"]:
            try:
                price = scraper.scrape_price(url)
            except Exception as e:
                # Best effort: one failing store must not abort the run.
                print(f"  {url}: Error — {e}")
            else:
                prices.append(price)
                # The "price" key is always present but may be None, so a
                # .get() default would never apply; test for None instead.
                amount = price.get("price")
                shown = "N/A" if amount is None else f"${amount}"
                print(f"  {url}: {shown}")

            # Pause between requests to stay polite to the stores.
            time.sleep(3)

        comparisons.append(compare_prices(product["name"], prices))

    report = format_report(comparisons)
    print(report)

    # Save report; it contains non-ASCII characters, so pin the encoding
    # rather than relying on the platform default.
    with open("price_report.txt", "w", encoding="utf-8") as f:
        f.write(report)


# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

FAQ

How often should I check prices?

Daily is sufficient for most products. For flash sales or competitive monitoring, check every 4–6 hours. Rate-limit your requests to avoid being blocked.

How do I handle different currencies?

Convert to a base currency using a free exchange rate API before comparing. Store the original currency for reference.

Can I run this as a scheduled job?

Yes. Use cron (Linux), Task Scheduler (Windows), or a cloud scheduler to run the bot daily.



Build your price bot — start with CaptchaAI.

Discussions (0)

No comments yet.

Related Posts

Use Cases Multi-Step Workflow Automation with CaptchaAI
Manage workflows across multiple accounts on CAPTCHA-protected platforms — action and data collection at scale.

Manage workflows across multiple accounts on CAPTCHA-protected platforms — , action, and data collection at sc...

Automation Python reCAPTCHA v2
Apr 06, 2026
Reference CAPTCHA Token Injection Methods Reference
Complete reference for injecting solved CAPTCHA tokens into web pages.

Complete reference for injecting solved CAPTCHA tokens into web pages. Covers reCAPTCHA, Turnstile, and Cloud...

Automation Python reCAPTCHA v2
Apr 08, 2026
Tutorials Pytest Fixtures for CaptchaAI API Testing
Build reusable pytest fixtures to test CAPTCHA-solving workflows with CaptchaAI.

Build reusable pytest fixtures to test CAPTCHA-solving workflows with CaptchaAI. Covers mocking, live integra...

Automation Python reCAPTCHA v2
Apr 08, 2026
Reference Browser Session Persistence for CAPTCHA Workflows
Manage browser sessions, cookies, and storage across CAPTCHA-solving runs to reduce repeat challenges and maintain authenticated state.

Manage browser sessions, cookies, and storage across CAPTCHA-solving runs to reduce repeat challenges and main...

Automation Python reCAPTCHA v2
Feb 24, 2026
Integrations Browser Profile Isolation + CaptchaAI Integration
Browser profile isolation tools create distinct browser environments with unique fingerprints per session.

Browser profile isolation tools create distinct browser environments with unique fingerprints per session. Com...

Automation Python reCAPTCHA v2
Feb 21, 2026
Comparisons WebDriver vs Chrome DevTools Protocol for CAPTCHA Automation
Compare WebDriver and Chrome DevTools Protocol (CDP) for CAPTCHA automation — detection, performance, capabilities, and when to use each with CaptchaAI.

Compare WebDriver and Chrome DevTools Protocol (CDP) for CAPTCHA automation — detection, performance, capabi...

Automation Python reCAPTCHA v2
Mar 27, 2026
Tutorials Securing CaptchaAI Credentials in Environment Variables
Store CaptchaAI API keys securely using environment variables, .env files, Docker secrets, and cloud secret managers instead of hardcoding.

Store CaptchaAI API keys securely using environment variables, .env files, Docker secrets, and cloud secret m...

Automation Python reCAPTCHA v2
Feb 12, 2026
Use Cases Event Ticket Monitoring with CAPTCHA Handling
Build an event ticket availability monitor that handles CAPTCHAs using CaptchaAI.

Build an event ticket availability monitor that handles CAPTCHAs using CaptchaAI. Python workflow for checkin...

Automation Python reCAPTCHA v2
Jan 17, 2026
Explainers reCAPTCHA v2 Invisible: Trigger Detection and Solving
Detect and solve reCAPTCHA v2 Invisible challenges with CaptchaAI — identify triggers, extract parameters, and handle auto-invoked CAPTCHAs.

Detect and solve reCAPTCHA v2 Invisible challenges with CaptchaAI — identify triggers, extract parameters,...

Automation Python reCAPTCHA v2
Apr 07, 2026
Use Cases CAPTCHA Solving in Ticket Purchase Automation
How to handle CAPTCHAs on ticketing platforms such as Ticketmaster, AXS, and event sites using CaptchaAI for automated purchasing workflows.

How to handle CAPTCHAs on ticketing platforms such as Ticketmaster, AXS, and event sites using CaptchaAI for automate...

Automation Python reCAPTCHA v2
Feb 25, 2026
Tutorials Using Fiddler to Inspect CaptchaAI API Traffic
How to use Fiddler Everywhere and Fiddler Classic to capture, inspect, and debug CaptchaAI API requests and responses — filters, breakpoints, and replay for tr...

How to use Fiddler Everywhere and Fiddler Classic to capture, inspect, and debug CaptchaAI API requests and r...

Automation Python All CAPTCHA Types
Mar 05, 2026
Tutorials GeeTest Token Injection in Browser Automation Frameworks
Learn how to inject GeeTest v3 solution tokens into Playwright, Puppeteer, and Selenium — including the three-value response, callback triggering, and form submissi...

Learn how to inject GeeTest v3 solution tokens into Playwright, Puppeteer, and Selenium — including the thre...

Automation Python Testing
Jan 18, 2026