E-commerce platforms protect product pages and inventory data with CAPTCHAs to prevent competitor scraping and bot purchasing. CaptchaAI enables automated stock monitoring, price tracking, and availability alerts.
CAPTCHAs on E-Commerce Platforms
| Platform | CAPTCHA Type | Trigger | Data |
|---|---|---|---|
| Amazon | reCAPTCHA v2 | High-volume access | Price, stock, reviews |
| Walmart | reCAPTCHA v3 + Cloudflare | Bot detection | Inventory, pricing |
| Best Buy | reCAPTCHA v2 | Add-to-cart check | Stock, pricing |
| Target | Cloudflare Turnstile | Automated access | Availability |
| Shopify stores | Cloudflare Turnstile | Rate limiting | Product data |
| eBay | reCAPTCHA v2 | Search + listing access | Listings, prices |
Product Monitor
import requests
import time
import re
import json
from datetime import datetime
from bs4 import BeautifulSoup
# API key for the CaptchaAI account; sent as the `key` field on every API call
# made by solve_captcha(). "YOUR_API_KEY" is a placeholder — replace it before
# running (NOTE(review): prefer loading it from the environment over committing it).
CAPTCHAAI_KEY = "YOUR_API_KEY"
# Base URL of the CaptchaAI HTTP API; solve_captcha() appends `/in.php` to
# submit a task and `/res.php` to poll for its result.
CAPTCHAAI_URL = "https://ocr.captchaai.com"
def solve_captcha(method, sitekey, pageurl, **kwargs):
    """Submit a CAPTCHA task to CaptchaAI and poll until a token is available.

    Args:
        method: CaptchaAI task method, e.g. ``"userrecaptcha"`` or
            ``"turnstile"``.
        sitekey: Site key scraped from the page that shows the CAPTCHA.
        pageurl: URL of the page that shows the CAPTCHA.
        **kwargs: Extra form fields merged into the submit request. They are
            applied last, so they override the fields built here.

    Returns:
        The solution token string reported by the API.

    Raises:
        RuntimeError: The API rejected the task or reported a solve error
            (the message carries the API's error code).
        TimeoutError: No solution after 60 polls, 5 seconds apart.
        requests.RequestException: The HTTP request itself failed.
    """
    # Turnstile tasks take the site key in the `sitekey` field; reCAPTCHA
    # tasks take it in `googlekey`. Callers can still override via kwargs.
    key_field = "sitekey" if method == "turnstile" else "googlekey"
    data = {
        "key": CAPTCHAAI_KEY,
        "method": method,
        key_field: sitekey,
        "pageurl": pageurl,
        "json": 1,
    }
    data.update(kwargs)

    resp = requests.post(f"{CAPTCHAAI_URL}/in.php", data=data, timeout=30)
    resp.raise_for_status()
    submitted = resp.json()
    # On failure `request` holds an error code, not a task id; polling it
    # would only waste the full timeout window.
    if str(submitted.get("status")) != "1":
        raise RuntimeError(f"CaptchaAI submit failed: {submitted.get('request')}")
    task_id = submitted["request"]

    for _ in range(60):
        time.sleep(5)
        result = requests.get(
            f"{CAPTCHAAI_URL}/res.php",
            params={
                "key": CAPTCHAAI_KEY,
                "action": "get",
                "id": task_id,
                "json": 1,
            },
            timeout=30,
        )
        result.raise_for_status()
        r = result.json()
        # "CAPCHA_NOT_READY" (sic) is the literal sentinel for "keep polling".
        if r["request"] == "CAPCHA_NOT_READY":
            continue
        # Any other non-success payload is an error code, not a token.
        if str(r.get("status")) != "1":
            raise RuntimeError(f"CaptchaAI solve failed: {r['request']}")
        return r["request"]
    raise TimeoutError("Timeout")
class RetailMonitor:
    """Fetch retail product pages and report price and stock changes.

    Pages are fetched through a shared ``requests.Session`` (optionally via a
    proxy). When a fetched page looks like a CAPTCHA challenge, the challenge
    is solved through ``solve_captcha`` and the request is retried.
    """

    def __init__(self, proxy=None):
        """Create the HTTP session.

        Args:
            proxy: Optional proxy URL used for both HTTP and HTTPS traffic.
        """
        self.session = requests.Session()
        if proxy:
            self.session.proxies = {"http": proxy, "https": proxy}
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
        })

    def check_product(self, url):
        """Check a single product's price and availability.

        Args:
            url: Product page URL.

        Returns:
            A dict with ``url``, ``title``, ``price``, ``availability``
            (text, ``""`` when the selector matches nothing), ``in_stock``
            (``True``/``False``, or ``None`` when no stock element exists)
            and ``timestamp`` (local time, ISO 8601).
        """
        resp = self.session.get(url, timeout=30)
        # Handle CAPTCHA
        if self._has_captcha(resp.text):
            resp = self._solve_and_retry(resp.text, url)
        soup = BeautifulSoup(resp.text, "html.parser")
        return {
            "url": url,
            "title": self._text(soup, "h1, .product-title, #productTitle"),
            "price": self._text(soup, ".price, .a-price .a-offscreen, .prod-price"),
            "availability": self._text(soup, "#availability, .stock-status, .fulfillment"),
            "in_stock": self._check_stock(soup),
            "timestamp": datetime.now().isoformat(),
        }

    def monitor_products(self, product_urls, interval_sec=1800):
        """Continuously monitor products and alert on price/stock changes.

        Runs forever; each cycle checks every URL, compares the result with
        the previous one for that URL, then sleeps ``interval_sec`` seconds.

        Args:
            product_urls: Product page URLs to watch.
            interval_sec: Pause between full cycles, in seconds.
        """
        history = {}
        while True:
            for url in product_urls:
                try:
                    current = self.check_product(url)
                    # Check for changes
                    prev = history.get(url)
                    if prev:
                        changes = self._detect_changes(prev, current)
                        if changes:
                            self._alert(current["title"], changes)
                    history[url] = current
                except Exception as e:
                    # Per-URL boundary: one bad page must not stop the cycle.
                    print(f"Error checking {url}: {e}")
                finally:
                    # Pace requests after failures too; a failing site is
                    # exactly the one that should not be hit back-to-back.
                    time.sleep(3)
            print(f"Cycle complete: {len(product_urls)} products checked")
            time.sleep(interval_sec)

    def track_prices(self, product_urls, output_file="prices.json"):
        """Run a single price check across all products and save it as JSON.

        Args:
            product_urls: Product page URLs to check.
            output_file: Path of the JSON file to (over)write.

        Returns:
            One dict per URL: the ``check_product`` result, or
            ``{"url": ..., "error": ...}`` when the check failed.
        """
        results = []
        for url in product_urls:
            try:
                results.append(self.check_product(url))
            except Exception as e:
                results.append({"url": url, "error": str(e)})
            finally:
                # Keep the delay between requests even when a check fails.
                time.sleep(3)
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)
        print(f"Tracked {len(results)} products → {output_file}")
        return results

    def _has_captcha(self, html):
        """Return True when the HTML contains a known CAPTCHA marker."""
        lowered = html.lower()  # lower-case once, not once per marker
        return any(tag in lowered for tag in [
            'data-sitekey', 'g-recaptcha', 'cf-turnstile', 'captcha',
        ])

    def _solve_and_retry(self, html, url):
        """Solve the CAPTCHA found in ``html`` and retry the request.

        Without a ``data-sitekey`` attribute there is nothing to solve, so
        the page is simply fetched again. Otherwise the token is POSTed back
        to ``url`` in the form field matching the CAPTCHA type.

        Returns:
            The ``requests.Response`` of the retry.
        """
        # Accept single- or double-quoted attribute values.
        match = re.search(r'data-sitekey=["\']([^"\']+)["\']', html)
        if not match:
            return self.session.get(url, timeout=30)
        sitekey = match.group(1)
        # Case-insensitive, to agree with _has_captcha().
        if 'cf-turnstile' in html.lower():
            token = solve_captcha("turnstile", sitekey, url)
            return self.session.post(
                url, data={"cf-turnstile-response": token}, timeout=30
            )
        token = solve_captcha("userrecaptcha", sitekey, url)
        return self.session.post(
            url, data={"g-recaptcha-response": token}, timeout=30
        )

    def _text(self, soup, selector):
        """Return the stripped text of the first match, or "" if none."""
        el = soup.select_one(selector)
        return el.get_text(strip=True) if el else ""

    @staticmethod
    def _is_in_stock_text(text):
        """Classify availability text as in stock (True) or not (False).

        Negative phrases are tested first because "available" is a substring
        of "unavailable" and "in stock" is a substring of "not in stock".
        """
        lowered = text.lower()
        negative_phrases = (
            "unavailable", "not available", "out of stock",
            "not in stock", "sold out",
        )
        if any(phrase in lowered for phrase in negative_phrases):
            return False
        return "in stock" in lowered or "available" in lowered

    def _check_stock(self, soup):
        """Return True/False from the stock element, or None if it is absent."""
        stock_el = soup.select_one("#availability, .stock-status")
        if stock_el is None:
            return None
        return self._is_in_stock_text(stock_el.get_text(strip=True))

    def _detect_changes(self, prev, current):
        """Describe price and stock differences between two product checks.

        Args:
            prev: Earlier ``check_product`` result.
            current: Latest ``check_product`` result.

        Returns:
            A list of human-readable change strings (empty when unchanged).
        """
        changes = []
        if prev["price"] != current["price"]:
            changes.append(f"Price: {prev['price']} → {current['price']}")
        if prev["in_stock"] != current["in_stock"]:
            # None means the stock element was missing; that is "unknown",
            # not "out of stock".
            if current["in_stock"] is None:
                status = "Unknown"
            elif current["in_stock"]:
                status = "In Stock"
            else:
                status = "Out of Stock"
            changes.append(f"Stock: → {status}")
        return changes

    def _alert(self, title, changes):
        """Print an alert line for the given product title and changes."""
        print(f"ALERT [{title}]: {', '.join(changes)}")
# Usage: list the product pages to watch, then build a monitor that routes
# all traffic through a residential proxy.
products = [
    "https://store.example.com/product/abc123",
    "https://store.example.com/product/def456",
    "https://store.example.com/product/ghi789",
]
monitor = RetailMonitor("http://user:pass@residential.proxy.com:5000")
# One-time price check (results are also written to prices.json).
results = monitor.track_prices(product_urls=products)
# For continuous monitoring (every 30 min) call this instead:
# monitor.monitor_products(products, interval_sec=1800)
Category-Wide Stock Scanning
def scan_category(base_url, category, max_pages=20,
                  proxy="http://user:pass@residential.proxy.com:5000"):
    """Scan an entire product category for stock status.

    Walks ``{base_url}/{category}?page=N`` for N = 1..max_pages and stops
    early at the first page that yields no product cards.

    Args:
        base_url: Store base URL, without a trailing slash.
        category: Category path segment appended to ``base_url``.
        max_pages: Maximum number of listing pages to fetch.
        proxy: Proxy URL handed to ``RetailMonitor``. Defaults to the
            placeholder residential proxy; pass ``None`` for a direct
            connection.

    Returns:
        A list of dicts with ``name``, ``price``, ``stock`` and ``url``
        (``""`` when the card has no link or the link has no ``href``).
    """
    monitor = RetailMonitor(proxy=proxy)
    all_products = []
    for page in range(1, max_pages + 1):
        url = f"{base_url}/{category}?page={page}"
        resp = monitor.session.get(url, timeout=30)
        if monitor._has_captcha(resp.text):
            resp = monitor._solve_and_retry(resp.text, url)
        soup = BeautifulSoup(resp.text, "html.parser")
        items = soup.select(".product-card, .s-result-item")
        if not items:
            # An empty listing page means we ran past the last page.
            break
        for item in items:
            link = item.select_one("a")
            all_products.append({
                "name": monitor._text(item, ".product-name, .a-text-normal"),
                "price": monitor._text(item, ".price, .a-price"),
                "stock": monitor._text(item, ".stock, .a-color-success"),
                # .get() avoids a KeyError on anchors without an href.
                "url": link.get("href", "") if link is not None else "",
            })
        time.sleep(3)
    return all_products
Competitor Price Comparison
def _price_sort_key(entry):
    """Return a numeric sort key for a result entry.

    Entries with no parseable price (errors, empty text) sort last.
    NOTE(review): assumes "." is the decimal separator and "," a thousands
    separator — confirm for non-US stores.
    """
    match = re.search(r"\d[\d,]*(?:\.\d+)?", entry.get("price") or "")
    if not match:
        return float("inf")
    return float(match.group().replace(",", ""))


def compare_product_across_stores(product_name, stores):
    """Compare prices across retailers for the same product.

    Args:
        product_name: Search term; it is URL-encoded before being sent.
        stores: Iterable of dicts with ``name`` and ``base_url`` keys and an
            optional ``proxy`` key.

    Returns:
        One dict per store that produced a result or an error, sorted by
        numeric price ascending. Successful entries carry ``store``,
        ``price`` and ``in_stock``; failures carry ``store`` and ``error``
        and sort last.
    """
    from urllib.parse import quote_plus

    # Encode once: spaces, "&" or "#" in the name would otherwise corrupt
    # the query string.
    query = quote_plus(product_name)
    results = []
    for store in stores:
        monitor = RetailMonitor(proxy=store.get("proxy"))
        search_url = f"{store['base_url']}/search?q={query}"
        try:
            resp = monitor.session.get(search_url, timeout=30)
            if monitor._has_captcha(resp.text):
                resp = monitor._solve_and_retry(resp.text, search_url)
            soup = BeautifulSoup(resp.text, "html.parser")
            first_result = soup.select_one(".product-card, .s-result-item")
            if first_result:
                results.append({
                    "store": store["name"],
                    "price": monitor._text(first_result, ".price"),
                    "in_stock": "in stock" in first_result.get_text().lower(),
                })
        except Exception as e:
            # Per-store boundary: record the failure and keep comparing.
            results.append({"store": store["name"], "error": str(e)})
        time.sleep(5)
    # Sort on the parsed number; comparing the raw strings would put
    # "$9.99" after "$10.00".
    results.sort(key=_price_sort_key)
    return results
Monitoring Schedule
| Product Type | Check Frequency | Proxy Type |
|---|---|---|
| Electronics | Every 30 min | Rotating residential |
| Groceries | Every 4 hours | Rotating residential |
| Fashion | Every 2 hours | Residential |
| Limited releases | Every 1 min | Mobile proxy |
| Home goods | Every 6 hours | Residential |
| Commodity goods | Every 12 hours | Datacenter (often sufficient) |
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| CAPTCHA on every product page | IP flagged or rate exceeded | Increase delay, rotate proxies |
| Incorrect price scraped | Dynamic pricing rendered by JS | Use Selenium/Puppeteer instead |
| "Robot check" page | Amazon-style bot detection | Use realistic headers + residential proxy |
| Stock shows "unavailable" | Geo-restricted availability | Match proxy to target region |
| Missing product data | Page structure changed | Update CSS selectors |
FAQ
How frequently can I check product pages?
Every 30-60 minutes per product is safe for most retailers. Higher-frequency monitoring (every 1-5 minutes) works, but it requires more proxies and triggers CAPTCHAs more often.
Should I use rotating or sticky sessions?
Rotating — each product page is an independent request. Sticky sessions are only needed if checking product details requires navigation.
Can I monitor thousands of products?
Yes. Spread requests across multiple proxy IPs and stagger checks. At 1,000 products with 3-second delays, the delays alone add up to ~50 minutes per full cycle (1,000 × 3 s), before counting request and CAPTCHA-solving time.
Related Guides
Track retail inventory in real time — get your CaptchaAI key for automated CAPTCHA handling.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.