Travel booking sites aggressively block automated fare checks with CAPTCHAs and bot detection. CaptchaAI lets you monitor flight and hotel prices reliably through these defenses.
CAPTCHA Landscape on Travel Sites
| Site Category | CAPTCHA Type | Difficulty |
|---|---|---|
| Airlines (direct) | reCAPTCHA v3, Cloudflare | Medium |
| OTAs (Expedia, Booking) | reCAPTCHA v2, Turnstile | Medium-High |
| Meta-search (Google Flights, Kayak) | reCAPTCHA v3 | Medium |
| Budget airlines | Image CAPTCHA, reCAPTCHA | Low-Medium |
| Hotel aggregators | Cloudflare Challenge | High |
Fare Monitor Implementation
import json
import os
import re
import time
from datetime import datetime, timedelta, timezone

import requests
# CaptchaAI API key, read from the environment when the module is loaded.
# If CAPTCHAAI_API_KEY is not set, this line raises KeyError.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
def solve_captcha(params, poll_interval=5, max_polls=60, timeout=30):
    """Submit a CAPTCHA task to CaptchaAI and poll until a solution arrives.

    Args:
        params: Task parameters sent to ``in.php`` (for example ``method``,
            ``googlekey``/``sitekey`` and ``pageurl``). The API key is added
            to a copy, so the caller's dict is not modified.
        poll_interval: Seconds to wait before each poll of ``res.php``.
        max_polls: Maximum number of polls before giving up.
        timeout: Per-request timeout in seconds passed to ``requests``.

    Returns:
        The solution text that follows ``OK|`` in the ``res.php`` response.

    Raises:
        Exception: If the submit response does not start with ``OK|``, or
            if a poll returns anything other than ``CAPCHA_NOT_READY`` or
            an ``OK|`` result.
        TimeoutError: If the task is still not ready after ``max_polls``
            polls.

    Network failures and request timeouts propagate as raised by
    ``requests``.
    """
    # Build a new dict rather than writing the key into the caller's one.
    payload = {**params, "key": API_KEY}
    resp = requests.get(
        "https://ocr.captchaai.com/in.php", params=payload, timeout=timeout
    )
    submit_text = resp.text.strip()
    if not submit_text.startswith("OK|"):
        raise Exception(f"Submit: {resp.text}")
    task_id = submit_text.split("|", 1)[1]

    for _ in range(max_polls):
        time.sleep(poll_interval)
        result = requests.get(
            "https://ocr.captchaai.com/res.php",
            params={"key": API_KEY, "action": "get", "id": task_id},
            timeout=timeout,
        )
        # Strip so a trailing newline cannot defeat the exact comparison.
        result_text = result.text.strip()
        # "CAPCHA" (sic) is the literal status string this code polls for.
        if result_text == "CAPCHA_NOT_READY":
            continue
        if result_text.startswith("OK|"):
            return result_text.split("|", 1)[1]
        raise Exception(f"Solve: {result.text}")

    raise TimeoutError(
        f"CAPTCHA task {task_id} not solved after {max_polls} polls"
    )
class FareMonitor:
    """Fetch fare pages, solve CAPTCHAs found on them, and track prices.

    ``history`` collects one result dict per successfully checked route for
    the lifetime of this instance. It is held in memory only;
    ``export_report`` writes the whole list to a JSON file.
    """

    # Sitekey in the restricted character set the reCAPTCHA branch accepts.
    _RECAPTCHA_SITEKEY_RE = re.compile(
        r'data-sitekey=["\']([A-Za-z0-9_-]+)["\']'
    )
    # Turnstile sitekey: anything up to the closing quote.
    _TURNSTILE_SITEKEY_RE = re.compile(r'data-sitekey=["\']([^"\']+)')
    # Value of render= when it is the first parameter of the reCAPTCHA script.
    _RECAPTCHA_RENDER_RE = re.compile(
        r'recaptcha/api\.js\?render=([^"\'&\s]*)'
    )
    # render= values that select a v2 load mode rather than naming a v3 key.
    _V2_RENDER_MODES = frozenset({"explicit", "onload"})
    # A dollar amount; the leading \d keeps a stray "$," from matching.
    _PRICE_RE = re.compile(r'\$\s*(\d[\d,]*(?:\.\d{2})?)')

    def __init__(self, timeout=30):
        """Create a session with a browser-like User-Agent and empty history.

        Args:
            timeout: Per-request timeout in seconds for page fetches.
        """
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/120.0.0.0"
        )
        self.timeout = timeout
        self.history = []

    def _recaptcha_task(self, html, url):
        """Return solve_captcha parameters for a reCAPTCHA on the page, or None."""
        match = self._RECAPTCHA_SITEKEY_RE.search(html)
        if match is None:
            return None
        task = {
            "method": "userrecaptcha",
            "googlekey": match.group(1),
            "pageurl": url,
        }
        render = self._RECAPTCHA_RENDER_RE.search(html)
        # render=explicit / render=onload are v2 load modes; any other value
        # is treated as a v3 site key in the script URL.
        if render and render.group(1) not in self._V2_RENDER_MODES:
            task["version"] = "v3"
            task["action"] = "search"
        return task

    def fetch_with_captcha(self, url):
        """Fetch a travel page, solving CAPTCHAs if encountered.

        Args:
            url: Page to GET; a solved token is POSTed back to the same URL.

        Returns:
            The text of the last response received.

        Exceptions from ``solve_captcha`` and from the HTTP session
        propagate to the caller.
        """
        resp = self.session.get(url, timeout=self.timeout)

        # Turnstile widgets also carry data-sitekey, so a page that mentions
        # cf-turnstile must not be handed to the reCAPTCHA branch.
        if "cf-turnstile" not in resp.text:
            task = self._recaptcha_task(resp.text, url)
            if task is not None:
                token = solve_captcha(task)
                resp = self.session.post(
                    url,
                    data={"g-recaptcha-response": token},
                    timeout=self.timeout,
                )

        # Checked against the current response, which may be the page
        # returned after a reCAPTCHA token was posted.
        if "cf-turnstile" in resp.text:
            match = self._TURNSTILE_SITEKEY_RE.search(resp.text)
            if match:
                token = solve_captcha({
                    "method": "turnstile",
                    "sitekey": match.group(1),
                    "pageurl": url,
                })
                resp = self.session.post(
                    url,
                    data={"cf-turnstile-response": token},
                    timeout=self.timeout,
                )

        return resp.text

    def check_fares(self, routes, delay=3):
        """Check fares for a list of routes.

        Args:
            routes: Iterable of dicts with ``origin``, ``destination``,
                ``date`` and ``url`` keys.
            delay: Seconds to pause between consecutive routes.

        Returns:
            A list with one result dict per route that was fetched and
            parsed without error. Each result is also appended to
            ``self.history``. A failing route is reported on stdout and
            skipped.
        """
        results = []
        for index, route in enumerate(routes):
            # Pause between routes, including after a failed one, but not
            # before the first or after the last.
            if index and delay:
                time.sleep(delay)
            try:
                html = self.fetch_with_captcha(route["url"])
                prices = self._extract_prices(html)
                # Same naive-UTC ISO format that datetime.utcnow() produced.
                stamp = datetime.now(timezone.utc).replace(tzinfo=None)
                result = {
                    "route": f"{route['origin']}-{route['destination']}",
                    "date": route["date"],
                    "prices": prices,
                    "min_price": min(prices) if prices else None,
                    "timestamp": stamp.isoformat(),
                }
            except Exception as e:
                # Best effort: one bad route must not stop the rest.
                print(f" {route.get('origin', '?')}-"
                      f"{route.get('destination', '?')}: ERROR - {e}")
                continue

            results.append(result)
            self.history.append(result)
            if prices:
                print(f" {result['route']} ({route['date']}): "
                      f"${min(prices)}-${max(prices)}")
            else:
                print(f" {result['route']}: No prices found")
        return results

    def _extract_prices(self, html):
        """Extract prices from travel page HTML.

        Returns:
            A sorted list of distinct dollar amounts strictly between 20 and
            10000. Empty or missing HTML yields an empty list.
        """
        if not html:
            return []
        found = set()
        for match in self._PRICE_RE.finditer(html):
            price = float(match.group(1).replace(",", ""))
            if 20 < price < 10000:  # Filter noise
                found.add(price)
        return sorted(found)

    def detect_price_drops(self, threshold_pct=5):
        """Detect significant price drops in history.

        For every route/date pair, the two most recent non-empty minimum
        prices are compared.

        Args:
            threshold_pct: Minimum drop, in percent, that raises an alert.

        Returns:
            A list of alert dicts with ``route``, ``previous``, ``current``
            and ``change`` (a formatted percentage string).
        """
        route_prices = {}
        for entry in self.history:
            key = f"{entry['route']}_{entry['date']}"
            series = route_prices.setdefault(key, [])
            # Truthiness skips None (no prices found) and also 0, so the
            # division below never sees a zero previous price.
            if entry["min_price"]:
                series.append(entry["min_price"])

        alerts = []
        for key, prices in route_prices.items():
            if len(prices) < 2:
                continue
            prev, current = prices[-2], prices[-1]
            change_pct = ((current - prev) / prev) * 100
            if change_pct < -threshold_pct:
                alerts.append({
                    "route": key,
                    "previous": prev,
                    "current": current,
                    "change": f"{change_pct:.1f}%",
                })
        return alerts

    def export_report(self, filename="fare_report.json"):
        """Export fare history to JSON.

        Args:
            filename: Path of the file to write; existing content is
                overwritten.
        """
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(self.history, f, indent=2)
        print(f"Exported {len(self.history)} fare checks to {filename}")
# Routes to monitor: one dict per origin/destination/date search page.
routes = [
    dict(
        origin="JFK",
        destination="LAX",
        date="2025-03-15",
        url="https://example-airline.com/flights?from=JFK&to=LAX&date=2025-03-15",
    ),
    dict(
        origin="SFO",
        destination="ORD",
        date="2025-03-20",
        url="https://example-airline.com/flights?from=SFO&to=ORD&date=2025-03-20",
    ),
]

# Run one monitoring pass, then write the collected history to the
# default report file.
monitor = FareMonitor()
results = monitor.check_fares(routes)
monitor.export_report()
Scheduling
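# Check fares every 4 hours. Cron runs jobs with a minimal environment, so
# define CAPTCHAAI_API_KEY in the crontab (or in a wrapper script) as well.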
0 */4 * * * cd /opt/fare-monitor && python fare_monitor.py
Cost Analysis
| Monitoring Level | Routes | Checks/Day | CAPTCHAs/Day | Est. Cost |
|---|---|---|---|---|
| Personal | 5 | 6/route | ~30 | $0.50 |
| Small Agency | 50 | 4/route | ~200 | $2-5 |
| Enterprise | 500 | 6/route | ~3,000 | $20-40 |
FAQ
How often should I check fares?
Every 4-6 hours for personal use. Every 1-2 hours for business use. Airlines update prices in batches, so more frequent checks yield diminishing returns.
Can I monitor hotel prices too?
Yes. The same approach works for Booking.com, Expedia, and hotels' own booking sites. Adjust the price extraction patterns to match each hotel page's format.
How do I handle dynamic pricing pages?
Some travel sites require JavaScript rendering. Use Selenium or Playwright for the page fetch, then CaptchaAI for CAPTCHA solving.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.