Your scraper encounters reCAPTCHA v2 on one site, Turnstile on another, and image CAPTCHAs on a third. Without a factory, you end up with a growing if-else chain that's hard to maintain and extend. The factory pattern maps each CAPTCHA type to its solver automatically.
The Problem
# Without factory — grows with every new CAPTCHA type
# Each branch hard-codes one solver call, so supporting another CAPTCHA
# type means adding another elif to this chain.
if captcha_type == "recaptcha_v2":
    token = solve_recaptcha_v2(sitekey, url)
elif captcha_type == "recaptcha_v3":
    # v3 is the only branch here that needs the extra action/score arguments.
    token = solve_recaptcha_v3(sitekey, url, action, score)
elif captcha_type == "turnstile":
    token = solve_turnstile(sitekey, url)
elif captcha_type == "hcaptcha":
    token = solve_hcaptcha(sitekey, url)
# ... more types
Python: CAPTCHA Solver Factory
import re
import requests
import time
from abc import ABC, abstractmethod
# Placeholder credential; replace with your own API key.
API_KEY = "YOUR_API_KEY"
# Endpoint that BaseSolver._submit_and_poll POSTs new tasks to.
SUBMIT_URL = "https://ocr.captchaai.com/in.php"
# Endpoint that BaseSolver._submit_and_poll polls for task results.
RESULT_URL = "https://ocr.captchaai.com/res.php"
# --- Base solver ---
class BaseSolver(ABC):
    """Base class for all CAPTCHA solvers.

    Subclasses implement :meth:`solve` and :meth:`can_handle`. The shared
    submit-then-poll exchange with the API lives in :meth:`_submit_and_poll`.
    """

    def __init__(self, api_key: str):
        self.api_key = api_key

    def _with_credentials(self, params: dict) -> dict:
        """Return a copy of *params* with the API key and JSON flag added.

        A copy is built so the caller's dict is never modified. Any ``key``
        or ``json`` entry already present in *params* is overridden.
        """
        return {**params, "key": self.api_key, "json": 1}

    def _submit_and_poll(self, params: dict, timeout: int = 180) -> str:
        """Submit a task and poll until it is solved.

        Args:
            params: Task parameters for the submit endpoint. Not modified.
            timeout: Polling budget in seconds, measured from the moment the
                task was accepted.

        Returns:
            The ``request`` field of the first successful poll response.

        Raises:
            RuntimeError: If the submit is rejected, the solve fails, or the
                polling budget runs out.
        """
        payload = self._with_credentials(params)
        resp = requests.post(SUBMIT_URL, data=payload, timeout=30).json()
        if resp.get("status") != 1:
            raise RuntimeError(f"Submit failed: {resp.get('request')}")
        task_id = resp["request"]

        start = time.monotonic()
        while time.monotonic() - start < timeout:
            # Wait before every poll, including the first one.
            time.sleep(5)
            poll = requests.get(
                RESULT_URL,
                params={
                    "key": self.api_key,
                    "action": "get",
                    "id": task_id,
                    "json": 1,
                },
                timeout=15,
            ).json()
            # "CAPCHA_NOT_READY" is the exact status string compared against
            # the API response; the spelling is intentional.
            if poll.get("request") == "CAPCHA_NOT_READY":
                continue
            if poll.get("status") == 1:
                return poll["request"]
            raise RuntimeError(f"Solve failed: {poll.get('request')}")
        raise RuntimeError("Timeout")

    @abstractmethod
    def solve(self, **kwargs) -> str:
        """Solve the CAPTCHA and return the token."""

    @staticmethod
    @abstractmethod
    def can_handle(captcha_info: dict) -> bool:
        """Return True if this solver handles the given CAPTCHA type."""
# --- Concrete solvers ---
class RecaptchaV2Solver(BaseSolver):
    """Solver for reCAPTCHA v2 challenges, including the invisible variant."""

    def solve(self, sitekey: str, pageurl: str, invisible: bool = False, **kwargs) -> str:
        """Submit a reCAPTCHA v2 task and return the solved token.

        Extra keyword arguments are accepted and ignored.
        """
        request = dict(method="userrecaptcha", googlekey=sitekey, pageurl=pageurl)
        if invisible:
            # The flag is only sent when the widget is invisible.
            request.update(invisible="1")
        return self._submit_and_poll(request)

    @staticmethod
    def can_handle(captcha_info: dict) -> bool:
        """Return True when the ``type`` entry is ``"recaptcha_v2"``."""
        kind = captcha_info.get("type")
        return kind == "recaptcha_v2"
class RecaptchaV3Solver(BaseSolver):
    """Solver for score-based reCAPTCHA v3 challenges."""

    def solve(
        self,
        sitekey: str,
        pageurl: str,
        action: str = "verify",
        min_score: float = 0.3,
        **kwargs,
    ) -> str:
        """Submit a reCAPTCHA v3 task and return the solved token.

        ``min_score`` is sent as a string. Extra keyword arguments are
        accepted and ignored.
        """
        request = {
            "method": "userrecaptcha",
            "version": "v3",
            "googlekey": sitekey,
            "pageurl": pageurl,
            "action": action,
            "min_score": str(min_score),
        }
        return self._submit_and_poll(request)

    @staticmethod
    def can_handle(captcha_info: dict) -> bool:
        """Return True when the ``type`` entry is ``"recaptcha_v3"``."""
        kind = captcha_info.get("type")
        return kind == "recaptcha_v3"
class TurnstileSolver(BaseSolver):
    """Solver for Turnstile challenges."""

    def solve(self, sitekey: str, pageurl: str, **kwargs) -> str:
        """Submit a Turnstile task and return the solved token.

        Extra keyword arguments are accepted and ignored.
        """
        request = dict(method="turnstile", sitekey=sitekey, pageurl=pageurl)
        return self._submit_and_poll(request)

    @staticmethod
    def can_handle(captcha_info: dict) -> bool:
        """Return True when the ``type`` entry is ``"turnstile"``."""
        kind = captcha_info.get("type")
        return kind == "turnstile"
class HCaptchaSolver(BaseSolver):
    """Solver for hCaptcha challenges."""

    def solve(self, sitekey: str, pageurl: str, **kwargs) -> str:
        """Submit an hCaptcha task and return the solved token.

        Extra keyword arguments are accepted and ignored.
        """
        request = dict(method="hcaptcha", sitekey=sitekey, pageurl=pageurl)
        return self._submit_and_poll(request)

    @staticmethod
    def can_handle(captcha_info: dict) -> bool:
        """Return True when the ``type`` entry is ``"hcaptcha"``."""
        kind = captcha_info.get("type")
        return kind == "hcaptcha"
class ImageSolver(BaseSolver):
    """Solver for image CAPTCHAs supplied as base64 data."""

    def solve(self, base64_image: str, **kwargs) -> str:
        """Submit the base64 image and return the solver's answer.

        Extra keyword arguments are accepted and ignored.
        """
        request = dict(method="base64", body=base64_image)
        return self._submit_and_poll(request)

    @staticmethod
    def can_handle(captcha_info: dict) -> bool:
        """Return True when the ``type`` entry is ``"image"``."""
        kind = captcha_info.get("type")
        return kind == "image"
# --- Factory ---
class CaptchaSolverFactory:
    """Picks and instantiates the solver that matches a detected CAPTCHA."""

    def __init__(self, api_key: str):
        self.api_key = api_key
        # Candidates are consulted in this order; the first match wins.
        self._solvers = [
            RecaptchaV2Solver,
            RecaptchaV3Solver,
            TurnstileSolver,
            HCaptchaSolver,
            ImageSolver,
        ]

    def register(self, solver_class: type):
        """Add a solver class after the ones already registered."""
        self._solvers += [solver_class]

    def create(self, captcha_info: dict) -> BaseSolver:
        """Return a new solver instance for *captcha_info*.

        Raises:
            ValueError: If no registered solver class accepts the info.
        """
        chosen = next(
            (candidate for candidate in self._solvers if candidate.can_handle(captcha_info)),
            None,
        )
        if chosen is None:
            raise ValueError(f"No solver for CAPTCHA type: {captcha_info.get('type')}")
        return chosen(self.api_key)

    def detect_and_solve(self, html: str, pageurl: str) -> str:
        """Detect the CAPTCHA in *html*, solve it and return the token.

        Raises:
            ValueError: If nothing is detected or no solver matches.
        """
        detected = detect_captcha(html, pageurl)
        if detected:
            # The whole detection dict (including "type") is passed as
            # keyword arguments to the solver.
            return self.create(detected).solve(**detected)
        raise ValueError("No CAPTCHA detected on page")
# --- Detection ---
def detect_captcha(html: str, pageurl: str) -> dict | None:
"""Detect CAPTCHA type and extract parameters from HTML."""
# Turnstile
match = re.search(r'cf-turnstile[^>]*data-sitekey=["\']([^"\']+)["\']', html)
if match:
return {"type": "turnstile", "sitekey": match.group(1), "pageurl": pageurl}
# reCAPTCHA v3 (check before v2 — v3 has render with version hint)
match = re.search(r'recaptcha/api\.js\?render=([^&"\']+)', html)
if match and match.group(1) != "explicit":
return {"type": "recaptcha_v3", "sitekey": match.group(1), "pageurl": pageurl}
# reCAPTCHA v2
match = re.search(r'g-recaptcha[^>]*data-sitekey=["\']([^"\']+)["\']', html)
if match:
invisible = 'data-size="invisible"' in html
return {"type": "recaptcha_v2", "sitekey": match.group(1), "pageurl": pageurl, "invisible": invisible}
# hCaptcha
match = re.search(r'h-captcha[^>]*data-sitekey=["\']([^"\']+)["\']', html)
if match:
return {"type": "hcaptcha", "sitekey": match.group(1), "pageurl": pageurl}
return None
# --- Usage ---
factory = CaptchaSolverFactory("YOUR_API_KEY")

# Auto-detect and solve: fetch the page once, then hand its markup and URL
# to the factory.
login_url = "https://example.com/login"
session = requests.Session()
response = session.get(login_url)
token = factory.detect_and_solve(response.text, login_url)
print(f"Token: {token[:30]}...")

# Or create a specific solver when the CAPTCHA type is already known
solver = factory.create(dict(type="turnstile"))
token = solver.solve(sitekey="SITEKEY", pageurl="https://example.com")
JavaScript: CAPTCHA Solver Factory
// Placeholder credential; replace with your own API key.
const API_KEY = "YOUR_API_KEY";
// Endpoint that submitAndPoll POSTs new tasks to.
const SUBMIT_URL = "https://ocr.captchaai.com/in.php";
// Endpoint that submitAndPoll polls for task results.
const RESULT_URL = "https://ocr.captchaai.com/res.php";
/**
 * Submit a task and poll until it is solved.
 *
 * @param {Object} params - Task parameters for the submit endpoint.
 * @param {Object} [options]
 * @param {number} [options.pollIntervalMs=5000] - Delay before each poll.
 * @param {number} [options.maxAttempts=60] - Polls to make before giving up.
 * @returns {Promise<string>} The `request` field of the first successful poll.
 * @throws {Error} If the submit is rejected, the solve fails, or every
 *   attempt is used up.
 */
async function submitAndPoll(params, { pollIntervalMs = 5000, maxAttempts = 60 } = {}) {
  const body = new URLSearchParams({ key: API_KEY, json: "1", ...params });
  const submitResponse = await fetch(SUBMIT_URL, { method: "POST", body });
  const submitted = await submitResponse.json();
  if (submitted.status !== 1) throw new Error(`Submit: ${submitted.request}`);

  // Build the query with URLSearchParams so the key and task id are
  // percent-encoded instead of being spliced into the URL verbatim.
  const query = new URLSearchParams({
    key: API_KEY,
    action: "get",
    id: String(submitted.request),
    json: "1",
  });
  const pollUrl = `${RESULT_URL}?${query}`;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    // Wait before every poll, including the first one.
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    const poll = await (await fetch(pollUrl)).json();
    // "CAPCHA_NOT_READY" is the exact status string compared against the
    // API response; the spelling is intentional.
    if (poll.request === "CAPCHA_NOT_READY") continue;
    if (poll.status === 1) return poll.request;
    throw new Error(`Solve: ${poll.request}`);
  }
  throw new Error("Timeout");
}
// Solver registry: maps a CAPTCHA type name to a handler that takes the
// detection info object and resolves to the solved token.
const solvers = {
  // `invisible` is optional; the flag is only sent when it is truthy.
  recaptcha_v2: ({ sitekey, pageurl, invisible = false }) =>
    submitAndPoll({
      method: "userrecaptcha",
      googlekey: sitekey,
      pageurl,
      ...(invisible ? { invisible: "1" } : {}),
    }),
  // min_score is sent as a string.
  recaptcha_v3: ({ sitekey, pageurl, action = "verify", minScore = 0.3 }) =>
    submitAndPoll({
      method: "userrecaptcha",
      version: "v3",
      googlekey: sitekey,
      pageurl,
      action,
      min_score: String(minScore),
    }),
  turnstile: ({ sitekey, pageurl }) =>
    submitAndPoll({ method: "turnstile", sitekey, pageurl }),
  hcaptcha: ({ sitekey, pageurl }) =>
    submitAndPoll({ method: "hcaptcha", sitekey, pageurl }),
  image: ({ base64Image }) =>
    submitAndPoll({ method: "base64", body: base64Image }),
};
/**
 * Register (or replace) the handler for a CAPTCHA type.
 *
 * @param {string} type - Type name later looked up in the registry.
 * @param {Function} handler - Receives the CAPTCHA info object and returns
 *   the token (or a promise of it).
 * @throws {TypeError} If `type` is not a non-empty string or `handler` is
 *   not a function, so a bad registration fails here rather than at solve
 *   time.
 */
function registerSolver(type, handler) {
  if (typeof type !== "string" || type === "") {
    throw new TypeError("Solver type must be a non-empty string");
  }
  if (typeof handler !== "function") {
    throw new TypeError(`Solver handler for "${type}" must be a function`);
  }
  solvers[type] = handler;
}
/**
 * Detect the CAPTCHA type in a page and extract its parameters.
 *
 * Checks run in a fixed order (Turnstile, reCAPTCHA v3, reCAPTCHA v2,
 * hCaptcha) and the first hit wins.
 *
 * @param {string} html - Page markup to scan.
 * @param {string} pageurl - URL of the page; copied into the result.
 * @returns {Object|null} `{ type, sitekey, pageurl }` (plus `invisible` for
 *   reCAPTCHA v2), or `null` when nothing is recognised.
 */
function detectCaptcha(html, pageurl) {
  const turnstile = html.match(/cf-turnstile[^>]*data-sitekey=["']([^"']+)["']/);
  if (turnstile) return { type: "turnstile", sitekey: turnstile[1], pageurl };

  // reCAPTCHA v3 is checked before v2 because it is recognised by the script
  // URL's render=<sitekey> parameter, which may follow other parameters.
  const recaptchaV3 = html.match(
    /recaptcha\/api\.js\?(?:[^"'\s<>]*?&(?:amp;)?)?render=([^&"']+)/
  );
  // render=explicit is not a sitekey, so fall through to the v2 check.
  if (recaptchaV3 && recaptchaV3[1] !== "explicit") {
    return { type: "recaptcha_v3", sitekey: recaptchaV3[1], pageurl };
  }

  const recaptcha = html.match(/g-recaptcha[^>]*data-sitekey=["']([^"']+)["']/);
  if (recaptcha) {
    // Only inspect the widget's own tag when looking for the invisible flag.
    const tagStart = Math.max(html.lastIndexOf("<", recaptcha.index), 0);
    const closing = html.indexOf(">", recaptcha.index + recaptcha[0].length);
    const widgetTag = html.slice(tagStart, closing === -1 ? html.length : closing);
    const invisible = /data-size=["']invisible["']/.test(widgetTag);
    return { type: "recaptcha_v2", sitekey: recaptcha[1], pageurl, invisible };
  }

  const hcaptcha = html.match(/h-captcha[^>]*data-sitekey=["']([^"']+)["']/);
  if (hcaptcha) return { type: "hcaptcha", sitekey: hcaptcha[1], pageurl };

  return null;
}
/**
 * Solve a detected CAPTCHA with the handler registered for its type.
 *
 * @param {Object} captchaInfo - Detection result; its `type` selects the handler.
 * @returns {Promise<string>} Whatever the selected handler resolves to.
 * @throws {Error} If no handler is registered for the type.
 */
async function solve(captchaInfo) {
  const type = captchaInfo?.type;
  // Own-property check: a plain-object lookup would also find inherited
  // members such as "toString" or "constructor" and call them as handlers.
  const handler = Object.hasOwn(solvers, type) ? solvers[type] : undefined;
  if (typeof handler !== "function") throw new Error(`No solver for: ${type}`);
  return handler(captchaInfo);
}
// Usage: fetch the page once, detect the CAPTCHA in its markup, then solve
// it if one was found.
const pageUrl = "https://example.com/login";
const pageResponse = await fetch(pageUrl);
const html = await pageResponse.text();
const info = detectCaptcha(html, pageUrl);
if (info) {
  const token = await solve(info);
  const preview = token.substring(0, 30);
  console.log(`Solved ${info.type}: ${preview}...`);
}
Adding New CAPTCHA Types
The factory's key advantage — adding a new type doesn't change existing code:
# Add GeeTest support without modifying existing solvers
class GeeTestV3Solver(BaseSolver):
    """Solver for GeeTest v3 challenges."""

    def solve(self, gt: str, challenge: str, pageurl: str, **kwargs) -> str:
        """Submit a GeeTest task and return the solver's result.

        Extra keyword arguments are accepted and ignored.
        """
        request = dict(method="geetest", gt=gt, challenge=challenge, pageurl=pageurl)
        return self._submit_and_poll(request)

    @staticmethod
    def can_handle(captcha_info: dict) -> bool:
        """Return True when the ``type`` entry is ``"geetest_v3"``."""
        kind = captcha_info.get("type")
        return kind == "geetest_v3"


# Register it so the factory can select it by type
factory.register(GeeTestV3Solver)
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| `No solver for CAPTCHA type` | Type not registered | Register the solver class with `factory.register()` |
| Wrong solver selected | Detection order matters | Check for more specific types first (reCAPTCHA v3 before v2) |
| Factory creates new instance every call | By design — avoids state leakage | Cache instances if stateless sharing is safe |
| `can_handle` matches multiple solvers | Overlapping detection logic | Make detection conditions more specific |
| Detection misses dynamically loaded CAPTCHAs | CAPTCHA not in initial HTML | Fetch page with JavaScript rendering before detection |
FAQ
When should I use a factory vs. direct solver instantiation?
Use the factory when you handle multiple CAPTCHA types and want auto-detection. Use direct instantiation when you already know the exact CAPTCHA type when writing the code (for example, a scraper dedicated to a single site) and don't need runtime flexibility.
How does the factory differ from dependency injection?
DI decides which solver to use at application startup. The factory decides at runtime based on detected CAPTCHA data. They work well together — inject the factory via DI, then use it to create solvers at runtime.
Can I combine the factory with a cache?
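Yes, with a caveat. Cache solved tokens by (sitekey, pageurl) key with a TTL, and check the cache before creating a solver. This avoids re-solving the same CAPTCHA within the token's validity window. Keep in mind that most providers treat a token as single-use (a reCAPTCHA token, for example, can be verified only once), so a cached token only helps if it has not been submitted yet — for instance, tokens solved ahead of time — or if the target site accepts reuse.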
Next Steps
Handle any CAPTCHA type automatically — get your CaptchaAI API key and implement the factory pattern.
Related guides:
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.