Your scraper submits a CAPTCHA to CaptchaAI, but the response times out. Did the task submit? You don't know, so you retry — now two tasks are solving the same CAPTCHA. You pay for both. With idempotency tracking, retries detect the existing task and return its result instead of creating a duplicate.
Where Duplicates Happen
| Scenario | What happens | Cost |
|---|---|---|
| Network timeout on submit | Retry creates second task | 2× solve cost |
| Worker crash mid-poll | New worker resubmits | 2× solve cost |
| Queue delivers same message twice | Two workers solve same CAPTCHA | 2× solve cost |
| User refreshes page during solve | Frontend resubmits | 2× solve cost |
Python: Idempotency Layer
import hashlib
import json
import time
import requests
import threading
from dataclasses import dataclass
# Placeholder credential; replace with a real CaptchaAI API key.
API_KEY = "YOUR_API_KEY"
# Endpoint that IdempotentSolver._submit() POSTs new tasks to.
SUBMIT_URL = "https://ocr.captchaai.com/in.php"
# Endpoint that IdempotentSolver._poll() GETs task results from.
RESULT_URL = "https://ocr.captchaai.com/res.php"
@dataclass
class TaskRecord:
task_id: str
created_at: float
result: str | None = None
completed: bool = False
class IdempotentSolver:
    """CAPTCHA solver that prevents duplicate submissions.

    Each solve request is reduced to an idempotency key derived from its
    stable parameters. While a record for that key exists (at most ``ttl``
    seconds), repeated calls either return the cached result or poll the
    task that was already submitted instead of submitting a new one.
    """

    class SolveFailed(RuntimeError):
        """The API reported a definitive failure for a task (not a timeout)."""

    def __init__(self, api_key: str, ttl: int = 300):
        """Create a solver.

        Args:
            api_key: Key sent as ``key`` on every API request.
            ttl: Seconds to keep idempotency records before eviction.
        """
        self._api_key = api_key
        self._ttl = ttl  # seconds to keep idempotency records
        self._tasks: dict[str, TaskRecord] = {}
        self._lock = threading.Lock()

    def _make_key(self, params: dict) -> str:
        """Generate a deterministic key from solve parameters.

        Only the fields that identify the CAPTCHA are hashed; any other
        entries in ``params`` are ignored. Returns 16 hex characters.
        """
        stable = {
            "method": params.get("method"),
            "googlekey": params.get("googlekey"),
            "sitekey": params.get("sitekey"),
            "pageurl": params.get("pageurl"),
        }
        body = params.get("body")
        if body:
            # Digest the whole payload: a fixed-length prefix is shared by
            # many different images and would make them collide.
            payload = body if isinstance(body, bytes) else str(body).encode()
            stable["body"] = hashlib.sha256(payload).hexdigest()
        # Remove None values
        stable = {k: v for k, v in stable.items() if v is not None}
        raw = json.dumps(stable, sort_keys=True)
        return hashlib.sha256(raw.encode()).hexdigest()[:16]

    def _cleanup(self):
        """Remove expired idempotency records.

        Callers must hold ``self._lock``.
        """
        cutoff = time.monotonic() - self._ttl
        expired = [k for k, v in self._tasks.items() if v.created_at < cutoff]
        for k in expired:
            del self._tasks[k]

    def _submit(self, params: dict) -> str:
        """Submit a task to CaptchaAI and return its task id.

        Raises:
            RuntimeError: If the response status is not 1.
        """
        submit_params = {**params, "key": self._api_key, "json": 1}
        resp = requests.post(SUBMIT_URL, data=submit_params, timeout=30).json()
        if resp.get("status") != 1:
            raise RuntimeError(f"Submit failed: {resp.get('request')}")
        return resp["request"]

    def _poll(self, task_id: str, timeout: int = 180) -> str:
        """Poll for a task result every 5 seconds until ``timeout`` elapses.

        Raises:
            IdempotentSolver.SolveFailed: The API answered with an error.
            RuntimeError: ``"Timeout"`` when no answer arrived in time.
        """
        start = time.monotonic()
        while time.monotonic() - start < timeout:
            time.sleep(5)
            resp = requests.get(RESULT_URL, params={
                "key": self._api_key, "action": "get",
                "id": task_id, "json": 1,
            }, timeout=15).json()
            if resp.get("request") == "CAPCHA_NOT_READY":
                continue
            if resp.get("status") == 1:
                return resp["request"]
            # Subclass of RuntimeError, so existing handlers keep working;
            # solve() uses the type to tell a dead task from a timeout.
            raise self.SolveFailed(f"Solve failed: {resp.get('request')}")
        raise RuntimeError("Timeout")

    def solve(self, params: dict, timeout: int = 180) -> str:
        """Solve a CAPTCHA with idempotency protection.

        Args:
            params: Solve parameters forwarded to the submit endpoint.
            timeout: Maximum seconds to poll for the result.

        Returns:
            The solved token, either freshly obtained or cached.

        Raises:
            RuntimeError: On submit failure, solve failure, or poll timeout.
        """
        idem_key = self._make_key(params)

        with self._lock:
            self._cleanup()
            record = self._tasks.get(idem_key)
            if record is not None and record.completed and record.result:
                print(f"[IDEM] Returning cached result for {idem_key}")
                return record.result
            if record is not None and not record.completed:
                print(f"[IDEM] Task {record.task_id} already in progress")
            else:
                # New task — submit and record. The submit happens under the
                # lock so that check-then-submit is atomic; the price is that
                # submissions are serialized.
                task_id = self._submit(params)
                record = TaskRecord(
                    task_id=task_id, created_at=time.monotonic()
                )
                self._tasks[idem_key] = record

        # Poll outside the lock. The `with` block above has already released
        # it, so nothing here releases or re-acquires it by hand.
        try:
            result = self._poll(record.task_id, timeout)
        except self.SolveFailed:
            # The task is dead: forget it so the next retry can resubmit.
            # Timeouts and network errors keep the record, because the task
            # may still finish and resubmitting would pay twice.
            with self._lock:
                if self._tasks.get(idem_key) is record:
                    del self._tasks[idem_key]
            raise

        with self._lock:
            record.result = result
            record.completed = True
        return result

    def get_stats(self) -> dict:
        """Return counts of tracked, completed, and pending records."""
        with self._lock:
            total = len(self._tasks)
            completed = sum(1 for t in self._tasks.values() if t.completed)
            return {"total_tracked": total, "completed": completed, "pending": total - completed}
# --- Usage ---
# Pass the module-level API_KEY so that editing that one constant is enough.
solver = IdempotentSolver(API_KEY)
params = {
    "method": "turnstile",
    "sitekey": "0x4XXXXXXXXXXXXXXXXX",
    "pageurl": "https://example.com/login",
}
# First call — submits to CaptchaAI
token1 = solver.solve(params)
# Retry — returns cached result without a new submission
token2 = solver.solve(params)
assert token1 == token2  # Same token, paid only once
print(solver.get_stats())
JavaScript: Deduplication with Map
// Placeholder credential; replace with a real CaptchaAI API key.
const API_KEY = "YOUR_API_KEY";
// Endpoint that new tasks are POSTed to.
const SUBMIT_URL = "https://ocr.captchaai.com/in.php";
// Endpoint that is polled for task results.
const RESULT_URL = "https://ocr.captchaai.com/res.php";
/**
 * CAPTCHA solver that deduplicates identical solve requests.
 *
 * Concurrent callers with the same parameters share one in-flight promise;
 * a successful result is cached for `ttlMs` milliseconds.
 */
class IdempotentSolver {
  #apiKey;
  #tasks = new Map(); // key -> { result, promise, createdAt }
  #ttl;

  /**
   * @param {string} apiKey  Key sent as `key` on every API request.
   * @param {number} [ttlMs] How long a successful result stays cached.
   */
  constructor(apiKey, ttlMs = 300_000) {
    this.#apiKey = apiKey;
    this.#ttl = ttlMs;
  }

  /**
   * Build the deduplication key from the fields that identify the CAPTCHA.
   * The canonical string itself is the Map key, so two different requests
   * can never collide; `body` is included so different images stay apart.
   */
  #makeKey(params) {
    return JSON.stringify([
      params.method ?? null,
      params.googlekey ?? null,
      params.sitekey ?? null,
      params.pageurl ?? null,
      params.body ?? null,
    ]);
  }

  /** Short label for log lines only; never used for lookups. */
  #shortId(key) {
    let hash = 0;
    for (let i = 0; i < key.length; i++) {
      hash = ((hash << 5) - hash + key.charCodeAt(i)) | 0;
    }
    return hash.toString(36);
  }

  /**
   * Submit a task, then poll every 5 s (at most 60 times) for its result.
   * @returns {Promise<string>} The solved token.
   * @throws {Error} On submit failure, solve failure, or timeout.
   */
  async #submitAndPoll(params) {
    // Caller params go first so they cannot override the solver's own
    // `key` and `json` fields.
    const body = new URLSearchParams({ ...params, key: this.#apiKey, json: "1" });
    const resp = await (await fetch(SUBMIT_URL, { method: "POST", body })).json();
    if (resp.status !== 1) throw new Error(`Submit: ${resp.request}`);
    const taskId = resp.request;

    // URLSearchParams percent-encodes the values for us.
    const query = new URLSearchParams({
      key: this.#apiKey,
      action: "get",
      id: taskId,
      json: "1",
    });
    const url = `${RESULT_URL}?${query}`;

    for (let i = 0; i < 60; i++) {
      await new Promise((r) => setTimeout(r, 5000));
      const poll = await (await fetch(url)).json();
      if (poll.request === "CAPCHA_NOT_READY") continue;
      if (poll.status === 1) return poll.request;
      throw new Error(`Solve: ${poll.request}`);
    }
    throw new Error("Timeout");
  }

  /**
   * Solve a CAPTCHA, reusing a cached result or an in-flight solve when one
   * exists for the same parameters.
   * @param {object} params Solve parameters forwarded to the submit endpoint.
   * @returns {Promise<string>} The solved token.
   */
  async solve(params) {
    const key = this.#makeKey(params);

    // Check for in-flight or completed task
    const record = this.#tasks.get(key);
    if (record?.result) {
      console.log(`[IDEM] Returning cached result for ${this.#shortId(key)}`);
      return record.result;
    }
    if (record?.promise) {
      console.log(`[IDEM] Awaiting in-flight task for ${this.#shortId(key)}`);
      return record.promise;
    }

    // Store the promise so concurrent callers await the same solve
    const promise = this.#submitAndPoll(params).then(
      (result) => {
        this.#tasks.set(key, { result, promise: null, createdAt: Date.now() });
        const timer = setTimeout(() => this.#tasks.delete(key), this.#ttl);
        // In Node, do not let the eviction timer keep the process alive;
        // in browsers setTimeout returns a number and this is a no-op.
        timer.unref?.();
        return result;
      },
      (err) => {
        // Never cache a failure: drop the entry so the next call resubmits
        // instead of receiving this same rejected promise.
        if (this.#tasks.get(key)?.promise === promise) this.#tasks.delete(key);
        throw err;
      },
    );

    this.#tasks.set(key, { result: null, promise, createdAt: Date.now() });
    return promise;
  }
}
// Usage
// Pass the API_KEY constant so that editing that one value is enough.
const solver = new IdempotentSolver(API_KEY);
const params = {
  method: "turnstile",
  sitekey: "0x4XXXXXXXXXXXXXXXXX",
  pageurl: "https://example.com/login",
};
// Both calls resolve to the same result — only one API submission
const [token1, token2] = await Promise.all([
  solver.solve(params),
  solver.solve(params),
]);
console.log(token1 === token2); // true
Idempotency Key Design
| Approach | Pros | Cons |
|---|---|---|
| Hash of (method + sitekey + pageurl) | Simple, automatic | Cannot tell a retry from a genuinely new solve of the same page — within the TTL both receive the same token |
| Caller-provided UUID | Full control | Requires discipline from all callers |
| Hash + timestamp window | Prevents over-caching | More complex key logic |
| Request body hash | Catches all parameter changes | Image body hashing can be expensive |
Distributed Idempotency
For multi-worker systems, use Redis instead of in-process memory:
import redis
# Shared Redis client used by idempotent_solve(); built with the library's
# default connection settings since no host/port is passed here.
r = redis.Redis()
def idempotent_solve(params: dict, ttl: int = 300) -> str:
    """Solve a CAPTCHA once across workers, using Redis as the shared record.

    The first worker to claim the key submits the task; every other worker
    reuses the stored result or polls the stored task id.

    Args:
        params: Solve parameters, passed to ``make_key`` and ``submit``.
        ttl: Seconds the Redis record lives.

    Returns:
        The solved token.

    Raises:
        RuntimeError: If another worker holds the claim but does not publish
            a task id within the wait window.
    """
    key = f"captcha:idem:{make_key(params)}"
    claim_wait = 30  # seconds to wait for the claiming worker's task id
    deadline = time.monotonic() + claim_wait

    while True:
        # SET ... NX EX claims the key atomically. A separate GET followed by
        # SETEX would let two workers both see "no key" and both submit.
        if r.set(key, json.dumps({"task_id": None}), nx=True, ex=ttl):
            break

        existing = r.get(key)
        if existing is None:
            # The record expired or was released between SET and GET.
            continue
        data = json.loads(existing)
        if data.get("result"):
            return data["result"]
        if data.get("task_id"):
            # Another worker is solving — poll its task
            return poll(data["task_id"])
        # The claimant has not published its task id yet.
        if time.monotonic() >= deadline:
            raise RuntimeError("Timed out waiting for the claiming worker to submit")
        time.sleep(0.5)

    # We own the claim: submit and publish the task id.
    try:
        task_id = submit(params)
    except Exception:
        # Nothing was submitted, so release the claim; otherwise waiting
        # workers would sit on an empty placeholder until the TTL expires.
        r.delete(key)
        raise
    r.setex(key, ttl, json.dumps({"task_id": task_id}))
    result = poll(task_id)
    r.setex(key, ttl, json.dumps({"task_id": task_id, "result": result}))
    return result
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| Cache returns stale token | TTL too long for token validity | Set TTL shorter than CAPTCHA token expiry (usually 90–120s) |
| Different params generate same key | Key hash collision or missing fields | Include all distinguishing params in the key |
| Memory grows unbounded | No TTL cleanup | Run _cleanup() periodically or use TTL-based eviction |
| Concurrent calls both submit | Race condition in check-then-set | Use locks (Python) or store the promise (JavaScript) |
| Distributed dedup fails | Redis key expired too early | Set Redis TTL to match expected solve time + buffer |
FAQ
How long should I keep idempotency records?
Keep them no longer than the CAPTCHA token validity period — typically 90–120 seconds for reCAPTCHA, 300 seconds for Turnstile. After that, a cached token has likely expired and a new solve is needed anyway.
Does this work with different proxy configurations?
If you use different proxies for the same CAPTCHA, include the proxy in the idempotency key. Same sitekey + different proxy = different task. If proxy doesn't matter for your use case, omit it from the key.
How do I handle failed solves in the idempotency cache?
Remove the record on failure so the next retry can resubmit. Only cache successful results. In the JavaScript promise approach, that means deleting the stored promise from the map when it rejects — otherwise every later call for the same key receives the same rejected promise.
Next Steps
Stop paying for duplicate CAPTCHA solves — get your CaptchaAI API key and add idempotency tracking.
Related guides:
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.