In a batch of 500 CAPTCHA tasks, some will fail. Network timeouts, temporary API limits, and unsolvable challenges are normal. The question isn't whether failures happen — it's how your system recovers from them without losing progress or re-solving already-completed tasks.
Error Categories
| Category | Examples | Retryable? |
|---|---|---|
| Transient | ERROR_NO_SLOT_AVAILABLE, network timeout, 429 | Yes — retry after delay |
| Permanent | ERROR_WRONG_USER_KEY, ERROR_KEY_DOES_NOT_EXIST | No — fix configuration |
| Task-specific | ERROR_CAPTCHA_UNSOLVABLE, invalid sitekey | Maybe — retry once, then skip |
| Budget | ERROR_ZERO_BALANCE | No — stop batch, refill |
Python: Batch with Error Recovery
import json
import time
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field, asdict
from enum import Enum
API_KEY = "YOUR_API_KEY"
SUBMIT_URL = "https://ocr.captchaai.com/in.php"
RESULT_URL = "https://ocr.captchaai.com/res.php"
class TaskStatus(str, Enum):
PENDING = "pending"
SOLVING = "solving"
SOLVED = "solved"
FAILED = "failed"
RETRYING = "retrying"
@dataclass
class TaskResult:
index: int
task_data: dict
status: TaskStatus = TaskStatus.PENDING
token: str = ""
error: str = ""
attempts: int = 0
solve_time: float = 0.0
RETRYABLE_ERRORS = {
"ERROR_NO_SLOT_AVAILABLE",
"ERROR_TOO_MUCH_REQUESTS",
"CAPCHA_NOT_READY", # Unlikely here, but safe
}
FATAL_ERRORS = {
"ERROR_WRONG_USER_KEY",
"ERROR_KEY_DOES_NOT_EXIST",
"ERROR_ZERO_BALANCE",
"ERROR_IP_NOT_ALLOWED",
}
def solve_task(result, max_retries=3):
"""Solve a single task with retry logic."""
for attempt in range(1, max_retries + 1):
result.attempts = attempt
result.status = TaskStatus.SOLVING if attempt == 1 else TaskStatus.RETRYING
start = time.monotonic()
try:
# Submit
data = result.task_data
params = {
"key": API_KEY,
"method": data.get("method", "userrecaptcha"),
"json": 1,
}
if params["method"] == "userrecaptcha":
params["googlekey"] = data["sitekey"]
params["pageurl"] = data["pageurl"]
elif params["method"] == "turnstile":
params["sitekey"] = data["sitekey"]
params["pageurl"] = data["pageurl"]
response = requests.post(SUBMIT_URL, data=params, timeout=30)
submit_result = response.json()
if submit_result.get("status") != 1:
error = submit_result.get("request", "unknown")
if error in FATAL_ERRORS:
result.status = TaskStatus.FAILED
result.error = error
return result # Don't retry fatal errors
if error in RETRYABLE_ERRORS and attempt < max_retries:
time.sleep(5 * attempt) # Exponential backoff
continue
result.status = TaskStatus.FAILED
result.error = error
return result
task_id = submit_result["request"]
# Poll
for _ in range(60):
time.sleep(5)
poll = requests.get(RESULT_URL, params={
"key": API_KEY, "action": "get",
"id": task_id, "json": 1,
}, timeout=15).json()
if poll.get("request") == "CAPCHA_NOT_READY":
continue
if poll.get("status") == 1:
result.status = TaskStatus.SOLVED
result.token = poll["request"]
result.solve_time = time.monotonic() - start
return result
error = poll.get("request", "unknown")
if error == "ERROR_CAPTCHA_UNSOLVABLE" and attempt < max_retries:
break # Retry the whole task
result.status = TaskStatus.FAILED
result.error = error
return result
# Timeout — retry if attempts remain
if attempt >= max_retries:
result.status = TaskStatus.FAILED
result.error = "TIMEOUT"
return result
except requests.RequestException as e:
if attempt >= max_retries:
result.status = TaskStatus.FAILED
result.error = f"Network: {e}"
return result
time.sleep(5 * attempt)
return result
class BatchProcessor:
def __init__(self, checkpoint_file="batch_checkpoint.json"):
self.checkpoint_file = checkpoint_file
self.results = []
def save_checkpoint(self):
"""Save current progress to disk."""
data = [
{
"index": r.index,
"task_data": r.task_data,
"status": r.status.value,
"token": r.token,
"error": r.error,
"attempts": r.attempts,
"solve_time": r.solve_time,
}
for r in self.results
]
with open(self.checkpoint_file, "w") as f:
json.dump(data, f, indent=2)
def load_checkpoint(self):
"""Load progress from a previous run."""
if not os.path.exists(self.checkpoint_file):
return []
with open(self.checkpoint_file) as f:
data = json.load(f)
return [
TaskResult(
index=d["index"],
task_data=d["task_data"],
status=TaskStatus(d["status"]),
token=d.get("token", ""),
error=d.get("error", ""),
attempts=d.get("attempts", 0),
solve_time=d.get("solve_time", 0.0),
)
for d in data
]
def process(self, tasks, max_workers=10, max_retries=3):
"""Process a batch of tasks with checkpointing."""
# Load or initialize results
existing = self.load_checkpoint()
if existing:
self.results = existing
solved_count = sum(1 for r in self.results if r.status == TaskStatus.SOLVED)
print(f"Resuming from checkpoint: {solved_count}/{len(self.results)} solved")
else:
self.results = [
TaskResult(index=i, task_data=task) for i, task in enumerate(tasks)
]
# Find tasks that need processing
pending = [
r for r in self.results
if r.status not in (TaskStatus.SOLVED,)
and (r.status != TaskStatus.FAILED or r.error not in FATAL_ERRORS)
]
if not pending:
print("All tasks already completed")
return self.results
print(f"Processing {len(pending)} tasks ({len(self.results) - len(pending)} already done)")
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {
executor.submit(solve_task, result, max_retries): result
for result in pending
}
completed = 0
for future in as_completed(futures):
completed += 1
result = future.result()
# Check for fatal error — stop entire batch
if result.error in FATAL_ERRORS:
print(f"\nFATAL: {result.error} — stopping batch")
executor.shutdown(wait=False, cancel_futures=True)
self.save_checkpoint()
return self.results
status_icon = "OK" if result.status == TaskStatus.SOLVED else "FAIL"
print(
f" [{completed}/{len(pending)}] "
f"Task {result.index}: {status_icon} "
f"(attempts={result.attempts}, {result.solve_time:.1f}s)"
)
# Checkpoint every 10 completions
if completed % 10 == 0:
self.save_checkpoint()
self.save_checkpoint()
# Summary
solved = sum(1 for r in self.results if r.status == TaskStatus.SOLVED)
failed = sum(1 for r in self.results if r.status == TaskStatus.FAILED)
print(f"\nBatch complete: {solved} solved, {failed} failed out of {len(self.results)}")
return self.results
def retry_failed(self, max_workers=10, max_retries=2):
"""Retry only failed tasks from the last run."""
failed = [
r for r in self.results
if r.status == TaskStatus.FAILED and r.error not in FATAL_ERRORS
]
if not failed:
print("No retryable failures")
return
# Reset failed tasks
for r in failed:
r.status = TaskStatus.PENDING
r.error = ""
r.token = ""
print(f"Retrying {len(failed)} failed tasks")
self.process(
[r.task_data for r in self.results],
max_workers=max_workers,
max_retries=max_retries,
)
# Usage
tasks = [
{"method": "userrecaptcha", "sitekey": "SITE_KEY", "pageurl": f"https://example.com/page{i}"}
for i in range(50)
]
processor = BatchProcessor("my_batch_checkpoint.json")
results = processor.process(tasks, max_workers=10, max_retries=3)
# Later: retry just the failures
# processor.retry_failed()
JavaScript: Batch with Error Recovery
const fs = require("fs");
const API_KEY = "YOUR_API_KEY";
const SUBMIT_URL = "https://ocr.captchaai.com/in.php";
const RESULT_URL = "https://ocr.captchaai.com/res.php";
const RETRYABLE = new Set(["ERROR_NO_SLOT_AVAILABLE", "ERROR_TOO_MUCH_REQUESTS"]);
const FATAL = new Set(["ERROR_WRONG_USER_KEY", "ERROR_KEY_DOES_NOT_EXIST", "ERROR_ZERO_BALANCE"]);
async function solveWithRetry(taskData, maxRetries = 3) {
for (let attempt = 1; attempt <= maxRetries; attempt++) {
try {
const params = { key: API_KEY, json: 1, method: "userrecaptcha", ...taskData };
const response = await fetch(SUBMIT_URL, { method: "POST", body: new URLSearchParams(params) });
const result = await response.json();
if (result.status !== 1) {
if (FATAL.has(result.request)) return { status: "fatal", error: result.request };
if (RETRYABLE.has(result.request) && attempt < maxRetries) {
await new Promise((r) => setTimeout(r, 5000 * attempt));
continue;
}
return { status: "failed", error: result.request, attempts: attempt };
}
const taskId = result.request;
for (let i = 0; i < 60; i++) {
await new Promise((r) => setTimeout(r, 5000));
const url = new URL(RESULT_URL);
url.searchParams.set("key", API_KEY);
url.searchParams.set("action", "get");
url.searchParams.set("id", taskId);
url.searchParams.set("json", "1");
const poll = await (await fetch(url)).json();
if (poll.request === "CAPCHA_NOT_READY") continue;
if (poll.status === 1) return { status: "solved", token: poll.request, attempts: attempt };
if (poll.request === "ERROR_CAPTCHA_UNSOLVABLE" && attempt < maxRetries) break;
return { status: "failed", error: poll.request, attempts: attempt };
}
} catch (err) {
if (attempt >= maxRetries) return { status: "failed", error: err.message, attempts: attempt };
await new Promise((r) => setTimeout(r, 5000 * attempt));
}
}
return { status: "failed", error: "MAX_RETRIES", attempts: 3 };
}
async function processBatch(tasks, checkpointFile = "checkpoint.json", maxWorkers = 10) {
// Load checkpoint
let results = [];
if (fs.existsSync(checkpointFile)) {
results = JSON.parse(fs.readFileSync(checkpointFile, "utf8"));
console.log(`Resuming: ${results.filter((r) => r.status === "solved").length}/${results.length} done`);
} else {
results = tasks.map((t, i) => ({ index: i, taskData: t, status: "pending" }));
}
const pending = results.filter((r) => r.status !== "solved" && !FATAL.has(r.error));
for (let i = 0; i < pending.length; i += maxWorkers) {
const batch = pending.slice(i, i + maxWorkers);
const batchResults = await Promise.all(batch.map((r) => solveWithRetry(r.taskData)));
for (let j = 0; j < batch.length; j++) {
const br = batchResults[j];
if (br.status === "fatal") {
console.error(`FATAL: ${br.error} — stopping`);
fs.writeFileSync(checkpointFile, JSON.stringify(results, null, 2));
return results;
}
Object.assign(batch[j], br);
}
// Checkpoint
if ((i + maxWorkers) % 50 === 0) {
fs.writeFileSync(checkpointFile, JSON.stringify(results, null, 2));
}
}
fs.writeFileSync(checkpointFile, JSON.stringify(results, null, 2));
const solved = results.filter((r) => r.status === "solved").length;
console.log(`Done: ${solved}/${results.length} solved`);
return results;
}
Checkpoint File Format
[
{
"index": 0,
"task_data": {"method": "userrecaptcha", "sitekey": "...", "pageurl": "..."},
"status": "solved",
"token": "03AGdBq24...",
"attempts": 1,
"solve_time": 15.3
},
{
"index": 1,
"task_data": {"method": "userrecaptcha", "sitekey": "...", "pageurl": "..."},
"status": "failed",
"error": "ERROR_CAPTCHA_UNSOLVABLE",
"attempts": 3
}
]
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| Batch stops on first error | Fatal error detection triggered | Check error type — ERROR_ZERO_BALANCE and ERROR_WRONG_USER_KEY stop the batch by design |
| Checkpoint file corrupted | Crash during write | Use atomic writes: write to temp file, then rename |
| Resume processes already-solved tasks | Checkpoint filter not working | Verify status === "solved" filter in pending task selection |
| Too many retries wasting budget | Retrying permanent failures | Classify errors correctly — only retry transient errors |
| Batch never completes | Some tasks stuck in retry loop | Add max_retries limit; mark as failed after exhausting retries |
FAQ
How often should I save checkpoints?
Every 10–50 completed tasks is a good balance. Too frequent slows processing (disk I/O); too infrequent risks losing progress on crash. For large batches (1,000+), checkpoint every 50 tasks.
Should I retry ERROR_CAPTCHA_UNSOLVABLE?
Once. Some unsolvable errors are transient — the image was ambiguous. A second attempt with a different worker may succeed. After two failures on the same task, mark it as permanently failed.
How do I handle partial results in downstream processing?
Process solved tasks immediately and separately. Don't wait for the entire batch. Your downstream system should handle missing entries — either skip them or flag for manual review.
Next Steps
Build resilient batch processing with CaptchaAI — get your API key and implement checkpoint-based recovery.
Related guides:
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In. No comments yet.