Solving CAPTCHAs one at a time wastes 10-15 seconds per solve waiting for results. Parallel solving lets you process dozens simultaneously, matching your scraping throughput.
Architecture Overview
Your Application
├── Task 1: Submit → Poll → Token ──┐
├── Task 2: Submit → Poll → Token ──┤
├── Task 3: Submit → Poll → Token ──┼──→ Process Results
├── Task 4: Submit → Poll → Token ──┤
└── Task 5: Submit → Poll → Token ──┘
Each task runs independently. Submit and poll calls happen concurrently.
Python: asyncio
The most efficient approach for Python:
import asyncio
import aiohttp
import os
class AsyncSolver:
    """Async client for the CaptchaAI HTTP API (in.php submit / res.php poll)."""

    def __init__(self, api_key):
        # Key is attached to every submit and poll request.
        self.api_key = api_key
        self.base = "https://ocr.captchaai.com"

    async def solve(self, session, params, timeout=300, poll_interval=5):
        """Submit one CAPTCHA task and poll until a token is available.

        Args:
            session: an aiohttp.ClientSession, shared across concurrent tasks.
            params: in.php task parameters (method/googlekey/pageurl, ...).
                The caller's dict is NOT mutated.
            timeout: overall budget in seconds for submit + polling.
            poll_interval: seconds to sleep between res.php polls.

        Returns:
            The solved token (the text after "OK|").

        Raises:
            Exception: raw API error text (e.g. ERROR_WRONG_USER_KEY).
            TimeoutError: no result within `timeout` seconds.
        """
        # Copy so we never write the API key into the caller's dict.
        submit_params = dict(params, key=self.api_key)
        async with session.get(f"{self.base}/in.php", params=submit_params) as r:
            text = await r.text()
        if not text.startswith("OK|"):
            raise Exception(text)
        task_id = text.split("|")[1]

        # get_running_loop() is the supported call inside a coroutine;
        # get_event_loop() is deprecated in this context.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        poll_params = {"key": self.api_key, "action": "get", "id": task_id}
        while loop.time() < deadline:
            await asyncio.sleep(poll_interval)
            async with session.get(f"{self.base}/res.php", params=poll_params) as r:
                text = await r.text()
            if text == "CAPCHA_NOT_READY":
                continue
            if text.startswith("OK|"):
                # Split only once: the token itself may contain "|".
                return text.split("|", 1)[1]
            raise Exception(text)
        raise TimeoutError(f"task {task_id} not solved within {timeout}s")
async def solve_batch(tasks, max_concurrent=20):
    """Solve many CAPTCHA tasks concurrently, capped by a semaphore.

    Returns a list aligned with `tasks`: each entry is either a token
    string or the Exception raised for that task.
    """
    solver = AsyncSolver(os.environ["CAPTCHAAI_API_KEY"])
    gate = asyncio.Semaphore(max_concurrent)

    async def _guarded(session, params):
        # Hold one semaphore slot for the full submit+poll lifetime.
        async with gate:
            return await solver.solve(session, params)

    async with aiohttp.ClientSession() as session:
        coros = (_guarded(session, t) for t in tasks)
        # return_exceptions=True keeps one failure from cancelling the batch.
        results = await asyncio.gather(*coros, return_exceptions=True)
    solved = len(tasks) - sum(isinstance(r, Exception) for r in results)
    print(f"Solved {solved}/{len(tasks)}")
    return results
# Example: solve 50 CAPTCHAs concurrently
tasks = [
    {
        "method": "userrecaptcha",
        "googlekey": "6Le-wvkS...",
        "pageurl": f"https://example.com/page{i}",
    }
    for i in range(50)
]
results = asyncio.run(solve_batch(tasks, max_concurrent=20))
Python: ThreadPoolExecutor
For sync code, use threads:
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
import time
import os
# Credentials come from the environment so the key never lands in source control.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
# All endpoints (in.php for submit, res.php for polling) hang off this base URL.
BASE = "https://ocr.captchaai.com"
def solve_one(params, timeout=300, poll_interval=5):
    """Submit one CAPTCHA task and block until it is solved or times out.

    Args:
        params: in.php task parameters; the caller's dict is NOT mutated.
        timeout: overall deadline in seconds for submit + polling.
        poll_interval: seconds between res.php polls.

    Returns:
        The solved token (the text after "OK|").

    Raises:
        Exception: raw API error text.
        TimeoutError: deadline exceeded.
    """
    # Copy so we never write the API key into the caller's dict.
    submit_params = dict(params, key=API_KEY)
    # A per-request timeout keeps a dead connection from hanging the
    # worker thread forever (the 300 s deadline only covers polling loops).
    resp = requests.get(f"{BASE}/in.php", params=submit_params, timeout=30)
    if not resp.text.startswith("OK|"):
        raise Exception(resp.text)
    task_id = resp.text.split("|")[1]
    deadline = time.time() + timeout
    poll_params = {"key": API_KEY, "action": "get", "id": task_id}
    while time.time() < deadline:
        time.sleep(poll_interval)
        result = requests.get(f"{BASE}/res.php", params=poll_params, timeout=30)
        if result.text == "CAPCHA_NOT_READY":
            continue
        if result.text.startswith("OK|"):
            # Split only once: the token itself may contain "|".
            return result.text.split("|", 1)[1]
        raise Exception(result.text)
    raise TimeoutError(f"task {task_id} not solved within {timeout}s")
tasks = [
    {
        "method": "userrecaptcha",
        "googlekey": "6Le-wvkS...",
        "pageurl": f"https://example.com/page{i}",
    }
    for i in range(20)
]
# Fan the tasks out across 10 worker threads; report each as it finishes.
with ThreadPoolExecutor(max_workers=10) as pool:
    pending = {pool.submit(solve_one, task): idx for idx, task in enumerate(tasks)}
    for done in as_completed(pending):
        idx = pending[done]
        try:
            token = done.result()
            print(f"Task {idx}: solved ({len(token)} chars)")
        except Exception as e:
            print(f"Task {idx}: failed ({e})")
Node.js: Promise.all
// Promise-based HTTP client used for both submit and poll requests.
const axios = require("axios");
// Key comes from the environment so it never lands in source control.
const API_KEY = process.env.CAPTCHAAI_API_KEY;
// Submit one CAPTCHA task and poll until solved. Resolves with the token
// (text after "OK|"); rejects with the raw API error text or "Timeout".
async function solveOne(params) {
  // Copy so the caller's object is not mutated with the API key.
  const submitParams = { ...params, key: API_KEY };
  const submit = await axios.get("https://ocr.captchaai.com/in.php", {
    params: submitParams,
  });
  const submitText = String(submit.data);
  // Fail fast on API errors (e.g. ERROR_ZERO_BALANCE). Without this check,
  // taskId would be undefined and we would poll garbage for 5 minutes.
  if (!submitText.startsWith("OK|")) throw new Error(submitText);
  const taskId = submitText.split("|")[1];
  const deadline = Date.now() + 300000; // 300 s overall budget
  while (Date.now() < deadline) {
    await new Promise((r) => setTimeout(r, 5000)); // poll every 5 s
    const poll = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: taskId },
    });
    const text = String(poll.data);
    if (text === "CAPCHA_NOT_READY") continue;
    // Re-join so any "|" inside the token itself is preserved.
    if (text.startsWith("OK|")) return text.split("|").slice(1).join("|");
    throw new Error(text);
  }
  throw new Error("Timeout");
}
// Solve taskList with at most `concurrency` solves in flight at once.
// Returns Promise.allSettled-style result objects, in task order.
async function solveBatch(taskList, concurrency = 10) {
  const results = [];
  let offset = 0;
  // Walk the list in fixed-size windows to cap concurrency.
  while (offset < taskList.length) {
    const window = taskList.slice(offset, offset + concurrency);
    const settled = await Promise.allSettled(
      window.map((task) => solveOne(task))
    );
    results.push(...settled);
    offset += concurrency;
  }
  const solved = results.filter((r) => r.status === "fulfilled").length;
  console.log(`Solved ${solved}/${results.length}`);
  return results;
}
// Solve 30 CAPTCHAs, 10 at a time
const tasks = [];
for (let i = 0; i < 30; i++) {
  tasks.push({
    method: "userrecaptcha",
    googlekey: "6Le-wvkS...",
    pageurl: `https://example.com/page${i}`,
  });
}
solveBatch(tasks, 10);
Queue-Based Architecture
For production systems, use a task queue:
import asyncio
from asyncio import Queue
async def worker(name, queue, solver, session, results):
    """Drain `queue` forever, recording each solve outcome into `results`.

    Runs until cancelled by the coordinator. `name` labels the worker for
    callers that want to identify it. Results are written before
    task_done() so queue.join() implies all outcomes are recorded.
    """
    while True:
        task_id, params = await queue.get()
        try:
            token = await solver.solve(session, params)
        except Exception as exc:
            # Record the failure instead of letting it kill the worker loop.
            results[task_id] = {"status": "error", "error": str(exc)}
        else:
            results[task_id] = {"status": "ok", "token": token}
        finally:
            queue.task_done()
async def run_queue(tasks, num_workers=10):
    """Solve a mapping of {task_id: params} with a pool of queue workers.

    Args:
        tasks: dict mapping an arbitrary task id to in.php parameters.
        num_workers: number of concurrent worker coroutines.

    Returns:
        {task_id: {"status": "ok", "token": ...} | {"status": "error", ...}}
        with one entry per input task.
    """
    solver = AsyncSolver(os.environ["CAPTCHAAI_API_KEY"])
    queue = Queue()
    results = {}
    async with aiohttp.ClientSession() as session:
        workers = [
            asyncio.create_task(worker(f"w-{i}", queue, solver, session, results))
            for i in range(num_workers)
        ]
        for task_id, params in tasks.items():
            await queue.put((task_id, params))
        # Wait until every queued task has been marked done by a worker.
        await queue.join()
        for w in workers:
            w.cancel()
        # Await the cancelled workers so cancellation is actually delivered
        # and no "Task was destroyed but it is pending" warnings are emitted.
        await asyncio.gather(*workers, return_exceptions=True)
    return results
Performance Tips
| Factor | Recommendation |
|---|---|
| Concurrency level | Start with 10-20, then scale up based on your success rate and account balance |
| Poll interval | 5 seconds per task |
| Timeout | 300 seconds per task |
| Error handling | Use return_exceptions=True with gather |
| Session reuse | Reuse aiohttp/requests sessions |
| Rate limiting | Use semaphore to cap concurrent solves |
FAQ
How many CAPTCHAs can I solve in parallel?
CaptchaAI supports 100+ concurrent requests. Your limit is usually your balance and your proxy pool, not the API.
Does parallel solving cost more?
No. Each solve costs the same regardless of whether it's sequential or parallel.
Should I use threads or asyncio?
Use asyncio for Python. It handles thousands of concurrent tasks with minimal memory. Threads are fine for smaller workloads (< 50 concurrent).
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign in. No comments yet.