Tutorials

CSV-Driven Batch CAPTCHA Solving with Python and CaptchaAI

When you have hundreds or thousands of URLs that need CAPTCHA tokens, manual solving isn't practical. A CSV-driven batch processor reads your task list, submits tasks to CaptchaAI in parallel, and writes results back — giving you a complete output file with tokens, solve times, and error details.

CSV Input Format

Prepare your input CSV with the parameters needed for each task:

url,sitekey,captcha_type
https://example.com/page1,6LeIxAcTAAAAAJcZVRqyHh71UMIEGNQ_MXjiZKhI,recaptcha_v2
https://example.com/page2,6LfD3PIbAAAAAJs_eEHvoOl475b9Amjm0Brsg2eI,recaptcha_v2
https://example2.com/login,0x4AAAAAAA...,turnstile
https://example3.com/form,a1b2c3d4-e5f6-...,hcaptcha

Python Batch Processor

import csv
import time
import requests
import concurrent.futures
from datetime import datetime, timezone

# Credentials and endpoints used by submit_task() and poll_result().
API_KEY = "YOUR_API_KEY"  # placeholder; replace with your own key before running
SUBMIT_URL = "https://ocr.captchaai.com/in.php"  # submit_task() POSTs new tasks here
RESULT_URL = "https://ocr.captchaai.com/res.php"  # poll_result() GETs task results from here
# Batch tuning knobs.
MAX_WORKERS = 10  # default thread-pool size for process_csv()
POLL_INTERVAL = 5  # seconds slept before every result poll (passed to time.sleep)
MAX_POLL_TIME = 300  # seconds poll_result() keeps polling one task before returning "TIMEOUT"


def submit_task(row):
    """Submit a single CAPTCHA task based on CSV row data.

    Args:
        row: Mapping of CSV column names to values. ``sitekey`` and ``url``
            are required. ``captcha_type`` falls back to ``"recaptcha_v2"``
            when missing or blank. ``recaptcha_v3`` rows may also provide
            ``action`` and ``min_score``.

    Returns:
        A ``(task_id, error)`` tuple. On success ``error`` is ``None``;
        on any failure ``task_id`` is ``None`` and ``error`` is a
        non-empty message. Bad row data and transport problems are
        reported through ``error`` rather than raised.
    """
    # csv.DictReader yields "" (or None for short rows) for blank cells, so
    # ``row.get(key, default)`` alone would never fall back to the default.
    captcha_type = row.get("captcha_type") or "recaptcha_v2"

    if captcha_type in ("recaptcha_v2", "recaptcha_v3"):
        method, key_param = "userrecaptcha", "googlekey"
    elif captcha_type in ("turnstile", "hcaptcha"):
        method, key_param = captcha_type, "sitekey"
    else:
        return None, f"Unknown type: {captcha_type}"

    # Report incomplete rows as a row-level error; a KeyError raised inside
    # a worker thread would otherwise abort the whole batch.
    missing = [name for name in ("sitekey", "url") if not row.get(name)]
    if missing:
        return None, f"Missing required field(s): {', '.join(missing)}"

    params = {
        "key": API_KEY,
        "json": 1,
        "method": method,
        key_param: row["sitekey"],
        "pageurl": row["url"],
    }
    if captcha_type == "recaptcha_v3":
        params.update({
            "version": "v3",
            "action": row.get("action") or "verify",
            "min_score": row.get("min_score") or "0.3",
        })

    try:
        response = requests.post(SUBMIT_URL, data=params, timeout=30)
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body (e.g. an HTML error page) on
        # requests versions whose JSON error is not a RequestException.
        return None, str(e) or type(e).__name__

    if not isinstance(result, dict):
        return None, f"Unexpected response: {result!r}"
    if result.get("status") == 1:
        return result["request"], None
    # Never return an empty error string: callers test ``error`` for truth.
    return None, result.get("request") or "unknown error"


def poll_result(task_id):
    """Poll for a single task result.

    Sleeps ``POLL_INTERVAL`` seconds before every request and keeps
    polling until the task is solved, the API reports an error, or
    ``MAX_POLL_TIME`` seconds have passed.

    Args:
        task_id: Identifier returned by the submit endpoint.

    Returns:
        A ``(token, elapsed_seconds, error)`` tuple. On success ``error``
        is ``None``. On an API error ``token`` is ``None`` and ``error``
        holds the API's message. On timeout the tuple is
        ``(None, MAX_POLL_TIME, "TIMEOUT")``.
    """
    start = time.monotonic()

    while time.monotonic() - start < MAX_POLL_TIME:
        time.sleep(POLL_INTERVAL)
        try:
            response = requests.get(
                RESULT_URL,
                params={"key": API_KEY, "action": "get", "id": task_id, "json": 1},
                timeout=15,
            )
            result = response.json()
        except (requests.RequestException, ValueError):
            # Best-effort: a dropped connection or a non-JSON body (e.g. a
            # gateway error page) is treated as transient and retried until
            # the deadline instead of crashing the worker.
            continue

        if not isinstance(result, dict):
            continue

        if result.get("request") == "CAPCHA_NOT_READY":
            continue

        elapsed = time.monotonic() - start
        if result.get("status") == 1:
            return result["request"], elapsed, None

        # Any other response is a terminal API error for this task.
        return None, elapsed, result.get("request") or "unknown"

    return None, MAX_POLL_TIME, "TIMEOUT"


def solve_row(index, row):
    """Process a single CSV row: submit + poll.

    Args:
        index: Zero-based position of the row in the input file; copied to
            ``row_index`` so results can be re-sorted after parallel runs.
        row: Mapping of CSV column names to values for this task.

    Returns:
        A dict containing the original row columns plus ``row_index``,
        ``task_id``, ``status`` (``"solved"`` or ``"error"``), ``token``,
        ``error`` and ``solve_time``. Both outcomes carry the same keys.
    """
    task_id, error = submit_task(row)
    # Also treat "no task id" as a failure so a falsy error message can
    # never lead to polling for a task that was not created.
    if error or task_id is None:
        return {
            **row,
            "row_index": index,
            "task_id": "",
            "status": "error",
            "error": error or "unknown error",
            "token": "",
            "solve_time": 0,
        }

    token, solve_time, error = poll_result(task_id)

    return {
        **row,
        "row_index": index,
        "task_id": task_id,
        "status": "solved" if token else "error",
        "token": token or "",
        "error": error or "",
        "solve_time": round(solve_time, 1),
    }


def process_csv(input_file, output_file, max_workers=MAX_WORKERS):
    """Process an entire CSV file of CAPTCHA tasks.

    Reads every row of ``input_file``, solves the rows in parallel with a
    thread pool, and writes the original columns plus ``task_id``,
    ``status``, ``token``, ``error`` and ``solve_time`` to ``output_file``
    in the original row order. Progress and a summary are printed.

    Args:
        input_file: Path of the input CSV (header row required).
        output_file: Path of the CSV to write; overwritten if it exists.
        max_workers: Number of rows solved concurrently.
    """
    # "utf-8-sig" reads plain UTF-8 too, but strips the BOM some spreadsheet
    # exports add, which would otherwise corrupt the first header name.
    with open(input_file, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        rows = list(reader)
        # Take the header from the reader, not rows[0], so a file with no
        # data rows still yields a valid (header-only) output file.
        input_fields = list(reader.fieldnames or [])

    print(f"Loaded {len(rows)} tasks from {input_file}")
    print(f"Processing with {max_workers} parallel workers...")

    results = []
    solved = 0
    failed = 0

    start_time = time.monotonic()

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(solve_row, i, row): i
            for i, row in enumerate(rows)
        }

        for future in concurrent.futures.as_completed(futures):
            index = futures[future]
            try:
                result = future.result()
            except Exception as exc:
                # Worker boundary: record the failure on this row so one
                # unexpected exception cannot discard the whole batch.
                result = {
                    **rows[index],
                    "row_index": index,
                    "task_id": "",
                    "status": "error",
                    "token": "",
                    "error": f"{type(exc).__name__}: {exc}",
                    "solve_time": 0,
                }
            results.append(result)

            if result["status"] == "solved":
                solved += 1
                print(f"  [{solved + failed}/{len(rows)}] Row {result['row_index']}: "
                      f"solved in {result['solve_time']}s")
            else:
                failed += 1
                print(f"  [{solved + failed}/{len(rows)}] Row {result['row_index']}: "
                      f"FAILED — {result['error']}")

    # Sort results by original row order
    results.sort(key=lambda r: r["row_index"])

    # Write output; skip result columns the input already declares so the
    # header never contains a duplicate name.
    result_fields = ["task_id", "status", "token", "error", "solve_time"]
    output_fields = input_fields + [
        name for name in result_fields if name not in input_fields
    ]
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=output_fields, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(results)

    total_time = time.monotonic() - start_time
    print(f"\nComplete: {solved} solved, {failed} failed out of {len(rows)}")
    print(f"Total time: {total_time:.1f}s")
    print(f"Output: {output_file}")


# Usage
if __name__ == "__main__":
    # Guarded so that importing this module does not start a batch run.
    process_csv("captcha_tasks.csv", "captcha_results.csv", max_workers=10)

JavaScript Batch Processor (Node.js)

const fs = require("fs");

// Credentials and endpoints used by submitTask() and pollResult().
const API_KEY = "YOUR_API_KEY"; // placeholder; replace with your own key before running
const SUBMIT_URL = "https://ocr.captchaai.com/in.php"; // submitTask() POSTs new tasks here
const RESULT_URL = "https://ocr.captchaai.com/res.php"; // pollResult() fetches task results from here
// Batch tuning knobs.
const MAX_WORKERS = 10; // default number of rows processCSV() solves per batch
const POLL_INTERVAL = 5000; // milliseconds waited before every poll (passed to setTimeout)
const MAX_POLL_TIME = 300000; // milliseconds pollResult() keeps polling before throwing "TIMEOUT"

/**
 * Parse CSV text into an array of row objects keyed by the header line.
 *
 * Supports double-quoted fields (embedded commas, newlines and `""`
 * escapes), LF and CRLF line endings, and a leading UTF-8 BOM. Blank lines
 * are skipped. Values are trimmed; cells missing from a short row become "".
 *
 * @param {string} text Raw CSV content; the first record is the header.
 * @returns {Object[]} One object per data row; empty when there are no data rows.
 */
function parseCSV(text) {
  // Strip a BOM so the first header name is not silently corrupted.
  const input = text.replace(/^\uFEFF/, "");
  const records = [];
  let record = [];
  let field = "";
  let inQuotes = false;

  const endField = () => {
    record.push(field.trim());
    field = "";
  };
  const endRecord = () => {
    endField();
    // A record holding a single empty cell is a blank line, not a task.
    if (record.length > 1 || record[0] !== "") records.push(record);
    record = [];
  };

  for (let i = 0; i < input.length; i++) {
    const ch = input[i];
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (input[i + 1] === '"') {
        // A doubled quote inside a quoted field is a literal quote.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"' && field.trim() === "") {
      // Opening quote at the start of a field; drop any padding before it.
      field = "";
      inQuotes = true;
    } else if (ch === ",") {
      endField();
    } else if (ch === "\n") {
      endRecord();
    } else if (ch !== "\r") {
      field += ch;
    }
  }
  // Flush the final record when the text does not end with a newline.
  if (field !== "" || record.length > 0) endRecord();

  if (records.length === 0) return [];
  const [headers, ...dataRows] = records;
  return dataRows.map((values) =>
    Object.fromEntries(headers.map((h, i) => [h, values[i] || ""]))
  );
}

/**
 * Serialize row objects to CSV text.
 *
 * @param {Object[]} rows Row objects to write.
 * @param {string[]} fields Column names, in output order; also the header.
 * @returns {string} Header line plus one line per row, joined with "\n".
 */
function toCSV(rows, fields) {
  const escapeCell = (value) => {
    // Only null/undefined become empty; `||` would also blank a real 0
    // (for example solve_time on a failed row).
    const text = value == null ? "" : String(value);
    // Quote cells containing a delimiter, quote or line break, and double
    // any embedded quotes so the output can be parsed back.
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const header = fields.map(escapeCell).join(",");
  const lines = rows.map((row) =>
    fields.map((f) => escapeCell(row[f])).join(",")
  );
  return [header, ...lines].join("\n");
}

/**
 * Submit one CAPTCHA task described by a CSV row.
 *
 * @param {Object} row Row with `url`, `sitekey` and optional `captcha_type`
 *   (defaults to "recaptcha_v2"); "recaptcha_v3" rows may also carry
 *   `action` and `min_score`.
 * @returns {Promise<string>} The task id returned by the submit endpoint.
 * @throws {Error} On an unknown type, a missing field, a transport failure,
 *   or when the API rejects the task (message is the API's error code).
 */
async function submitTask(row) {
  const type = row.captcha_type || "recaptcha_v2";

  // Validate before sending: URLSearchParams would otherwise serialize a
  // missing value as the literal string "undefined".
  const missing = ["sitekey", "url"].filter((name) => !row[name]);
  const params = { key: API_KEY, json: 1 };

  if (type === "recaptcha_v2") {
    Object.assign(params, { method: "userrecaptcha", googlekey: row.sitekey, pageurl: row.url });
  } else if (type === "recaptcha_v3") {
    Object.assign(params, {
      method: "userrecaptcha",
      googlekey: row.sitekey,
      pageurl: row.url,
      version: "v3",
      action: row.action || "verify",
      min_score: row.min_score || "0.3",
    });
  } else if (type === "turnstile") {
    Object.assign(params, { method: "turnstile", sitekey: row.sitekey, pageurl: row.url });
  } else if (type === "hcaptcha") {
    Object.assign(params, { method: "hcaptcha", sitekey: row.sitekey, pageurl: row.url });
  } else {
    // Fail fast instead of submitting a request with no method.
    throw new Error(`Unknown type: ${type}`);
  }

  if (missing.length > 0) {
    throw new Error(`Missing required field(s): ${missing.join(", ")}`);
  }

  const response = await fetch(SUBMIT_URL, {
    method: "POST",
    body: new URLSearchParams(params),
    // Bound the request so a hung connection cannot stall the batch.
    signal: AbortSignal.timeout(30000),
  });
  const result = await response.json();
  if (result.status !== 1) throw new Error(result.request || "Submit failed");
  return result.request;
}

/**
 * Poll the result endpoint until a task is solved, fails, or times out.
 *
 * Waits POLL_INTERVAL ms before every request. Transport failures and
 * unparsable responses are retried until MAX_POLL_TIME ms have elapsed.
 *
 * @param {string} taskId Id returned by the submit endpoint.
 * @returns {Promise<{token: string, time: number}>} The token and the
 *   elapsed time in seconds.
 * @throws {Error} With the API's error code on a terminal API error, or
 *   "TIMEOUT" once MAX_POLL_TIME is exceeded.
 */
async function pollResult(taskId) {
  const start = Date.now();
  const url = new URL(RESULT_URL);
  url.searchParams.set("key", API_KEY);
  url.searchParams.set("action", "get");
  url.searchParams.set("id", taskId);
  url.searchParams.set("json", "1");

  while (Date.now() - start < MAX_POLL_TIME) {
    await new Promise((r) => setTimeout(r, POLL_INTERVAL));

    let result;
    try {
      // Per-request timeout: without it a hung connection would block
      // forever and MAX_POLL_TIME would never be enforced.
      const response = await fetch(url, { signal: AbortSignal.timeout(15000) });
      result = await response.json();
    } catch (err) {
      // Network error, timeout, or non-JSON body: treat as transient.
      continue;
    }

    if (result.request === "CAPCHA_NOT_READY") continue;
    if (result.status === 1) return { token: result.request, time: (Date.now() - start) / 1000 };
    throw new Error(result.request || "Poll failed");
  }
  throw new Error("TIMEOUT");
}

/**
 * Solve one CSV row: submit the task, then poll for its result.
 * Never rejects; failures are reported in the returned object.
 *
 * @param {number} index Zero-based position of the row in the input.
 * @param {Object} row Parsed CSV row.
 * @returns {Promise<Object>} The row's columns plus row_index, task_id,
 *   status ("solved" or "error"), token, error and solve_time.
 */
async function solveRow(index, row) {
  // Declared outside the try block so a failure while polling still
  // reports the id of the task that was submitted.
  let taskId = "";
  try {
    taskId = await submitTask(row);
    const { token, time } = await pollResult(taskId);
    return { ...row, row_index: index, task_id: taskId, status: "solved", token, error: "", solve_time: time.toFixed(1) };
  } catch (err) {
    const message = err && err.message ? err.message : String(err);
    return { ...row, row_index: index, task_id: taskId, status: "error", token: "", error: message, solve_time: 0 };
  }
}

/**
 * Solve every task in a CSV file and write the results to another CSV.
 *
 * Rows are processed in consecutive batches of `maxWorkers`; each batch
 * finishes before the next one starts. Output rows keep the input order.
 *
 * @param {string} inputFile Path of the input CSV (header row required).
 * @param {string} outputFile Path of the CSV to write (overwritten).
 * @param {number} [maxWorkers=MAX_WORKERS] Rows solved concurrently.
 * @returns {Promise<void>}
 */
async function processCSV(inputFile, outputFile, maxWorkers = MAX_WORKERS) {
  const text = fs.readFileSync(inputFile, "utf8");
  const rows = parseCSV(text);
  console.log(`Loaded ${rows.length} tasks from ${inputFile}`);

  // A step of 0, a negative number or NaN would never advance the loop.
  const batchSize = Math.max(1, Math.floor(Number(maxWorkers)) || 1);

  const results = [];
  // Process in batches to limit concurrency
  for (let i = 0; i < rows.length; i += batchSize) {
    const batch = rows.slice(i, i + batchSize);
    const batchResults = await Promise.all(
      batch.map((row, j) => solveRow(i + j, row))
    );

    for (const result of batchResults) {
      results.push(result);
      const tag = result.status === "solved" ? "solved" : `FAILED — ${result.error}`;
      console.log(`  [${results.length}/${rows.length}] Row ${result.row_index}: ${tag}`);
    }
  }

  results.sort((a, b) => a.row_index - b.row_index);

  // With no data rows there is no rows[0]; fall back to the header line so
  // a header-only input still produces a header-only output.
  const inputFields = rows.length > 0
    ? Object.keys(rows[0])
    : text.trim().split("\n")[0].split(",").map((h) => h.trim()).filter(Boolean);
  const fields = [...inputFields, "task_id", "status", "token", "error", "solve_time"];
  fs.writeFileSync(outputFile, toCSV(results, fields));
  const solved = results.filter((r) => r.status === "solved").length;
  console.log(`\nComplete: ${solved} solved, ${results.length - solved} failed`);
  console.log(`Output: ${outputFile}`);
}

processCSV("captcha_tasks.csv", "captcha_results.csv", 10).catch((err) => {
  // Report failures (e.g. an unreadable input file) and signal them through
  // the exit code instead of leaving an unhandled promise rejection.
  console.error(`Batch failed: ${err && err.message ? err.message : err}`);
  process.exitCode = 1;
});

Output CSV Format

The output file includes all original columns plus solve results:

url,sitekey,captcha_type,task_id,status,token,error,solve_time
https://example.com/page1,6LeIxAc...,recaptcha_v2,73948572,solved,03AGdBq24...,,15.3
https://example.com/page2,6LfD3PI...,recaptcha_v2,73948573,solved,03AHJ_Vu...,,12.8
https://example2.com/login,0x4AAAA...,turnstile,73948574,error,,ERROR_CAPTCHA_UNSOLVABLE,45.0

Configuration Options

| Parameter | Default | Description |
| --- | --- | --- |
| MAX_WORKERS | 10 | Concurrent solve tasks |
| POLL_INTERVAL | 5s | Time between poll requests |
| MAX_POLL_TIME | 300s | Maximum wait per task |

Tuning Concurrency

| Scenario | Recommended Workers |
| --- | --- |
| Small batch (< 50 tasks) | 5 |
| Medium batch (50–500 tasks) | 10–20 |
| Large batch (500+ tasks) | 20–50 (check account rate limits) |

Higher concurrency processes faster but increases API load. Start conservative and increase.

Troubleshooting

| Issue | Cause | Fix |
| --- | --- | --- |
| Many ERROR_NO_SLOT_AVAILABLE errors | Too many concurrent submissions | Reduce MAX_WORKERS; add delay between submissions |
| All tasks timeout | Wrong sitekey or pageurl for all rows | Verify CSV data — test one row manually first |
| Output CSV has empty tokens | Solve failed but error column shows reason | Check the error column for each failed row |
| CSV parsing errors | Commas in URLs or sitekeys | Use proper CSV quoting — wrap values containing commas in double quotes |
| Memory issues with large files | Loading entire CSV into memory | Process in chunks — read and process N rows at a time |

FAQ

How many tasks can I process in one batch?

There's no hard limit. CaptchaAI handles the concurrency on their side. Practically, batches of 10,000+ work fine with 20–50 workers. The bottleneck is usually solve time (10–30 seconds per task), not submission.

Can I resume a failed batch?

Yes — check the output CSV for rows with status=error. Filter them into a new input CSV and reprocess. The Python processor preserves row order, so you can merge results afterward.

How do I estimate the cost of a batch?

Check your balance before and after a test batch of 10 rows. Multiply the cost per solve by your total row count. Different CAPTCHA types have different pricing — reCAPTCHA and Turnstile typically cost $1–3 per 1,000 solves.

Next Steps

Start processing CAPTCHA tasks in bulk — get your CaptchaAI API key and prepare your CSV.

Related guides:

Discussions (0)

No comments yet.

Related Posts

DevOps & Scaling Ansible Playbooks for CaptchaAI Worker Deployment
Deploy and manage Captcha AI workers with Ansible — playbooks for provisioning, configuration, rolling updates, and health checks across your server fleet.

Deploy and manage Captcha AI workers with Ansible — playbooks for provisioning, configuration, rolling updates...

Automation Python All CAPTCHA Types
Apr 07, 2026
DevOps & Scaling Blue-Green Deployment for CAPTCHA Solving Infrastructure
Implement blue-green deployments for CAPTCHA solving infrastructure — zero-downtime upgrades, traffic switching, and rollback strategies with Captcha AI.

Implement blue-green deployments for CAPTCHA solving infrastructure — zero-downtime upgrades, traffic switchin...

Automation Python All CAPTCHA Types
Apr 07, 2026
Troubleshooting CaptchaAI API Error Handling: Complete Decision Tree
Complete decision tree for every Captcha AI API error.

Complete decision tree for every Captcha AI API error. Learn which errors are retryable, which need parameter...

Automation Python All CAPTCHA Types
Mar 17, 2026
Tutorials Using Fiddler to Inspect CaptchaAI API Traffic
How to use Fiddler Everywhere and Fiddler Classic to capture, inspect, and debug Captcha AI API requests and responses — filters, breakpoints, and replay for tr...

How to use Fiddler Everywhere and Fiddler Classic to capture, inspect, and debug Captcha AI API requests and r...

Automation Python All CAPTCHA Types
Mar 05, 2026
Tutorials CAPTCHA Handling in Mobile Apps with Appium
Handle CAPTCHAs in mobile app automation using Appium and CaptchaAI — extract WebView sitekeys, solve, and inject tokens on Android and iOS.

Handle CAPTCHAs in mobile app automation using Appium and Captcha AI — extract Web View sitekeys, solve, and i...

Automation Python All CAPTCHA Types
Feb 13, 2026
Tutorials Streaming Batch Results: Processing CAPTCHA Solutions as They Arrive
Process CAPTCHA solutions the moment they arrive instead of waiting for all tasks to complete — use async generators, event emitters, and callback patterns for stre...

Process CAPTCHA solutions the moment they arrive instead of waiting for all tasks to complete — use async gene...

Automation Python All CAPTCHA Types
Apr 07, 2026
Reference CaptchaAI CLI Tool: Command-Line CAPTCHA Solving and Testing
A reference for building and using a Captcha AI command-line tool — solve CAPTCHAs, check balance, test parameters, and integrate with shell scripts and CI/CD p...

A reference for building and using a Captcha AI command-line tool — solve CAPTCHAs, check balance, test parame...

Automation Python All CAPTCHA Types
Feb 26, 2026
DevOps & Scaling Auto-Scaling CAPTCHA Solving Workers
Build auto-scaling CAPTCHA solving workers that adjust capacity based on queue depth, balance, and solve rates.

Build auto-scaling CAPTCHA solving workers that adjust capacity based on queue depth, balance, and solve rates...

Automation Python All CAPTCHA Types
Mar 23, 2026
DevOps & Scaling CaptchaAI Monitoring with Datadog: Metrics and Alerts
Monitor Captcha AI performance with Datadog — custom metrics, dashboards, anomaly detection alerts, and solve rate tracking for CAPTCHA solving pipelines.

Monitor Captcha AI performance with Datadog — custom metrics, dashboards, anomaly detection alerts, and solve...

Automation Python All CAPTCHA Types
Feb 19, 2026
Tutorials Pytest Fixtures for CaptchaAI API Testing
Build reusable pytest fixtures to test CAPTCHA-solving workflows with Captcha AI.

Build reusable pytest fixtures to test CAPTCHA-solving workflows with Captcha AI. Covers mocking, live integra...

Automation Python reCAPTCHA v2
Apr 08, 2026
Tutorials GeeTest Token Injection in Browser Automation Frameworks
Learn how to inject GeeTest v3 solution tokens into Playwright, Puppeteer, and Selenium — including the three-value response, callback triggering, and form submissi...

Learn how to inject Gee Test v 3 solution tokens into Playwright, Puppeteer, and Selenium — including the thre...

Automation Python Testing
Jan 18, 2026