Tutorials

MongoDB for CAPTCHA Solve History and Analytics

MongoDB's flexible schema and aggregation framework make it a strong fit for CAPTCHA solve tracking. Store every solve attempt with metadata, then query patterns across time, CAPTCHA types, and error rates.

Why MongoDB for CAPTCHA Data

CAPTCHA solve records have variable fields depending on the type — reCAPTCHA needs googlekey, hCaptcha needs sitekey, image CAPTCHAs need body. MongoDB's schemaless documents handle these naturally without schema migrations.

Document Schema

{
  "_id": "ObjectId",
  "captcha_id": "12345678",
  "type": "recaptcha_v2",
  "method": "userrecaptcha",
  "sitekey": "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-",
  "pageurl": "https://example.com/form",
  "status": "solved",
  "solution": "03AGdBq26...",
  "error": null,
  "submitted_at": "2026-04-04T10:15:30.000Z",
  "solved_at": "2026-04-04T10:15:45.000Z",
  "elapsed_ms": 15000,
  "polls": 3,
  "proxy_used": true,
  "cost": 0.00299,
  "metadata": {
    "project": "price-monitor",
    "worker_id": "worker-3",
    "target_domain": "example.com"
  }
}

Python Implementation

Setup and Connection

import os
import time
from datetime import datetime, timezone
from pymongo import MongoClient, ASCENDING, DESCENDING
import requests

# MongoDB connection string; falls back to a local instance when MONGO_URI is unset.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://localhost:27017")
# CaptchaAI API key; a missing CAPTCHAAI_API_KEY raises KeyError right here.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]

# Module-level handles: `solves` is the collection every function in this
# tutorial reads from and writes to.
client = MongoClient(MONGO_URI)
db = client["captcha_tracking"]
solves = db["solves"]

Create Indexes

def setup_indexes():
    """Create the query indexes and the TTL index on the ``solves`` collection."""
    query_indexes = (
        [("submitted_at", DESCENDING)],
        [("type", ASCENDING), ("status", ASCENDING)],
        [("metadata.project", ASCENDING)],
        [("metadata.target_domain", ASCENDING)],
    )
    for key_spec in query_indexes:
        solves.create_index(key_spec)

    # TTL index: documents expire 90 days after their submitted_at value.
    retention_seconds = 90 * 24 * 3600
    solves.create_index(
        [("submitted_at", ASCENDING)],
        name="ttl_cleanup",
        expireAfterSeconds=retention_seconds,
    )


setup_indexes()

Solve and Store

def solve_and_store(sitekey, pageurl, captcha_type="recaptcha_v2", metadata=None):
    """Submit a reCAPTCHA to CaptchaAI, poll for the answer, and log the attempt.

    A document is inserted into ``solves`` before the API call and updated as
    the attempt moves through ``submitted`` -> ``polling`` -> ``solved`` /
    ``error`` / ``timeout``.

    Args:
        sitekey: Site key, sent to the API as ``googlekey``.
        pageurl: URL of the page hosting the CAPTCHA.
        captcha_type: Label stored in the record's ``type`` field. The API
            request itself always uses ``method=userrecaptcha``.
        metadata: Optional dict stored under ``metadata``; defaults to ``{}``.

    Returns:
        The solution token, or ``None`` if the API reported an error or no
        result arrived within 60 polls.

    Raises:
        requests.RequestException: An HTTP request failed or timed out.
        ValueError: A response body was not valid JSON.
        In both cases the record is marked ``error`` before re-raising.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    request_timeout = 30  # seconds, applied to every HTTP call below

    record = {
        "type": captcha_type,
        "method": "userrecaptcha",
        "sitekey": sitekey,
        "pageurl": pageurl,
        "status": "submitted",
        "submitted_at": datetime.now(timezone.utc),
        "metadata": metadata or {}
    }

    result = solves.insert_one(record)
    doc_id = result.inserted_id

    def _update(fields):
        """Apply a ``$set`` of ``fields`` to this attempt's document."""
        solves.update_one({"_id": doc_id}, {"$set": fields})

    polls = 0
    try:
        # Submit to CaptchaAI
        resp = requests.post("https://ocr.captchaai.com/in.php", data={
            "key": API_KEY,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": pageurl,
            "json": 1
        }, timeout=request_timeout)
        data = resp.json()

        if data.get("status") != 1:
            _update({"status": "error", "error": data.get("request")})
            return None

        captcha_id = data["request"]
        _update({"captcha_id": captcha_id, "status": "polling"})

        # Poll for result: up to 60 attempts, 5 seconds apart.
        for _ in range(60):
            time.sleep(5)
            polls += 1
            poll_resp = requests.get("https://ocr.captchaai.com/res.php", params={
                "key": API_KEY, "action": "get",
                "id": captcha_id, "json": 1
            }, timeout=request_timeout).json()

            if poll_resp.get("status") == 1:
                solved_at = datetime.now(timezone.utc)
                elapsed_ms = int(
                    (solved_at - record["submitted_at"]).total_seconds() * 1000
                )
                _update({
                    "status": "solved",
                    "solution": poll_resp["request"],
                    "solved_at": solved_at,
                    "elapsed_ms": elapsed_ms,
                    "polls": polls
                })
                return poll_resp["request"]

            # Any answer other than "not ready yet" is a terminal API error.
            if poll_resp.get("request") != "CAPCHA_NOT_READY":
                _update({
                    "status": "error",
                    "error": poll_resp.get("request"),
                    "polls": polls
                })
                return None
    except (requests.RequestException, ValueError) as exc:
        # Don't leave the record stuck in "submitted"/"polling": that would
        # distort the success-rate and error-breakdown queries.
        _update({
            "status": "error",
            "error": f"{type(exc).__name__}: {exc}",
            "polls": polls
        })
        raise

    _update({"status": "timeout", "polls": polls})
    return None

Analytics Queries

def get_success_rate(hours=24):
    """Return the percentage of attempts from the last ``hours`` hours that are "solved".

    Returns 0 when no attempts fall inside the window.
    """
    from datetime import timedelta

    window_start = datetime.now(timezone.utc) - timedelta(hours=hours)
    status_counts = solves.aggregate([
        {"$match": {"submitted_at": {"$gte": window_start}}},
        {"$group": {"_id": "$status", "count": {"$sum": 1}}},
    ])

    # One row per distinct status; tally the overall total and the solved share.
    total = 0
    solved = 0
    for row in status_counts:
        total += row["count"]
        if row["_id"] == "solved":
            solved = row["count"]

    if not total:
        return 0
    return solved / total * 100


def get_avg_solve_time_by_type():
    """Return avg/min/max ``elapsed_ms`` and count per CAPTCHA type, busiest type first.

    Only documents with status "solved" are included.
    """
    only_solved = {"$match": {"status": "solved"}}
    per_type_stats = {"$group": {
        "_id": "$type",
        "avg_time_ms": {"$avg": "$elapsed_ms"},
        "min_time_ms": {"$min": "$elapsed_ms"},
        "max_time_ms": {"$max": "$elapsed_ms"},
        "count": {"$sum": 1},
    }}
    most_frequent_first = {"$sort": {"count": -1}}

    cursor = solves.aggregate([only_solved, per_type_stats, most_frequent_first])
    return list(cursor)


def get_hourly_solve_volume(days=7):
    """Return total and solved counts per (date, hour) bucket for the last ``days`` days.

    Buckets are ordered chronologically, which suits time-series charts.
    """
    from datetime import timedelta

    window_start = datetime.now(timezone.utc) - timedelta(days=days)

    bucket_key = {
        "date": {"$dateToString": {"format": "%Y-%m-%d", "date": "$submitted_at"}},
        "hour": {"$hour": "$submitted_at"},
    }
    # Counts 1 for solved documents and 0 for everything else.
    solved_flag = {"$cond": [{"$eq": ["$status", "solved"]}, 1, 0]}

    stages = [
        {"$match": {"submitted_at": {"$gte": window_start}}},
        {"$group": {
            "_id": bucket_key,
            "total": {"$sum": 1},
            "solved": {"$sum": solved_flag},
        }},
        {"$sort": {"_id.date": 1, "_id.hour": 1}},
    ]
    return list(solves.aggregate(stages))


def get_error_breakdown(hours=24):
    """Return error codes from the last ``hours`` hours with counts, most frequent first."""
    from datetime import timedelta

    window_start = datetime.now(timezone.utc) - timedelta(hours=hours)

    recent_errors = {"$match": {
        "submitted_at": {"$gte": window_start},
        "status": "error",
    }}
    count_per_code = {"$group": {"_id": "$error", "count": {"$sum": 1}}}
    most_frequent_first = {"$sort": {"count": -1}}

    cursor = solves.aggregate([recent_errors, count_per_code, most_frequent_first])
    return list(cursor)

JavaScript Implementation

const { MongoClient } = require("mongodb");
const axios = require("axios");

// MongoDB connection string; defaults to a local instance when MONGO_URI is unset or empty.
const MONGO_URI = process.env.MONGO_URI || "mongodb://localhost:27017";
// CaptchaAI API key; undefined when CAPTCHAAI_API_KEY is not set (not validated here).
const API_KEY = process.env.CAPTCHAAI_API_KEY;

// Assigned by connect(); both stay undefined until it has resolved.
let db, solves;

/**
 * Connect to MongoDB, set the module-level `db` and `solves` handles, and
 * ensure the indexes used by the analytics queries exist.
 *
 * The index set mirrors the Python `setup_indexes()` so both implementations
 * can share one database.
 */
async function connect() {
  const client = await MongoClient.connect(MONGO_URI);
  db = client.db("captcha_tracking");
  solves = db.collection("solves");

  await solves.createIndex({ submitted_at: -1 });
  await solves.createIndex({ type: 1, status: 1 });
  await solves.createIndex({ "metadata.project": 1 });
  await solves.createIndex({ "metadata.target_domain": 1 });
  // TTL index: documents expire 90 days after submitted_at. It is named
  // explicitly because the Python setup names it "ttl_cleanup"; creating the
  // same key pattern under a different name is rejected by the server.
  // NOTE: if this index already exists under the default name
  // ("submitted_at_1"), drop it once before deploying this version.
  await solves.createIndex(
    { submitted_at: 1 },
    { expireAfterSeconds: 90 * 24 * 3600, name: "ttl_cleanup" }
  );
}

/**
 * Submit a reCAPTCHA to CaptchaAI, poll for the answer, and log the attempt.
 *
 * A document is inserted into `solves` first and updated as the attempt moves
 * through "submitted" -> "polling" -> "solved" / "error" / "timeout".
 *
 * @param {string} sitekey  Site key, sent to the API as `googlekey`.
 * @param {string} pageurl  URL of the page hosting the CAPTCHA.
 * @param {string} [type]   Label stored in the record's `type` field; the API
 *                          request always uses method "userrecaptcha".
 * @param {object} [metadata] Extra fields stored under `metadata`.
 * @returns {Promise<string|null>} The solution token, or null if the API
 *   reported an error or nothing arrived within 60 polls.
 * @throws Re-throws any request or database failure after marking the record
 *   as "error".
 */
async function solveAndStore(sitekey, pageurl, type = "recaptcha_v2", metadata = {}) {
  // Without a timeout, axios waits indefinitely on a stalled connection.
  const REQUEST_TIMEOUT_MS = 30000;

  const submittedAt = new Date();
  const { insertedId } = await solves.insertOne({
    type, method: "userrecaptcha", sitekey, pageurl,
    status: "submitted", submitted_at: submittedAt, metadata,
  });

  const update = (fields) => solves.updateOne({ _id: insertedId }, { $set: fields });

  let polls = 0;
  try {
    const submit = await axios.post("https://ocr.captchaai.com/in.php", null, {
      params: { key: API_KEY, method: "userrecaptcha", googlekey: sitekey, pageurl, json: 1 },
      timeout: REQUEST_TIMEOUT_MS,
    });

    if (submit.data.status !== 1) {
      await update({ status: "error", error: submit.data.request });
      return null;
    }

    const captchaId = submit.data.request;
    await update({ captcha_id: captchaId, status: "polling" });

    // Poll for the result: up to 60 attempts, 5 seconds apart.
    for (let i = 0; i < 60; i++) {
      await new Promise((r) => setTimeout(r, 5000));
      polls++;
      const poll = await axios.get("https://ocr.captchaai.com/res.php", {
        params: { key: API_KEY, action: "get", id: captchaId, json: 1 },
        timeout: REQUEST_TIMEOUT_MS,
      });

      if (poll.data.status === 1) {
        const solvedAt = new Date();
        await update({
          status: "solved", solution: poll.data.request,
          solved_at: solvedAt, elapsed_ms: solvedAt - submittedAt, polls,
        });
        return poll.data.request;
      }
      // Any answer other than "not ready yet" is a terminal API error.
      if (poll.data.request !== "CAPCHA_NOT_READY") {
        await update({ status: "error", error: poll.data.request, polls });
        return null;
      }
    }
  } catch (err) {
    // Don't leave the record stuck in "submitted"/"polling". The update is
    // best-effort so that the original failure is the one the caller sees.
    const message = err && err.message ? err.message : String(err);
    await update({ status: "error", error: message, polls }).catch(() => {});
    throw err;
  }

  await update({ status: "timeout", polls });
  return null;
}

/**
 * Percentage of attempts submitted in the last `hours` hours whose status is
 * "solved", rounded to one decimal place.
 *
 * @param {number} [hours=24] Size of the look-back window.
 * @returns {Promise<number>} Always a number; 0 when the window is empty.
 */
async function getSuccessRate(hours = 24) {
  const cutoff = new Date(Date.now() - hours * 3600 * 1000);
  const pipeline = [
    { $match: { submitted_at: { $gte: cutoff } } },
    { $group: { _id: "$status", count: { $sum: 1 } } },
  ];
  const results = await solves.aggregate(pipeline).toArray();
  const total = results.reduce((s, r) => s + r.count, 0);
  if (total === 0) return 0;

  const solved = results.find((r) => r._id === "solved")?.count || 0;
  // toFixed() returns a string; convert back so the return type is a number
  // on every path, not a string here and 0 for an empty window.
  return Number(((solved / total) * 100).toFixed(1));
}

Data Retention

Strategy TTL Index Use Case
30-day retention expireAfterSeconds: 2592000 Development/testing
90-day retention expireAfterSeconds: 7776000 Production analytics
Permanent (with archival) No TTL; archive older records to cold storage (avoid capped collections — they overwrite the oldest documents) Compliance/audit

Troubleshooting

Issue Cause Fix
Slow aggregation queries Missing indexes on submitted_at and type Run setup_indexes() — see index section above
Documents growing large Storing full solutions in every record Store solution hashes or truncate after use
TTL not deleting old records TTL monitor runs every 60 seconds; large backlogs take time Wait for background cleanup; check index with db.solves.getIndexes()
Connection pool exhaustion Too many concurrent solve operations Set maxPoolSize in connection string

FAQ

Should I store the full CAPTCHA solution token?

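For debugging, keep tokens for 24–48 hours, then remove the solution field with a scheduled $unset update — a TTL index deletes whole documents, not individual fields, and the one defined above only fires after 90 days. For long-term analytics, store only metadata (type, time, status, error) — tokens are useless after expiration anyway.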

How much storage does this use?

Each solve record is roughly 500 bytes to 2 KB depending on metadata. At 10,000 solves/day with 90-day retention (about 900,000 records), expect roughly 0.5–2 GB of document data plus index overhead. MongoDB handles this easily.

Can I use MongoDB Atlas (cloud)?

Yes. Atlas supports TTL indexes and aggregation pipelines. Use the connection string from your Atlas dashboard in MONGO_URI.

Next Steps

Track every CAPTCHA solve and spot issues before they impact your pipeline — get your CaptchaAI API key.

Related guides:

Discussions (0)

No comments yet.

Related Posts

Tutorials Streaming Batch Results: Processing CAPTCHA Solutions as They Arrive
Process CAPTCHA solutions the moment they arrive instead of waiting for tasks to complete — use async generators, event emitters, and callback patterns for stre...

Process CAPTCHA solutions the moment they arrive instead of waiting for all tasks to complete — use async gene...

Automation Python All CAPTCHA Types
Apr 07, 2026
DevOps & Scaling Blue-Green Deployment for CAPTCHA Solving Infrastructure
Implement blue-green deployments for CAPTCHA solving infrastructure — zero-downtime upgrades, traffic switching, and rollback strategies with Captcha AI.

Implement blue-green deployments for CAPTCHA solving infrastructure — zero-downtime upgrades, traffic switchin...

Automation Python All CAPTCHA Types
Apr 07, 2026
Reference CAPTCHA Solving Performance by Region: Latency Analysis
Analyze how geographic region affects Captcha AI solve times — network latency, proxy location, and optimization strategies for global deployments.

Analyze how geographic region affects Captcha AI solve times — network latency, proxy location, and optimizati...

Automation Python All CAPTCHA Types
Apr 05, 2026
DevOps & Scaling Ansible Playbooks for CaptchaAI Worker Deployment
Deploy and manage Captcha AI workers with Ansible — playbooks for provisioning, configuration, rolling updates, and health checks across your server fleet.

Deploy and manage Captcha AI workers with Ansible — playbooks for provisioning, configuration, rolling updates...

Automation Python All CAPTCHA Types
Apr 07, 2026
Explainers Building Responsible Automation with CaptchaAI
Build responsible automation systems with Captcha AI.

Build responsible automation systems with Captcha AI. Guidelines for sustainable workflows, resource managemen...

Automation Python All CAPTCHA Types
Tutorials Bulkhead Pattern: Isolating CAPTCHA Solving Failures
Apply the bulkhead pattern to isolate CAPTCHA solving failures — partition resources into independent pools so a slow or failing solver type doesn't starve othe...

Apply the bulkhead pattern to isolate CAPTCHA solving failures — partition resources into independent pools so...

Automation Python All CAPTCHA Types
Apr 07, 2026
Tutorials Webhook Endpoint Monitoring for CAPTCHA Solve Callbacks
Monitor your Captcha AI callback endpoints — track uptime, response latency, error rates, and set up alerts before missed results impact your pipeline.

Monitor your Captcha AI callback endpoints — track uptime, response latency, error rates, and set up alerts be...

Automation Python All CAPTCHA Types
Mar 12, 2026
Tutorials Discord Webhook Alerts for CAPTCHA Pipeline Status
Send CAPTCHA pipeline alerts to Discord — webhook integration for balance warnings, error spikes, queue status, and daily summary reports with Captcha AI.

Send CAPTCHA pipeline alerts to Discord — webhook integration for balance warnings, error spikes, queue status...

Automation Python All CAPTCHA Types
API Tutorials Graceful Degradation When CAPTCHA Solving Fails
Keep your automation running when CAPTCHA solving fails — fallback strategies, queue-based retries, and degraded-mode patterns.

Keep your automation running when CAPTCHA solving fails — fallback strategies, queue-based retries, and degrad...

Automation Python All CAPTCHA Types
Apr 06, 2026
Explainers Rate Limiting CAPTCHA Solving Workflows
Sending too many requests too fast triggers blocks, bans, and wasted CAPTCHA solves.

Sending too many requests too fast triggers blocks, bans, and wasted CAPTCHA solves. Smart rate limiting keeps...

Automation Python All CAPTCHA Types
Apr 04, 2026
Tutorials CAPTCHA Handling in Flask Applications with CaptchaAI
Integrate Captcha AI into Flask applications for automated CAPTCHA solving.

Integrate Captcha AI into Flask applications for automated CAPTCHA solving. Includes service class, API endpoi...

Automation Cloudflare Turnstile
Mar 17, 2026
Tutorials Image CAPTCHA Confidence Scores: Using CaptchaAI Quality Metrics
Learn how to use Captcha AI's confidence indicators for image CAPTCHA solutions — assess answer quality, implement confidence-based retry logic, and optimize solve ra...

Learn how to use Captcha AI's confidence indicators for image CAPTCHA solutions — assess answer quality, imple...

Automation Python Image OCR
Mar 30, 2026