DevOps & Scaling

ELK Stack for CAPTCHA Solving Log Analysis

When your CAPTCHA pipeline processes thousands of tasks, grep doesn't scale. The ELK Stack (Elasticsearch, Logstash, Kibana) lets you search, aggregate, and visualize solve logs — find error patterns, track latency trends, and diagnose issues in seconds.

Architecture

[CAPTCHA Workers] → JSON logs → [Filebeat] → [Logstash] → [Elasticsearch]
                                                                ↓
                                                           [Kibana]

Structured Logging

Python — JSON Log Output

import os
import json
import time
import logging
import sys
import requests

# Read the API key from the environment. Indexing os.environ raises KeyError
# at import time when CAPTCHAAI_API_KEY is not set, so a misconfigured worker
# fails immediately instead of sending requests without a key.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]


class JSONFormatter(logging.Formatter):
    """Render each log record as a single-line JSON object.

    The output always contains ``timestamp``, ``level``, ``logger`` and
    ``message``. Any attribute listed in ``EXTRA_FIELDS`` that was attached to
    the record (via ``logger.info(..., extra={...})``) is copied through, and
    an ``exception`` key carries the formatted traceback when the record has
    exception info.
    """

    # Optional per-record attributes copied into the JSON document, in the
    # order they appear in the output.
    EXTRA_FIELDS = (
        "captcha_id",
        "captcha_type",
        "solve_time",
        "error_code",
        "target_url",
        "poll_count",
    )

    def format(self, record):
        """Return ``record`` serialized as a JSON string."""
        # Emit ISO 8601 in UTC with an explicit "Z" so the event time does not
        # depend on the worker's local timezone. Milliseconds are derived from
        # ``created`` so the value is always in the range 0-999.
        whole_seconds = int(record.created)
        millis = int((record.created - whole_seconds) * 1000)
        timestamp = "%s.%03dZ" % (
            time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(whole_seconds)),
            millis,
        )

        log_entry = {
            "timestamp": timestamp,
            # Lower-case so exact-match (keyword) filters such as
            # ``level:error`` find these events.
            "level": record.levelname.lower(),
            "logger": record.name,
            "message": record.getMessage(),
        }

        # Add extra fields
        for field in self.EXTRA_FIELDS:
            if hasattr(record, field):
                log_entry[field] = getattr(record, field)

        # Keep tracebacks from logger.exception()/exc_info=True in the document.
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        # default=str keeps logging from raising on a non-serializable value.
        return json.dumps(log_entry, default=str)


# Build the stdout handler first so every record it emits is JSON-formatted.
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(JSONFormatter())

# Attach it to the "captchaai" logger at INFO level.
logger = logging.getLogger("captchaai")
logger.setLevel(logging.INFO)
logger.addHandler(handler)

# One shared HTTP session for all submit and poll requests.
session = requests.Session()


def solve_captcha(sitekey, pageurl, captcha_type="recaptcha_v2",
                  poll_interval=5, max_polls=60, request_timeout=30):
    """Submit a reCAPTCHA task to CaptchaAI and poll until it resolves.

    Every outcome (submit failure, submission, success, failure, timeout) is
    logged through ``logger`` with structured ``extra`` fields.

    Args:
        sitekey: Site key, sent to the API as ``googlekey``.
        pageurl: URL of the page hosting the CAPTCHA; also logged as
            ``target_url``.
        captcha_type: Label recorded in the logs only. The submit request
            always uses ``method=userrecaptcha`` regardless of this value.
        poll_interval: Seconds to sleep before each poll.
        max_polls: Number of polls to attempt before returning ``TIMEOUT``.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        ``{"solution": <token>}`` on success, otherwise ``{"error": <code>}``
        where the code is the API's ``request`` value or ``"TIMEOUT"``.

    Raises:
        requests.RequestException: On a network failure or when a request
            exceeds ``request_timeout``.
    """
    extra = {"captcha_type": captcha_type, "target_url": pageurl}

    # Submit. Without an explicit timeout a stalled connection would block
    # this worker indefinitely.
    resp = session.post("https://ocr.captchaai.com/in.php", data={
        "key": API_KEY,
        "method": "userrecaptcha",
        "googlekey": sitekey,
        "pageurl": pageurl,
        "json": 1
    }, timeout=request_timeout)
    data = resp.json()

    if data.get("status") != 1:
        logger.error("Submit failed", extra={
            **extra, "error_code": data.get("request")
        })
        return {"error": data.get("request")}

    captcha_id = data["request"]
    extra = {**extra, "captcha_id": captcha_id}
    logger.info("Task submitted", extra=extra)

    # Poll. A monotonic clock keeps solve_time correct even if the system
    # clock is adjusted while waiting.
    start = time.monotonic()
    poll_count = 0
    for _ in range(max_polls):
        time.sleep(poll_interval)
        poll_count += 1
        result = session.get("https://ocr.captchaai.com/res.php", params={
            "key": API_KEY, "action": "get", "id": captcha_id, "json": 1
        }, timeout=request_timeout).json()

        if result.get("status") == 1:
            elapsed = round(time.monotonic() - start, 2)
            logger.info("Solve success", extra={
                **extra,
                "solve_time": elapsed,
                "poll_count": poll_count
            })
            return {"solution": result["request"]}

        # Anything other than "not ready yet" is a terminal error.
        if result.get("request") != "CAPCHA_NOT_READY":
            logger.error("Solve failed", extra={
                **extra,
                "error_code": result.get("request"),
                "poll_count": poll_count
            })
            return {"error": result.get("request")}

    logger.error("Solve timeout", extra={
        **extra,
        "error_code": "TIMEOUT",
        "poll_count": poll_count
    })
    return {"error": "TIMEOUT"}

JavaScript — Structured Logging

const axios = require("axios");

// Read the API key from the environment. This is `undefined` when
// CAPTCHAAI_API_KEY is not set; nothing here validates it.
const API_KEY = process.env.CAPTCHAAI_API_KEY;

/**
 * Write one structured log line to stdout as JSON.
 *
 * The line always carries `timestamp` (ISO 8601), `level`, `message` and
 * `service`; keys in `fields` are merged on top, so a key with the same name
 * replaces the default value.
 *
 * @param {string} level - Severity label, e.g. "info" or "error".
 * @param {string} message - Human-readable event name.
 * @param {object} [fields={}] - Extra key/value pairs to include.
 */
function log(level, message, fields = {}) {
  const record = Object.assign(
    {
      timestamp: new Date().toISOString(),
      level,
      message,
      service: "captcha-worker",
    },
    fields,
  );
  const line = JSON.stringify(record);
  console.log(line);
}

/**
 * Submit a reCAPTCHA task to CaptchaAI and poll until it resolves, logging
 * every outcome as a structured event.
 *
 * Log fields use snake_case (`captcha_type`, `target_url`, `captcha_id`,
 * `solve_time`, `poll_count`, `error_code`) so they line up with the
 * Elasticsearch index template and the Kibana queries.
 *
 * @param {string} sitekey - Site key, sent to the API as `googlekey`.
 * @param {string} pageurl - URL of the page hosting the CAPTCHA.
 * @param {string} [captchaType="recaptcha_v2"] - Label recorded in the logs
 *   only; the submit request always uses `method: "userrecaptcha"`.
 * @param {object} [options] - Optional tuning knobs.
 * @param {number} [options.pollIntervalMs=5000] - Delay before each poll.
 * @param {number} [options.maxPolls=60] - Polls to attempt before TIMEOUT.
 * @param {number} [options.requestTimeoutMs=30000] - Per-request HTTP timeout.
 * @returns {Promise<{solution: string}|{error: string}>} The solved token, or
 *   the API error code / "TIMEOUT".
 */
async function solveCaptcha(
  sitekey,
  pageurl,
  captchaType = "recaptcha_v2",
  { pollIntervalMs = 5000, maxPolls = 60, requestTimeoutMs = 30000 } = {},
) {
  const baseFields = { captcha_type: captchaType, target_url: pageurl };

  // axios has no timeout by default, so set one explicitly; otherwise a
  // stalled connection would block this worker indefinitely.
  const submitResp = await axios.post("https://ocr.captchaai.com/in.php", null, {
    params: {
      key: API_KEY, method: "userrecaptcha",
      googlekey: sitekey, pageurl, json: 1,
    },
    timeout: requestTimeoutMs,
  });

  if (submitResp.data.status !== 1) {
    log("error", "Submit failed", { ...baseFields, error_code: submitResp.data.request });
    return { error: submitResp.data.request };
  }

  const captchaId = submitResp.data.request;
  const fields = { ...baseFields, captcha_id: captchaId };
  log("info", "Task submitted", fields);

  const startTime = Date.now();
  let pollCount = 0;

  for (let i = 0; i < maxPolls; i++) {
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    pollCount++;

    const pollResp = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: captchaId, json: 1 },
      timeout: requestTimeoutMs,
    });

    if (pollResp.data.status === 1) {
      // Seconds, rounded to two decimals, as a number (not a string).
      const solveTime = Math.round((Date.now() - startTime) / 10) / 100;
      log("info", "Solve success", { ...fields, solve_time: solveTime, poll_count: pollCount });
      return { solution: pollResp.data.request };
    }

    // Anything other than "not ready yet" is a terminal error.
    if (pollResp.data.request !== "CAPCHA_NOT_READY") {
      log("error", "Solve failed", { ...fields, error_code: pollResp.data.request, poll_count: pollCount });
      return { error: pollResp.data.request };
    }
  }

  log("error", "Solve timeout", { ...fields, error_code: "TIMEOUT", poll_count: pollCount });
  return { error: "TIMEOUT" };
}

module.exports = { solveCaptcha };

Filebeat Configuration

# filebeat.yml
# Tails the CAPTCHA worker log files and forwards each event to Logstash.
# The worker examples in this guide write their JSON lines to stdout, so that
# output has to be redirected into /var/log/captcha-worker/ for this input to
# pick it up.
filebeat.inputs:

  - type: log
    paths:

      - /var/log/captcha-worker/*.log
    # Decode every line as a JSON object.
    json:
      # Place the decoded keys at the top level of the event instead of
      # nesting them under a "json" key.
      keys_under_root: true
      # Attach error details to the event when a line cannot be decoded.
      add_error_key: true
      # JSON key holding the log text (used for line filtering / multiline).
      message_key: message

# Send events to the Logstash beats input on port 5044.
output.logstash:
  hosts: ["logstash:5044"]

Logstash Pipeline

# logstash-captcha.conf
# Receives worker events from Filebeat, adds a latency bucket, sets the event
# time from the worker's own timestamp, and indexes into daily indices.
input {
  beats {
    port => 5044
  }
}

filter {
  # Filebeat (json.keys_under_root: true) already decodes each line, so the
  # worker fields (solve_time, error_code, ...) arrive at the top level and
  # "message" is plain text such as "Solve success". Only parse here when a
  # raw JSON line arrives undecoded, and merge its keys into the top level so
  # field names match the index template and the Kibana queries.
  if [message] and [message] =~ /^\s*\{/ {
    json {
      source => "message"
    }
  }

  # Add computed fields: fast (<= 30 s), medium (> 30 s), slow (> 90 s).
  if [solve_time] {
    # Make sure the comparison below is numeric.
    mutate {
      convert => { "solve_time" => "float" }
    }
    if [solve_time] > 90 {
      mutate { add_field => { "solve_time_bucket" => "slow" } }
    } else if [solve_time] > 30 {
      mutate { add_field => { "solve_time_bucket" => "medium" } }
    } else {
      mutate { add_field => { "solve_time_bucket" => "fast" } }
    }
  }

  # Use the time the worker logged the event, not the time Logstash saw it.
  date {
    match => ["timestamp", "ISO8601"]
    target => "@timestamp"
  }
}

output {
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "captcha-logs-%{+YYYY.MM.dd}"
  }
}

Elasticsearch Index Template

{
  "index_patterns": ["captcha-logs-*"],
  "template": {
    "settings": {
      "number_of_shards": 1,
      "number_of_replicas": 0
    },
    "mappings": {
      "properties": {
        "captcha_type": { "type": "keyword" },
        "captcha_id": { "type": "keyword" },
        "error_code": { "type": "keyword" },
        "solve_time": { "type": "float" },
        "poll_count": { "type": "integer" },
        "target_url": { "type": "keyword" },
        "level": { "type": "keyword" },
        "message": { "type": "text" }
      }
    }
  }
}

Kibana Dashboard Panels

| Panel | Visualization | Query |
|---|---|---|
| Solve success rate | Metric | Count of level:info AND message:"Solve success" ÷ count of message:"Task submitted" |
| Error breakdown | Pie chart | level:error grouped by error_code |
| Latency over time | Line chart | Average solve_time over time |
| Errors over time | Bar chart | Count level:error per 5-minute bucket |
| Slowest solves | Data table | Top 10 by solve_time descending |
| Queue activity | Area chart | Count by message ("Task submitted" vs "Solve success") |

Useful Queries

# All errors in the last hour
level:error AND @timestamp:[now-1h TO now]

# Timeout errors for reCAPTCHA
error_code:TIMEOUT AND captcha_type:recaptcha_v2

# Slow solves (> 60 seconds)
solve_time:>60

# Errors for a specific target URL
# (target_url is a keyword field holding the full page URL, so a quoted
# "example.com" would only match that exact value; use wildcards instead)
level:error AND target_url:*example.com*

# Specific CAPTCHA ID investigation
captcha_id:"73519847"

Troubleshooting

| Issue | Cause | Fix |
|---|---|---|
| Logs not appearing in Kibana | Filebeat not shipping logs | Check Filebeat logs; verify path pattern matches |
| JSON parsing errors | Non-JSON lines in log file | Confirm json.keys_under_root and json.add_error_key are set in Filebeat (the error key flags the offending lines); fix logger output so every line is a single JSON object |
| Too many indices | Daily index without ILM | Set up Index Lifecycle Management with 30-day retention |
| Slow queries | Missing keyword mapping | Use keyword type for filterable fields, not text |

FAQ

How long should I retain CAPTCHA logs?

30 days for operational logs. 90 days if you need trend analysis. Use Elasticsearch ILM to automatically delete old indices.

Can I use OpenSearch instead of Elasticsearch?

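Yes, with small changes. OpenSearch was forked from Elasticsearch 7.10 and keeps a largely compatible API. In Logstash, use the logstash-output-opensearch plugin in place of the elasticsearch output, and use OpenSearch Dashboards in place of Kibana. The Filebeat configuration is unchanged because Filebeat ships to Logstash, not directly to the search cluster.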

Should I log the CAPTCHA solution text?

No. Solutions are single-use tokens with no diagnostic value. Logging them adds storage cost and could create security issues. Log only metadata (ID, type, latency, status).

Next Steps

Search and analyze your CAPTCHA logs — get your CaptchaAI API key and set up ELK.

Related guides:

Discussions (0)

No comments yet.

Related Posts

DevOps & Scaling Blue-Green Deployment for CAPTCHA Solving Infrastructure
Implement blue-green deployments for CAPTCHA solving infrastructure — zero-downtime upgrades, traffic switching, and rollback strategies with Captcha AI.

Implement blue-green deployments for CAPTCHA solving infrastructure — zero-downtime upgrades, traffic switchin...

Automation Python All CAPTCHA Types
Apr 07, 2026
DevOps & Scaling Ansible Playbooks for CaptchaAI Worker Deployment
Deploy and manage Captcha AI workers with Ansible — playbooks for provisioning, configuration, rolling updates, and health checks across your server fleet.

Deploy and manage Captcha AI workers with Ansible — playbooks for provisioning, configuration, rolling updates...

Automation Python All CAPTCHA Types
Apr 07, 2026
DevOps & Scaling Rolling Updates for CAPTCHA Solving Worker Fleets
Implement rolling updates for CAPTCHA solving worker fleets — zero-downtime upgrades, graceful draining, health-gated progression, and automatic rollback.

Implement rolling updates for CAPTCHA solving worker fleets — zero-downtime upgrades, graceful draining, healt...

Automation Python All CAPTCHA Types
Feb 28, 2026
DevOps & Scaling AWS Lambda + CaptchaAI: Serverless CAPTCHA Solving
Integrate Captcha AI with AWS Lambda for serverless CAPTCHA solving.

Integrate Captcha AI with AWS Lambda for serverless CAPTCHA solving. Deploy functions, manage API keys with Se...

Automation Python All CAPTCHA Types
Feb 17, 2026
Tutorials Queue-Based Batch CAPTCHA Processing with Priority Levels
How to build a priority queue system for CAPTCHA solving — assign priority levels to tasks, process high-priority requests first, manage concurrency limits, and...

How to build a priority queue system for CAPTCHA solving — assign priority levels to tasks, process high-prior...

Automation Python All CAPTCHA Types
Mar 20, 2026
DevOps & Scaling Terraform + CaptchaAI: Infrastructure as Code for CAPTCHA Workers
Deploy CAPTCHA solving infrastructure with Terraform — provision cloud workers, configure auto-scaling, manage secrets, and version your Captcha AI setup as cod...

Deploy CAPTCHA solving infrastructure with Terraform — provision cloud workers, configure auto-scaling, manage...

Automation Python All CAPTCHA Types
Mar 15, 2026
DevOps & Scaling Horizontal Scaling CAPTCHA Solving Workers: When and How
Scale CAPTCHA solving horizontally — identify bottlenecks, add workers dynamically, auto-scale based on queue depth, and manage costs with Captcha AI.

Scale CAPTCHA solving horizontally — identify bottlenecks, add workers dynamically, auto-scale based on queue...

Automation Python All CAPTCHA Types
Mar 07, 2026
DevOps & Scaling CaptchaAI Behind a Load Balancer: Architecture Patterns
Architect CAPTCHA solving workers behind a load balancer — routing strategies, health checks, sticky sessions, and scaling patterns with Captcha AI.

Architect CAPTCHA solving workers behind a load balancer — routing strategies, health checks, sticky sessions,...

Automation Python All CAPTCHA Types
Feb 24, 2026
DevOps & Scaling CaptchaAI Monitoring with Datadog: Metrics and Alerts
Monitor Captcha AI performance with Datadog — custom metrics, dashboards, anomaly detection alerts, and solve rate tracking for CAPTCHA solving pipelines.

Monitor Captcha AI performance with Datadog — custom metrics, dashboards, anomaly detection alerts, and solve...

Automation Python All CAPTCHA Types
Feb 19, 2026
Tutorials Health Check Endpoints for CAPTCHA Solving Workers
Build health check endpoints for CAPTCHA solving workers — expose liveness, readiness, and dependency checks so orchestrators can detect and replace unhealthy i...

Build health check endpoints for CAPTCHA solving workers — expose liveness, readiness, and dependency checks s...

Automation Python All CAPTCHA Types
Jan 27, 2026
DevOps & Scaling NATS Messaging + CaptchaAI: Lightweight CAPTCHA Task Distribution
Use NATS messaging for lightweight, high-performance CAPTCHA task distribution — publish tasks, distribute to workers, and collect results with Captcha AI.

Use NATS messaging for lightweight, high-performance CAPTCHA task distribution — publish tasks, distribute to...

Automation Python All CAPTCHA Types
Jan 24, 2026
DevOps & Scaling Building Event-Driven CAPTCHA Solving with AWS SNS and CaptchaAI
Build an event-driven CAPTCHA solving pipeline using AWS SNS for fan-out notifications and Captcha AI — decouple task submission from result processing.

Build an event-driven CAPTCHA solving pipeline using AWS SNS for fan-out notifications and Captcha AI — decoup...

Automation Python All CAPTCHA Types
Jan 21, 2026
DevOps & Scaling Disaster Recovery Planning for CAPTCHA Solving Pipelines
Build a disaster recovery plan for CAPTCHA solving pipelines — RPO/RTO targets, backup strategies, failover automation, and recovery runbooks with Captcha AI.

Build a disaster recovery plan for CAPTCHA solving pipelines — RPO/RTO targets, backup strategies, failover au...

Automation Python All CAPTCHA Types
Jan 20, 2026