Health Check Endpoints for CAPTCHA Solving Workers

Your CAPTCHA solving worker looks alive — the process is running — but it hasn't successfully solved a task in 10 minutes. The API key is exhausted, or the worker is stuck in a loop. Without health checks, your orchestrator keeps routing work to a dead worker. Health endpoints let load balancers and Kubernetes detect the problem and reroute.

Three Types of Health Checks

Check	Question	Failure response
Liveness	Is the process running?	Restart the container
Readiness	Can it accept work?	Stop routing traffic
Dependency	Are upstream services OK?	Degrade gracefully

Python: Flask Health Endpoints

import requests
import time
import threading
from flask import Flask, jsonify
from dataclasses import dataclass, field

API_KEY = "YOUR_API_KEY"
RESULT_URL = "https://ocr.captchaai.com/res.php"

app = Flask(__name__)


@dataclass
class WorkerHealth:
    """Tracks worker health metrics."""
    started_at: float = field(default_factory=time.monotonic)
    last_solve_at: float = 0.0
    total_solved: int = 0
    total_failed: int = 0
    consecutive_failures: int = 0
    balance: float | None = None
    balance_checked_at: float = 0.0
    _lock: threading.Lock = field(default_factory=threading.Lock)

    def record_success(self):
        with self._lock:
            self.total_solved += 1
            self.last_solve_at = time.monotonic()
            self.consecutive_failures = 0

    def record_failure(self):
        with self._lock:
            self.total_failed += 1
            self.consecutive_failures += 1

    @property
    def success_rate(self) -> float:
        total = self.total_solved + self.total_failed
        return self.total_solved / total if total > 0 else 1.0

    @property
    def seconds_since_last_solve(self) -> float:
        if self.last_solve_at == 0:
            return time.monotonic() - self.started_at
        return time.monotonic() - self.last_solve_at


health = WorkerHealth()

# Thresholds
MAX_CONSECUTIVE_FAILURES = 10
MAX_SECONDS_WITHOUT_SOLVE = 600  # 10 minutes
MIN_BALANCE = 1.0


def check_balance() -> float | None:
    """Check CaptchaAI balance."""
    now = time.monotonic()
    # Cache balance for 60 seconds
    if health.balance is not None and now - health.balance_checked_at < 60:
        return health.balance

    try:
        resp = requests.get(RESULT_URL, params={
            "key": API_KEY, "action": "getbalance", "json": 1,
        }, timeout=10).json()
        health.balance = float(resp.get("request", 0))
        health.balance_checked_at = now
        return health.balance
    except Exception:
        return health.balance  # Return cached value on error


@app.route("/health/live")
def liveness():
    """Liveness probe — is the process responsive?"""
    return jsonify({"status": "ok", "uptime_s": int(time.monotonic() - health.started_at)}), 200


@app.route("/health/ready")
def readiness():
    """Readiness probe — can the worker accept tasks?"""
    issues = []

    # Check consecutive failures
    if health.consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
        issues.append(f"consecutive_failures={health.consecutive_failures}")

    # Check time since last solve
    if health.total_solved > 0 and health.seconds_since_last_solve > MAX_SECONDS_WITHOUT_SOLVE:
        issues.append(f"no_solve_for={int(health.seconds_since_last_solve)}s")

    # Check balance
    balance = check_balance()
    if balance is not None and balance < MIN_BALANCE:
        issues.append(f"low_balance=${balance:.2f}")

    if issues:
        return jsonify({
            "status": "not_ready",
            "issues": issues,
            "stats": {
                "solved": health.total_solved,
                "failed": health.total_failed,
                "success_rate": round(health.success_rate, 3),
            },
        }), 503

    return jsonify({
        "status": "ready",
        "stats": {
            "solved": health.total_solved,
            "failed": health.total_failed,
            "success_rate": round(health.success_rate, 3),
            "balance": balance,
        },
    }), 200


@app.route("/health/dependencies")
def dependencies():
    """Check upstream dependencies."""
    checks = {}

    # CaptchaAI API reachability
    try:
        resp = requests.get(RESULT_URL, params={
            "key": API_KEY, "action": "getbalance", "json": 1,
        }, timeout=10)
        checks["captchaai_api"] = {
            "status": "ok" if resp.status_code == 200 else "degraded",
            "response_ms": int(resp.elapsed.total_seconds() * 1000),
        }
    except Exception as e:
        checks["captchaai_api"] = {"status": "down", "error": str(e)}

    all_ok = all(c["status"] == "ok" for c in checks.values())
    return jsonify({
        "status": "ok" if all_ok else "degraded",
        "checks": checks,
    }), 200 if all_ok else 503


# --- Worker loop (runs in background) ---

def worker_loop():
    """Simulated CAPTCHA solving worker."""
    while True:
        try:
            # ... solve CAPTCHA logic ...
            health.record_success()
        except Exception:
            health.record_failure()
        time.sleep(1)


threading.Thread(target=worker_loop, daemon=True).start()

JavaScript: Express Health Endpoints

const express = require("express");

const API_KEY = "YOUR_API_KEY";
const RESULT_URL = "https://ocr.captchaai.com/res.php";

const app = express();

const health = {
  startedAt: Date.now(),
  lastSolveAt: 0,
  totalSolved: 0,
  totalFailed: 0,
  consecutiveFailures: 0,
  balance: null,
  balanceCheckedAt: 0,

  recordSuccess() {
    this.totalSolved++;
    this.lastSolveAt = Date.now();
    this.consecutiveFailures = 0;
  },

  recordFailure() {
    this.totalFailed++;
    this.consecutiveFailures++;
  },

  get successRate() {
    const total = this.totalSolved + this.totalFailed;
    return total > 0 ? this.totalSolved / total : 1;
  },
};

async function checkBalance() {
  if (health.balance !== null && Date.now() - health.balanceCheckedAt < 60000) {
    return health.balance;
  }
  try {
    const url = `${RESULT_URL}?key=${API_KEY}&action=getbalance&json=1`;
    const resp = await (await fetch(url)).json();
    health.balance = parseFloat(resp.request);
    health.balanceCheckedAt = Date.now();
    return health.balance;
  } catch {
    return health.balance;
  }
}

app.get("/health/live", (req, res) => {
  res.json({ status: "ok", uptimeMs: Date.now() - health.startedAt });
});

app.get("/health/ready", async (req, res) => {
  const issues = [];

  if (health.consecutiveFailures >= 10) {
    issues.push(`consecutive_failures=${health.consecutiveFailures}`);
  }

  if (health.totalSolved > 0) {
    const silentMs = Date.now() - health.lastSolveAt;
    if (silentMs > 600_000) {
      issues.push(`no_solve_for=${Math.round(silentMs / 1000)}s`);
    }
  }

  const balance = await checkBalance();
  if (balance !== null && balance < 1.0) {
    issues.push(`low_balance=$${balance.toFixed(2)}`);
  }

  const stats = {
    solved: health.totalSolved,
    failed: health.totalFailed,
    successRate: Math.round(health.successRate * 1000) / 1000,
    balance,
  };

  if (issues.length > 0) {
    return res.status(503).json({ status: "not_ready", issues, stats });
  }
  res.json({ status: "ready", stats });
});

app.get("/health/dependencies", async (req, res) => {
  const checks = {};
  try {
    const start = Date.now();
    const url = `${RESULT_URL}?key=${API_KEY}&action=getbalance&json=1`;
    const resp = await fetch(url);
    checks.captchaaiApi = {
      status: resp.ok ? "ok" : "degraded",
      responseMs: Date.now() - start,
    };
  } catch (e) {
    checks.captchaaiApi = { status: "down", error: e.message };
  }

  const allOk = Object.values(checks).every((c) => c.status === "ok");
  res.status(allOk ? 200 : 503).json({
    status: allOk ? "ok" : "degraded",
    checks,
  });
});

app.listen(8080, () => console.log("Health server on :8080"));

Kubernetes Configuration

apiVersion: apps/v1
kind: Deployment
metadata:
  name: captcha-worker
spec:
  replicas: 3
  template:
    spec:
      containers:

        - name: worker
          image: captcha-worker:latest
          ports:

            - containerPort: 8080
          livenessProbe:
            httpGet:
              path: /health/live
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 15
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health/ready
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 10
            failureThreshold: 2

Health Check Response Codes

Endpoint	200	503
`/health/live`	Process responsive	Process frozen — restart
`/health/ready`	Can accept work	Stop sending tasks
`/health/dependencies`	All dependencies OK	Upstream degraded

Troubleshooting

Issue	Cause	Fix
Worker constantly restarted	Liveness threshold too low	Increase `failureThreshold` or `periodSeconds`
Worker marked not-ready during startup	No solves yet counted as "too long"	Only check `seconds_since_last_solve` after first solve
Balance check slows health endpoint	API call on every request	Cache the balance with a TTL (60s recommended)
Health endpoint itself crashes	Unhandled exception in check	Wrap each check in try/except; return degraded instead of 500
False negatives from dependency check	Network blip during balance check	Use cached values with a stale-while-revalidate approach

FAQ

How often should Kubernetes probe the health endpoints?

Liveness: every 10–30 seconds with a failure threshold of 3. Readiness: every 5–10 seconds with a failure threshold of 2. More frequent probes detect issues faster but add overhead.

Should the health endpoint hit the CaptchaAI API?

Only for the readiness and dependency checks, and always cache the result. The liveness probe should never make external calls — it must respond instantly to prove the process is alive.

How do I monitor health across multiple workers?

Expose health metrics in Prometheus format (/metrics) alongside the health endpoints. Aggregate across workers with Grafana dashboards to see fleet-wide health.