Error Budget Tracking for CAPTCHA Solve Reliability

Your CAPTCHA solving pipeline targets 95% success rate. Last week it was 94.2%. Is that a problem? Without an error budget, you can't answer that question quantitatively. An error budget tells you exactly how many failures you can tolerate before reliability falls below your SLO — and what to do when the budget runs out.

Error Budget Basics

Concept	Definition	Example
SLO	Target success rate	95% successful solves
Error budget	Allowed failure rate	5% of total solves can fail
Burn rate	How fast budget is consumed	2× means budget exhausted in half the window
Window	Measurement period	Rolling 24 hours or 7 days

If your SLO is 95% over a 24-hour window with 10,000 solves, your error budget is 500 failures. Once you hit 500 failures, new deployments or risky changes should stop.

Python: Error Budget Tracker

import time
import threading
from dataclasses import dataclass, field
from collections import deque
from enum import Enum

API_KEY = "YOUR_API_KEY"


class BudgetStatus(Enum):
    HEALTHY = "healthy"          # Budget > 50% remaining
    WARNING = "warning"          # Budget 10-50% remaining
    CRITICAL = "critical"        # Budget < 10% remaining
    EXHAUSTED = "exhausted"      # Budget depleted


@dataclass
class SLOConfig:
    """Service Level Objective configuration."""
    target_success_rate: float = 0.95  # 95%
    window_seconds: int = 86400        # 24 hours
    warning_threshold: float = 0.50    # Alert at 50% budget
    critical_threshold: float = 0.10   # Alert at 10% budget


@dataclass
class ErrorBudgetEvent:
    timestamp: float
    success: bool


class ErrorBudgetTracker:
    """Tracks error budget consumption for CAPTCHA solving."""

    def __init__(self, config: SLOConfig = SLOConfig()):
        self.config = config
        self._events: deque[ErrorBudgetEvent] = deque()
        self._lock = threading.Lock()
        self._callbacks: dict[BudgetStatus, list[callable]] = {
            status: [] for status in BudgetStatus
        }
        self._last_status = BudgetStatus.HEALTHY

    def on_status_change(self, status: BudgetStatus, callback: callable):
        """Register a callback for status transitions."""
        self._callbacks[status].append(callback)

    def record(self, success: bool):
        """Record a solve attempt."""
        now = time.monotonic()
        event = ErrorBudgetEvent(timestamp=now, success=success)

        with self._lock:
            self._events.append(event)
            self._prune(now)
            new_status = self._compute_status()

            if new_status != self._last_status:
                self._last_status = new_status
                for cb in self._callbacks.get(new_status, []):
                    try:
                        cb(self.get_report())
                    except Exception as e:
                        print(f"[BUDGET] Callback error: {e}")

    def _prune(self, now: float):
        """Remove events outside the window."""
        cutoff = now - self.config.window_seconds
        while self._events and self._events[0].timestamp < cutoff:
            self._events.popleft()

    def _compute_status(self) -> BudgetStatus:
        remaining = self.remaining_fraction
        if remaining <= 0:
            return BudgetStatus.EXHAUSTED
        if remaining < self.config.critical_threshold:
            return BudgetStatus.CRITICAL
        if remaining < self.config.warning_threshold:
            return BudgetStatus.WARNING
        return BudgetStatus.HEALTHY

    @property
    def total_events(self) -> int:
        with self._lock:
            return len(self._events)

    @property
    def success_count(self) -> int:
        with self._lock:
            return sum(1 for e in self._events if e.success)

    @property
    def failure_count(self) -> int:
        with self._lock:
            return sum(1 for e in self._events if not e.success)

    @property
    def current_success_rate(self) -> float:
        total = self.total_events
        return self.success_count / total if total > 0 else 1.0

    @property
    def error_budget_total(self) -> float:
        """Total allowed failures in the window."""
        total = self.total_events
        if total == 0:
            return 0
        return total * (1 - self.config.target_success_rate)

    @property
    def error_budget_remaining(self) -> float:
        """Remaining failure allowance."""
        return max(0, self.error_budget_total - self.failure_count)

    @property
    def remaining_fraction(self) -> float:
        """Fraction of error budget remaining (0.0 to 1.0)."""
        budget = self.error_budget_total
        if budget <= 0:
            return 1.0 if self.failure_count == 0 else 0.0
        return max(0, self.error_budget_remaining / budget)

    @property
    def burn_rate(self) -> float:
        """How fast the budget is being consumed (1.0 = normal, 2.0 = 2× faster)."""
        total = self.total_events
        if total == 0:
            return 0.0
        expected_failures = total * (1 - self.config.target_success_rate)
        if expected_failures == 0:
            return 0.0
        return self.failure_count / expected_failures

    def get_report(self) -> dict:
        return {
            "status": self._last_status.value,
            "slo_target": self.config.target_success_rate,
            "current_rate": round(self.current_success_rate, 4),
            "total_events": self.total_events,
            "successes": self.success_count,
            "failures": self.failure_count,
            "budget_total": round(self.error_budget_total, 1),
            "budget_remaining": round(self.error_budget_remaining, 1),
            "budget_remaining_pct": round(self.remaining_fraction * 100, 1),
            "burn_rate": round(self.burn_rate, 2),
        }


# --- Integration with solver ---

budget = ErrorBudgetTracker(SLOConfig(
    target_success_rate=0.95,
    window_seconds=3600,  # 1-hour window for demo
))

# Register alerts
budget.on_status_change(BudgetStatus.WARNING, lambda r:
    print(f"[ALERT] Budget warning: {r['budget_remaining_pct']}% remaining"))

budget.on_status_change(BudgetStatus.CRITICAL, lambda r:
    print(f"[ALERT] Budget critical: {r['budget_remaining_pct']}% remaining"))

budget.on_status_change(BudgetStatus.EXHAUSTED, lambda r:
    print(f"[ALERT] Budget EXHAUSTED — throttle new requests"))


def solve_with_budget(params: dict) -> str:
    """Solve CAPTCHA while tracking error budget."""
    import requests

    if budget._last_status == BudgetStatus.EXHAUSTED:
        raise RuntimeError("Error budget exhausted — solving paused")

    try:
        submit_params = {**params, "key": API_KEY, "json": 1}
        resp = requests.post(
            "https://ocr.captchaai.com/in.php", data=submit_params, timeout=30
        ).json()
        if resp.get("status") != 1:
            budget.record(False)
            raise RuntimeError(f"Submit: {resp.get('request')}")

        task_id = resp["request"]
        start = time.monotonic()
        while time.monotonic() - start < 180:
            time.sleep(5)
            poll = requests.get("https://ocr.captchaai.com/res.php", params={
                "key": API_KEY, "action": "get", "id": task_id, "json": 1,
            }, timeout=15).json()

            if poll.get("request") == "CAPCHA_NOT_READY":
                continue
            if poll.get("status") == 1:
                budget.record(True)
                return poll["request"]

            budget.record(False)
            raise RuntimeError(f"Solve: {poll.get('request')}")

        budget.record(False)
        raise RuntimeError("Timeout")

    except Exception:
        budget.record(False)
        raise


# Usage
for i in range(100):
    try:
        token = solve_with_budget({
            "method": "turnstile",
            "sitekey": "0x4XXXXXXXXXXXXXXXXX",
            "pageurl": "https://example.com",
        })
    except RuntimeError as e:
        if "exhausted" in str(e):
            print(f"Stopped at iteration {i}")
            break

print(budget.get_report())

JavaScript: Error Budget Tracker

class ErrorBudgetTracker {
  #events = [];
  #config;
  #callbacks = {};

  constructor(config = {}) {
    this.#config = {
      targetRate: config.targetRate || 0.95,
      windowMs: config.windowMs || 3600_000,
      warningThreshold: config.warningThreshold || 0.5,
      criticalThreshold: config.criticalThreshold || 0.1,
    };
    this.lastStatus = "healthy";
  }

  on(status, callback) {
    this.#callbacks[status] = this.#callbacks[status] || [];
    this.#callbacks[status].push(callback);
  }

  record(success) {
    const now = Date.now();
    this.#events.push({ time: now, success });
    this.#prune(now);

    const newStatus = this.#computeStatus();
    if (newStatus !== this.lastStatus) {
      this.lastStatus = newStatus;
      for (const cb of this.#callbacks[newStatus] || []) {
        cb(this.report());
      }
    }
  }

  #prune(now) {
    const cutoff = now - this.#config.windowMs;
    while (this.#events.length && this.#events[0].time < cutoff) {
      this.#events.shift();
    }
  }

  #computeStatus() {
    const frac = this.remainingFraction;
    if (frac <= 0) return "exhausted";
    if (frac < this.#config.criticalThreshold) return "critical";
    if (frac < this.#config.warningThreshold) return "warning";
    return "healthy";
  }

  get total() { return this.#events.length; }
  get successes() { return this.#events.filter((e) => e.success).length; }
  get failures() { return this.#events.filter((e) => !e.success).length; }
  get currentRate() { return this.total ? this.successes / this.total : 1; }

  get budgetTotal() {
    return this.total * (1 - this.#config.targetRate);
  }

  get budgetRemaining() {
    return Math.max(0, this.budgetTotal - this.failures);
  }

  get remainingFraction() {
    const bt = this.budgetTotal;
    if (bt <= 0) return this.failures === 0 ? 1 : 0;
    return Math.max(0, this.budgetRemaining / bt);
  }

  get burnRate() {
    const expected = this.total * (1 - this.#config.targetRate);
    return expected > 0 ? this.failures / expected : 0;
  }

  report() {
    return {
      status: this.lastStatus,
      currentRate: Math.round(this.currentRate * 10000) / 10000,
      total: this.total,
      failures: this.failures,
      budgetRemainingPct: Math.round(this.remainingFraction * 1000) / 10,
      burnRate: Math.round(this.burnRate * 100) / 100,
    };
  }
}

// Usage
const budget = new ErrorBudgetTracker({ targetRate: 0.95, windowMs: 3600_000 });

budget.on("warning", (r) => console.log(`[WARN] ${r.budgetRemainingPct}% budget left`));
budget.on("exhausted", (r) => console.log("[ALERT] Budget exhausted!"));

// Record results from your solver
budget.record(true);   // success
budget.record(false);  // failure
console.log(budget.report());

Burn Rate Alerts

Burn rate	Meaning	Action
< 1.0	Consuming slower than expected	No action needed
1.0	On pace to exhaust at window end	Monitor closely
2.0	Budget exhausted in half the window	Investigate and slow down
5.0+	Rapid budget consumption	Pause non-critical solves

Troubleshooting

Issue	Cause	Fix
Budget exhausted too quickly	SLO too tight for actual conditions	Set a realistic SLO based on historical data
Budget never consumed	SLO too generous	Tighten SLO to drive reliability improvements
Status flaps between states	Window too short	Use a longer measurement window (24h vs 1h)
Burn rate misleading at low volume	Few events skew the calculation	Require minimum event count before calculating burn rate
Budget tracker memory grows	Events not pruned	Verify `_prune` runs on every `record()` call

FAQ

What's a realistic SLO for CAPTCHA solving?

Depends on the CAPTCHA type. reCAPTCHA v2 typically achieves 90–95% solve rates. Turnstile may be higher. Image CAPTCHAs vary. Start by measuring your current success rate, then set your SLO 2–3% below that baseline to create a meaningful error budget.

What should happen when the error budget is exhausted?

Options from least to most aggressive: alert the team, throttle new requests, pause non-essential solves, switch to manual CAPTCHA handling. Never silently ignore an exhausted budget.