Your CAPTCHA solving pipeline targets a 95% success rate. Last week it was 94.2%. Is that a problem? Without an error budget, you can't answer that question quantitatively. An error budget tells you exactly how many failures you can tolerate before reliability falls below your SLO — and what to do when the budget runs out.
Error Budget Basics
| Concept | Definition | Example |
|---|---|---|
| SLO | Target success rate | 95% successful solves |
| Error budget | Allowed failure rate | 5% of total solves can fail |
| Burn rate | How fast budget is consumed | 2× means budget exhausted in half the window |
| Window | Measurement period | Rolling 24 hours or 7 days |
If your SLO is 95% over a 24-hour window with 10,000 solves, your error budget is 500 failures. Once you hit 500 failures, new deployments or risky changes should stop.
Python: Error Budget Tracker
import time
import threading
from dataclasses import dataclass, field
from collections import deque
from enum import Enum
API_KEY = "YOUR_API_KEY"
class BudgetStatus(Enum):
    """Health levels of the error budget, listed from best to worst.

    The percentages in the comments below are the default alert thresholds.
    """

    # More than 50% of the budget is still available.
    HEALTHY = "healthy"
    # Between 10% and 50% of the budget is still available.
    WARNING = "warning"
    # Less than 10% of the budget is still available.
    CRITICAL = "critical"
    # The budget is depleted.
    EXHAUSTED = "exhausted"
@dataclass
class SLOConfig:
    """Settings describing the SLO and when budget alerts should fire."""

    # Fraction of solve attempts that must succeed (0.95 == 95%).
    target_success_rate: float = 0.95
    # Length of the measurement window, in seconds (default: 24 hours).
    window_seconds: int = 24 * 60 * 60
    # Remaining-budget fraction below which the status becomes WARNING (50%).
    warning_threshold: float = 0.50
    # Remaining-budget fraction below which the status becomes CRITICAL (10%).
    critical_threshold: float = 0.10
@dataclass
class ErrorBudgetEvent:
    """One recorded solve attempt."""

    # Moment the attempt was recorded (the tracker stores time.monotonic()).
    timestamp: float
    # True when the solve succeeded, False when it failed.
    success: bool
class ErrorBudgetTracker:
"""Tracks error budget consumption for CAPTCHA solving."""
def __init__(self, config: SLOConfig = SLOConfig()):
self.config = config
self._events: deque[ErrorBudgetEvent] = deque()
self._lock = threading.Lock()
self._callbacks: dict[BudgetStatus, list[callable]] = {
status: [] for status in BudgetStatus
}
self._last_status = BudgetStatus.HEALTHY
def on_status_change(self, status: BudgetStatus, callback: callable):
"""Register a callback for status transitions."""
self._callbacks[status].append(callback)
def record(self, success: bool):
"""Record a solve attempt."""
now = time.monotonic()
event = ErrorBudgetEvent(timestamp=now, success=success)
with self._lock:
self._events.append(event)
self._prune(now)
new_status = self._compute_status()
if new_status != self._last_status:
self._last_status = new_status
for cb in self._callbacks.get(new_status, []):
try:
cb(self.get_report())
except Exception as e:
print(f"[BUDGET] Callback error: {e}")
def _prune(self, now: float):
"""Remove events outside the window."""
cutoff = now - self.config.window_seconds
while self._events and self._events[0].timestamp < cutoff:
self._events.popleft()
def _compute_status(self) -> BudgetStatus:
remaining = self.remaining_fraction
if remaining <= 0:
return BudgetStatus.EXHAUSTED
if remaining < self.config.critical_threshold:
return BudgetStatus.CRITICAL
if remaining < self.config.warning_threshold:
return BudgetStatus.WARNING
return BudgetStatus.HEALTHY
@property
def total_events(self) -> int:
with self._lock:
return len(self._events)
@property
def success_count(self) -> int:
with self._lock:
return sum(1 for e in self._events if e.success)
@property
def failure_count(self) -> int:
with self._lock:
return sum(1 for e in self._events if not e.success)
@property
def current_success_rate(self) -> float:
total = self.total_events
return self.success_count / total if total > 0 else 1.0
@property
def error_budget_total(self) -> float:
"""Total allowed failures in the window."""
total = self.total_events
if total == 0:
return 0
return total * (1 - self.config.target_success_rate)
@property
def error_budget_remaining(self) -> float:
"""Remaining failure allowance."""
return max(0, self.error_budget_total - self.failure_count)
@property
def remaining_fraction(self) -> float:
"""Fraction of error budget remaining (0.0 to 1.0)."""
budget = self.error_budget_total
if budget <= 0:
return 1.0 if self.failure_count == 0 else 0.0
return max(0, self.error_budget_remaining / budget)
@property
def burn_rate(self) -> float:
"""How fast the budget is being consumed (1.0 = normal, 2.0 = 2× faster)."""
total = self.total_events
if total == 0:
return 0.0
expected_failures = total * (1 - self.config.target_success_rate)
if expected_failures == 0:
return 0.0
return self.failure_count / expected_failures
def get_report(self) -> dict:
return {
"status": self._last_status.value,
"slo_target": self.config.target_success_rate,
"current_rate": round(self.current_success_rate, 4),
"total_events": self.total_events,
"successes": self.success_count,
"failures": self.failure_count,
"budget_total": round(self.error_budget_total, 1),
"budget_remaining": round(self.error_budget_remaining, 1),
"budget_remaining_pct": round(self.remaining_fraction * 100, 1),
"burn_rate": round(self.burn_rate, 2),
}
# --- Integration with solver ---
# Module-level tracker used by the solver code; 1-hour window for demo.
budget = ErrorBudgetTracker(
    SLOConfig(target_success_rate=0.95, window_seconds=3600)
)


def _alert_warning(r):
    """Print the remaining budget when the status becomes WARNING."""
    print(f"[ALERT] Budget warning: {r['budget_remaining_pct']}% remaining")


def _alert_critical(r):
    """Print the remaining budget when the status becomes CRITICAL."""
    print(f"[ALERT] Budget critical: {r['budget_remaining_pct']}% remaining")


def _alert_exhausted(r):
    """Announce that the budget is gone."""
    print(f"[ALERT] Budget EXHAUSTED — throttle new requests")


# Register alerts
budget.on_status_change(BudgetStatus.WARNING, _alert_warning)
budget.on_status_change(BudgetStatus.CRITICAL, _alert_critical)
budget.on_status_change(BudgetStatus.EXHAUSTED, _alert_exhausted)
def solve_with_budget(
    params: dict, *, timeout: float = 180, poll_interval: float = 5
) -> str:
    """Solve CAPTCHA while tracking error budget.

    Submits the task, polls for the result, and records exactly one event in
    the module-level ``budget`` per attempt: a success when a solution is
    returned, a failure for any error or timeout.

    Args:
        params: Task parameters sent to the submit endpoint. The dict is not
            modified; ``key`` and ``json`` are added to a copy.
        timeout: Maximum number of seconds to keep polling for a result.
        poll_interval: Seconds to sleep before each poll request.

    Returns:
        The ``request`` field of the successful poll response.

    Raises:
        RuntimeError: If the budget is already exhausted (nothing is recorded
            in that case), the submit or solve is rejected, or polling times
            out.
        Exception: Any error raised by ``requests`` or by JSON decoding is
            recorded as a failure and re-raised unchanged.
    """
    import requests

    if budget._last_status == BudgetStatus.EXHAUSTED:
        raise RuntimeError("Error budget exhausted — solving paused")

    try:
        submit_params = {**params, "key": API_KEY, "json": 1}
        resp = requests.post(
            "https://ocr.captchaai.com/in.php", data=submit_params, timeout=30
        ).json()
        if resp.get("status") != 1:
            raise RuntimeError(f"Submit: {resp.get('request')}")
        task_id = resp["request"]

        start = time.monotonic()
        while time.monotonic() - start < timeout:
            time.sleep(poll_interval)
            poll = requests.get("https://ocr.captchaai.com/res.php", params={
                "key": API_KEY, "action": "get", "id": task_id, "json": 1,
            }, timeout=15).json()
            if poll.get("request") == "CAPCHA_NOT_READY":
                continue
            if poll.get("status") == 1:
                token = poll["request"]
                break
            raise RuntimeError(f"Solve: {poll.get('request')}")
        else:
            # The loop ended because the deadline passed, not via ``break``.
            raise RuntimeError("Timeout")
    except Exception:
        # Single accounting point: every failure path above ends up here, so
        # each failed attempt consumes the budget once rather than twice.
        budget.record(False)
        raise

    budget.record(True)
    return token
# Usage
# The same request parameters are sent on every attempt.
turnstile_params = {
    "method": "turnstile",
    "sitekey": "0x4XXXXXXXXXXXXXXXXX",
    "pageurl": "https://example.com",
}

for i in range(100):
    try:
        token = solve_with_budget(turnstile_params)
    except RuntimeError as e:
        # Ordinary solve failures just move on to the next attempt; only an
        # exhausted budget ends the run.
        if "exhausted" not in str(e):
            continue
        print(f"Stopped at iteration {i}")
        break

print(budget.get_report())
JavaScript: Error Budget Tracker
/**
 * Tracks error budget consumption over a rolling time window.
 *
 * The budget is the number of failures the target rate allows for the events
 * currently in the window; the status is derived from the fraction of that
 * budget which is still unused.
 */
class ErrorBudgetTracker {
  #events = [];
  #config;
  // A Map keeps status names from colliding with Object.prototype keys.
  #callbacks = new Map();

  /**
   * @param {object} [config]
   * @param {number} [config.targetRate=0.95] Required success fraction.
   * @param {number} [config.windowMs=3600000] Window length in milliseconds.
   * @param {number} [config.warningThreshold=0.5] Remaining fraction below which status is "warning".
   * @param {number} [config.criticalThreshold=0.1] Remaining fraction below which status is "critical".
   */
  constructor(config = {}) {
    // `??` only falls back on null/undefined, so an explicit 0 (for example
    // criticalThreshold: 0 to disable that level) is respected.
    this.#config = {
      targetRate: config.targetRate ?? 0.95,
      windowMs: config.windowMs ?? 3600_000,
      warningThreshold: config.warningThreshold ?? 0.5,
      criticalThreshold: config.criticalThreshold ?? 0.1,
    };
    this.lastStatus = "healthy";
  }

  /**
   * Register a callback invoked with report() when `status` is entered.
   * @param {string} status
   * @param {(report: object) => void} callback
   */
  on(status, callback) {
    if (!this.#callbacks.has(status)) {
      this.#callbacks.set(status, []);
    }
    this.#callbacks.get(status).push(callback);
  }

  /**
   * Record one solve attempt and fire callbacks if the status changed.
   * @param {boolean} success
   */
  record(success) {
    const now = Date.now();
    this.#events.push({ time: now, success });
    this.#prune(now);

    const newStatus = this.#computeStatus();
    if (newStatus !== this.lastStatus) {
      this.lastStatus = newStatus;
      for (const cb of this.#callbacks.get(newStatus) ?? []) {
        // One failing handler must not abort record() or skip the others.
        try {
          cb(this.report());
        } catch (err) {
          console.error(`[BUDGET] Callback error: ${err}`);
        }
      }
    }
  }

  // Drop events that are older than the window.
  #prune(now) {
    const cutoff = now - this.#config.windowMs;
    while (this.#events.length && this.#events[0].time < cutoff) {
      this.#events.shift();
    }
  }

  // Map the remaining budget fraction onto a status string.
  #computeStatus() {
    const frac = this.remainingFraction;
    if (frac <= 0) return "exhausted";
    if (frac < this.#config.criticalThreshold) return "critical";
    if (frac < this.#config.warningThreshold) return "warning";
    return "healthy";
  }

  /** Number of events currently in the window. */
  get total() { return this.#events.length; }
  /** Number of successful events in the window. */
  get successes() { return this.#events.filter((e) => e.success).length; }
  /** Number of failed events in the window. */
  get failures() { return this.#events.filter((e) => !e.success).length; }
  /** Observed success rate; 1 when nothing has been recorded. */
  get currentRate() { return this.total ? this.successes / this.total : 1; }

  /** Failures the target rate allows for the events in the window. */
  get budgetTotal() {
    return this.total * (1 - this.#config.targetRate);
  }

  /** Unused failure allowance (never negative). */
  get budgetRemaining() {
    return Math.max(0, this.budgetTotal - this.failures);
  }

  /** Fraction of the budget still unused, between 0 and 1. */
  get remainingFraction() {
    const bt = this.budgetTotal;
    // No budget at all: fine while nothing failed, spent otherwise.
    if (bt <= 0) return this.failures === 0 ? 1 : 0;
    return Math.max(0, this.budgetRemaining / bt);
  }

  /** Observed failures divided by allowed failures (0 when none are allowed). */
  get burnRate() {
    const expected = this.total * (1 - this.#config.targetRate);
    return expected > 0 ? this.failures / expected : 0;
  }

  /** Snapshot of the current budget state as a plain object. */
  report() {
    return {
      status: this.lastStatus,
      currentRate: Math.round(this.currentRate * 10000) / 10000,
      total: this.total,
      failures: this.failures,
      budgetRemainingPct: Math.round(this.remainingFraction * 1000) / 10,
      burnRate: Math.round(this.burnRate * 100) / 100,
    };
  }
}
// Usage
const budget = new ErrorBudgetTracker({ targetRate: 0.95, windowMs: 3600_000 });

// Alert handlers receive the report object produced by the tracker.
budget.on("warning", function onWarning(r) {
  console.log(`[WARN] ${r.budgetRemainingPct}% budget left`);
});
budget.on("exhausted", function onExhausted(r) {
  console.log("[ALERT] Budget exhausted!");
});

// Record results from your solver
budget.record(true); // success
budget.record(false); // failure

console.log(budget.report());
Burn Rate Alerts
| Burn rate | Meaning | Action |
|---|---|---|
| < 1.0 | Consuming slower than expected | No action needed |
| 1.0 | On pace to exhaust at window end | Monitor closely |
| 2.0 | Budget exhausted in half the window | Investigate and slow down |
| 5.0+ | Rapid budget consumption | Pause non-critical solves |
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| Budget exhausted too quickly | SLO too tight for actual conditions | Set a realistic SLO based on historical data |
| Budget never consumed | SLO too generous | Tighten SLO to drive reliability improvements |
| Status flaps between states | Window too short | Use a longer measurement window (24h vs 1h) |
| Burn rate misleading at low volume | Few events skew the calculation | Require minimum event count before calculating burn rate |
| Budget tracker memory grows | Events not pruned | Verify _prune runs on every record() call |
FAQ
What's a realistic SLO for CAPTCHA solving?
It depends on the CAPTCHA type. reCAPTCHA v2 typically achieves 90–95% solve rates. Turnstile may be higher. Image CAPTCHAs vary. Start by measuring your current success rate, then set your SLO 2–3% below that baseline to create a meaningful error budget.
What should happen when the error budget is exhausted?
Options from least to most aggressive: alert the team, throttle new requests, pause non-essential solves, switch to manual CAPTCHA handling. Never silently ignore an exhausted budget.
How do I handle error budget across multiple CAPTCHA types?
Track separate budgets per type. reCAPTCHA may have a 93% SLO while Turnstile has 97%. Aggregating them into one budget hides type-specific problems.
Related Articles
Next Steps
Track your CAPTCHA solving reliability quantitatively — get your CaptchaAI API key and implement error budget tracking.
Related guides:
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.