Production CAPTCHA solving needs structured logging for cost tracking, debugging, and compliance. Here's how to implement it properly.
Structured Logging Setup
import logging
import json
import time
from datetime import datetime, timezone
class CaptchaLogger:
    """Structured logger for CAPTCHA operations.

    Every ``log_*`` method emits one INFO record whose message is a JSON
    object holding an ``event`` name, the event's fields, and a UTC
    ISO-8601 ``timestamp``.
    """

    def __init__(self, logger_name="captchaai"):
        """Bind to the named logger and make sure it has a handler.

        Args:
            logger_name: Name passed to ``logging.getLogger``.
        """
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.INFO)
        # logging.getLogger returns the same object for the same name, so
        # attaching a handler unconditionally would add one more handler per
        # CaptchaLogger instance and every event would be emitted N times.
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            # JSON formatter for structured logs
            handler.setFormatter(JsonFormatter())
            self.logger.addHandler(handler)

    def _emit(self, payload):
        """Stamp *payload* with the current UTC time and log it as JSON."""
        payload["timestamp"] = datetime.now(timezone.utc).isoformat()
        self.logger.info(json.dumps(payload))

    def log_submit(self, task_id, captcha_type, params):
        """Log CAPTCHA task submission.

        Args:
            task_id: Identifier of the submitted task.
            captcha_type: CAPTCHA type label.
            params: Mapping of submission parameters; ``None`` is treated
                as empty. The ``"key"`` entry is dropped before logging so
                the API key is not written to the log.
        """
        safe_params = {k: v for k, v in (params or {}).items() if k != "key"}
        self._emit({
            "event": "captcha_submit",
            "task_id": task_id,
            "type": captcha_type,
            "params": safe_params,
        })

    def log_result(self, task_id, success, duration_ms, error=None):
        """Log CAPTCHA solve result.

        Args:
            task_id: Identifier of the task (may be ``None``).
            success: Whether the solve succeeded.
            duration_ms: Elapsed time in milliseconds.
            error: Optional error description for failed solves.
        """
        self._emit({
            "event": "captcha_result",
            "task_id": task_id,
            "success": success,
            "duration_ms": duration_ms,
            "error": error,
        })

    def log_cost(self, task_id, captcha_type, cost):
        """Log CAPTCHA solve cost.

        Args:
            task_id: Identifier of the task.
            captcha_type: CAPTCHA type label.
            cost: Cost of the solve, logged under ``cost_usd``.
        """
        self._emit({
            "event": "captcha_cost",
            "task_id": task_id,
            "type": captcha_type,
            "cost_usd": cost,
        })
class JsonFormatter(logging.Formatter):
    """Pass-through formatter for messages that are already JSON text.

    No serialization happens here: the output is exactly the record's
    message, without any level, logger-name, or time prefix.
    """

    def format(self, record):
        """Return the record's message with its %-style args applied."""
        message = record.getMessage()
        return message
Audited Solver
import requests
import time
class AuditedSolver:
    """Solver with built-in logging and auditing.

    Submits a task to the CaptchaAI HTTP endpoints, polls for the answer,
    and reports every outcome (including transport failures) to both the
    structured logger and the aggregate ``SolveStats`` counters.
    """

    SUBMIT_URL = "https://ocr.captchaai.com/in.php"
    RESULT_URL = "https://ocr.captchaai.com/res.php"

    # Polling schedule: one initial wait, then up to MAX_POLLS polls with
    # POLL_INTERVAL_S seconds between them.
    INITIAL_DELAY_S = 10
    POLL_INTERVAL_S = 5
    MAX_POLLS = 24

    COST_ESTIMATES = {
        "recaptcha_v2": 0.003,
        "recaptcha_v3": 0.004,
        "turnstile": 0.002,
        "geetest": 0.003,
        "image": 0.001,
    }
    # Cost assumed for CAPTCHA types missing from COST_ESTIMATES.
    DEFAULT_COST = 0.003

    def __init__(self, api_key, logger=None):
        """Create a solver.

        Args:
            api_key: API key sent with every request.
            logger: Optional logger object exposing ``log_submit``,
                ``log_result`` and ``log_cost``; a ``CaptchaLogger`` is
                created when omitted.
        """
        self.api_key = api_key
        self.log = logger or CaptchaLogger()
        self.stats = SolveStats()

    @staticmethod
    def _elapsed_ms(start):
        """Return whole milliseconds elapsed since *start* (``time.time()``)."""
        return int((time.time() - start) * 1000)

    def _fail(self, task_id, captcha_type, duration_ms, error):
        """Write a failed outcome to the log and to the stats counters."""
        self.log.log_result(task_id, False, duration_ms, error=error)
        self.stats.record_failure(captcha_type, error)

    def solve(self, captcha_type, params):
        """Solve CAPTCHA with full audit trail.

        Args:
            captcha_type: CAPTCHA type label used for logging, statistics
                and the cost estimate.
            params: Mapping of extra form fields for the submit request.

        Returns:
            The ``request`` field of the successful poll response.

        Raises:
            RuntimeError: The API rejected the submission or reported an
                error while polling.
            TimeoutError: No answer arrived within the polling schedule.
            requests.RequestException, ValueError: The HTTP call failed or
                the response body was not JSON; the failure is logged and
                counted before the exception is re-raised unchanged.
        """
        start = time.time()

        # Submit
        payload = {"key": self.api_key, "json": 1, **params}
        try:
            resp = requests.post(self.SUBMIT_URL, data=payload, timeout=30)
            result = resp.json()
        except (requests.RequestException, ValueError) as exc:
            # Only the exception class name is recorded: exception messages
            # may embed request details, and those include the API key.
            self._fail(None, captcha_type, self._elapsed_ms(start),
                       type(exc).__name__)
            raise

        if result.get("status") != 1:
            error = result.get("request", "UNKNOWN")
            self._fail(None, captcha_type, 0, error)
            raise RuntimeError(f"Submit error: {error}")

        task_id = result["request"]
        self.log.log_submit(task_id, captcha_type, params)

        # Poll
        time.sleep(self.INITIAL_DELAY_S)
        for _ in range(self.MAX_POLLS):
            try:
                resp = requests.get(self.RESULT_URL, params={
                    "key": self.api_key, "action": "get",
                    "id": task_id, "json": 1,
                }, timeout=15)
                poll = resp.json()
            except (requests.RequestException, ValueError) as exc:
                # The key travels in this URL's query string, so again keep
                # only the exception class name out of caution.
                self._fail(task_id, captcha_type, self._elapsed_ms(start),
                           type(exc).__name__)
                raise

            if poll.get("status") == 1:
                duration_ms = self._elapsed_ms(start)
                cost = self.COST_ESTIMATES.get(captcha_type, self.DEFAULT_COST)
                self.log.log_result(task_id, True, duration_ms)
                self.log.log_cost(task_id, captcha_type, cost)
                self.stats.record_success(captcha_type, duration_ms, cost)
                return poll["request"]

            # Same fallback as the submit path, so a reply without a
            # "request" field is audited instead of raising KeyError.
            error = poll.get("request", "UNKNOWN")
            if error != "CAPCHA_NOT_READY":
                self._fail(task_id, captcha_type, self._elapsed_ms(start), error)
                raise RuntimeError(error)

            time.sleep(self.POLL_INTERVAL_S)

        self._fail(task_id, captcha_type, self._elapsed_ms(start), "TIMEOUT")
        raise TimeoutError("Solve timeout")
class SolveStats:
    """Running totals for solve outcomes, overall and per CAPTCHA type."""

    def __init__(self):
        """Start with every counter at zero and no recorded types."""
        self.successes = 0
        self.failures = 0
        self.total_cost = 0.0
        self.total_duration_ms = 0
        self.errors = {}
        self.by_type = {}

    def _bucket(self, captcha_type):
        """Return the per-type counters, creating them on first use."""
        return self.by_type.setdefault(
            captcha_type, {"success": 0, "fail": 0, "cost": 0.0}
        )

    def record_success(self, captcha_type, duration_ms, cost):
        """Count one successful solve with its duration and cost."""
        self.successes += 1
        self.total_cost += cost
        self.total_duration_ms += duration_ms
        per_type = self._bucket(captcha_type)
        per_type["success"] += 1
        per_type["cost"] += cost

    def record_failure(self, captcha_type, error):
        """Count one failed solve and tally its error label."""
        self.failures += 1
        self.errors[error] = self.errors.get(error, 0) + 1
        per_type = self._bucket(captcha_type)
        per_type["fail"] += 1

    def get_summary(self):
        """Return a dict of totals, formatted rates and breakdowns.

        ``success_rate`` is ``"N/A"`` when nothing has been recorded, and
        ``avg_duration_ms`` is the floor average over successful solves.
        """
        attempts = self.successes + self.failures
        if attempts:
            success_rate = f"{self.successes / attempts * 100:.1f}%"
        else:
            success_rate = "N/A"
        # Divide by at least 1 so an empty tracker reports 0, not an error.
        average_ms = self.total_duration_ms // max(self.successes, 1)
        return {
            "total_solves": attempts,
            "success_rate": success_rate,
            "total_cost": f"${self.total_cost:.4f}",
            "avg_duration_ms": average_ms,
            "errors": self.errors,
            "by_type": self.by_type,
        }
File-Based Audit Log
import csv
import os
from datetime import datetime, timezone
class AuditLog:
    """Append-only CSV audit log for compliance.

    Rows are appended to ``<log_dir>/captcha_audit_<YYYY-MM-DD>.csv``,
    where the date is the current UTC day at the time of each write, so a
    long-running process moves to a new file when the UTC date changes.
    """

    HEADER = [
        "timestamp", "event", "task_id", "captcha_type",
        "success", "duration_ms", "cost_usd", "error",
    ]

    def __init__(self, log_dir="logs"):
        """Create *log_dir* if needed and prepare today's log file.

        Args:
            log_dir: Directory that holds the daily CSV files.
        """
        os.makedirs(log_dir, exist_ok=True)
        self.log_dir = log_dir
        self.filepath = self._path_for_today()
        self._ensure_header()

    def _path_for_today(self):
        """Return the log file path for the current UTC date."""
        date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        return os.path.join(self.log_dir, f"captcha_audit_{date}.csv")

    def _ensure_header(self):
        """Write CSV header if file is new or empty."""
        # Append mode never truncates, so an existing log cannot be wiped by
        # a check-then-create race; the size check also covers a file that
        # exists but is still empty.
        with open(self.filepath, "a", newline="", encoding="utf-8") as f:
            if os.fstat(f.fileno()).st_size == 0:
                csv.writer(f).writerow(self.HEADER)

    def record(self, event, task_id, captcha_type, success=None,
               duration_ms=None, cost_usd=None, error=None):
        """Append a record to the audit log.

        Args:
            event: Event label for the row.
            task_id: Identifier of the task.
            captcha_type: CAPTCHA type label.
            success: Optional outcome flag.
            duration_ms: Optional elapsed time in milliseconds.
            cost_usd: Optional cost of the solve.
            error: Optional error description.

        Fields left as ``None`` are written as empty CSV cells.
        """
        # Switch to a fresh file once the UTC date has changed since the
        # last write; otherwise rows would land in the previous day's file.
        current = self._path_for_today()
        if current != self.filepath:
            self.filepath = current
            self._ensure_header()

        with open(self.filepath, "a", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow([
                datetime.now(timezone.utc).isoformat(),
                event, task_id, captcha_type,
                success, duration_ms, cost_usd, error,
            ])
# Usage: write one "submit" row and one "result" row for the same task.
audit = AuditLog()
audit.record(event="submit", task_id="12345", captcha_type="recaptcha_v2")
audit.record(
    event="result",
    task_id="12345",
    captcha_type="recaptcha_v2",
    success=True,
    duration_ms=15200,
    cost_usd=0.003,
)
Usage Reporting
def generate_daily_report(stats):
    """Render the summary from ``stats.get_summary()`` as plain text.

    Args:
        stats: Object whose ``get_summary()`` returns a dict with the keys
            ``total_solves``, ``success_rate``, ``total_cost``,
            ``avg_duration_ms``, ``by_type`` and ``errors``.

    Returns:
        The report as one newline-joined string. The per-type and error
        sections are included only when the summary has entries for them;
        errors are listed from most to least frequent.
    """
    summary = stats.get_summary()
    banner = "=" * 50
    lines = [
        banner,
        "CaptchaAI Daily Usage Report",
        banner,
        f"Total solves: {summary['total_solves']}",
        f"Success rate: {summary['success_rate']}",
        f"Total cost: {summary['total_cost']}",
        f"Avg duration: {summary['avg_duration_ms']}ms",
        "",
    ]

    per_type = summary["by_type"]
    if per_type:
        lines.append("By CAPTCHA type:")
        for ctype, counters in per_type.items():
            attempts = counters["success"] + counters["fail"]
            if attempts:
                rate = counters["success"] / attempts * 100
            else:
                rate = 0
            lines.append(
                f" {ctype}: {attempts} solves, {rate:.0f}% success, ${counters['cost']:.4f}"
            )

    error_counts = summary["errors"]
    if error_counts:
        lines.append("")
        lines.append("Errors:")
        ranked = sorted(error_counts.items(), key=lambda pair: pair[1], reverse=True)
        lines.extend(f" {error}: {count}" for error, count in ranked)

    return "\n".join(lines)
FAQ
What should I log for compliance?
Log: timestamp, task ID, CAPTCHA type, target domain, success/failure, and cost. Never log the actual CAPTCHA token or API key.
How long should I keep audit logs?
Depends on your compliance requirements. Common retention periods: 30 days for debugging, 90 days for billing, 1 year for audit compliance.
Does logging slow down CAPTCHA solving?
Async or file-based logging adds negligible overhead (<1ms per operation). Don't log to remote services synchronously in the critical path.
Related Guides
Track every solve — monitor with CaptchaAI.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.