Logging and Auditing CAPTCHA Solving Operations

Production CAPTCHA solving needs structured logging for cost tracking, debugging, and compliance. Here's how to implement it properly.

Structured Logging Setup

import logging
import json
import time
from datetime import datetime, timezone


class CaptchaLogger:
    """Structured logger that emits one JSON object per CAPTCHA event.

    Every ``log_*`` method serializes its fields with ``json.dumps`` and
    sends the resulting string to the underlying ``logging.Logger`` at INFO
    level. Each record carries an ``event`` name and a UTC ISO-8601
    ``timestamp``.

    Attributes:
        logger: The ``logging.Logger`` the events are written to.
    """

    def __init__(self, logger_name="captchaai"):
        """Bind to the named logger and make sure it has an output handler.

        Args:
            logger_name: Name passed to ``logging.getLogger``.
        """
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.INFO)

        # logging.getLogger() hands back the same object for the same name,
        # so attaching a handler unconditionally would make every additional
        # CaptchaLogger instance print each event one more time. Only install
        # the default stream handler when nothing is attached yet.
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(JsonFormatter())
            self.logger.addHandler(handler)

    def _emit(self, event, **fields):
        """Serialize one event (name, fields, UTC timestamp) and log it."""
        record = {
            "event": event,
            **fields,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        self.logger.info(json.dumps(record))

    def log_submit(self, task_id, captcha_type, params):
        """Log CAPTCHA task submission.

        Args:
            task_id: Identifier of the submitted task.
            captcha_type: CAPTCHA type label, logged under ``type``.
            params: Mapping of submission parameters; the ``"key"`` entry is
                dropped so the API key never reaches the log output.
        """
        safe_params = {k: v for k, v in params.items() if k != "key"}
        self._emit(
            "captcha_submit",
            task_id=task_id,
            type=captcha_type,
            params=safe_params,
        )

    def log_result(self, task_id, success, duration_ms, error=None):
        """Log CAPTCHA solve result.

        Args:
            task_id: Identifier of the task (may be ``None``).
            success: Whether the solve succeeded.
            duration_ms: Elapsed time in milliseconds.
            error: Optional error description for failed solves.
        """
        self._emit(
            "captcha_result",
            task_id=task_id,
            success=success,
            duration_ms=duration_ms,
            error=error,
        )

    def log_cost(self, task_id, captcha_type, cost):
        """Log CAPTCHA solve cost.

        Args:
            task_id: Identifier of the task.
            captcha_type: CAPTCHA type label, logged under ``type``.
            cost: Cost value, logged under ``cost_usd``.
        """
        self._emit(
            "captcha_cost",
            task_id=task_id,
            type=captcha_type,
            cost_usd=cost,
        )


class JsonFormatter(logging.Formatter):
    """Pass-through formatter for messages that are already JSON text.

    The output is exactly the record's message (with any %-style arguments
    applied); no level, logger name, or timestamp prefix is added.
    """

    def format(self, record):
        """Return the record's rendered message unchanged."""
        rendered = record.getMessage()
        return rendered

Audited Solver

import requests
import time


class AuditedSolver:
    """Solver with built-in logging and auditing.

    Submits a task to the CaptchaAI HTTP API, polls for the answer, and
    reports every outcome (success, API error, transport error, timeout) to
    both the structured logger and the in-memory statistics object.

    Attributes:
        api_key: API key sent with every request.
        log: Logger exposing ``log_submit`` / ``log_result`` / ``log_cost``.
        stats: Aggregate counters updated on every solve attempt.
    """

    # Per-solve cost estimates keyed by CAPTCHA type.
    COST_ESTIMATES = {
        "recaptcha_v2": 0.003,
        "recaptcha_v3": 0.004,
        "turnstile": 0.002,
        "geetest": 0.003,
        "image": 0.001,
    }
    # Estimate used when the CAPTCHA type is not in COST_ESTIMATES.
    DEFAULT_COST = 0.003

    # Endpoints and polling schedule; override in a subclass to tune them.
    SUBMIT_URL = "https://ocr.captchaai.com/in.php"
    RESULT_URL = "https://ocr.captchaai.com/res.php"
    INITIAL_DELAY_S = 10   # wait before the first poll
    POLL_INTERVAL_S = 5    # wait between polls
    MAX_POLLS = 24         # polls attempted before giving up

    def __init__(self, api_key, logger=None):
        """Create a solver.

        Args:
            api_key: API key for the solving service.
            logger: Optional logger object; a ``CaptchaLogger`` is created
                when omitted.
        """
        self.api_key = api_key
        self.log = logger or CaptchaLogger()
        self.stats = SolveStats()

    @staticmethod
    def _elapsed_ms(start):
        """Return whole milliseconds elapsed since ``start`` (time.time())."""
        return int((time.time() - start) * 1000)

    def _audit_failure(self, task_id, captcha_type, duration_ms, error):
        """Write a failed outcome to both the logger and the statistics."""
        self.log.log_result(task_id, False, duration_ms, error=error)
        self.stats.record_failure(captcha_type, error)

    def solve(self, captcha_type, params):
        """Solve CAPTCHA with full audit trail.

        Args:
            captcha_type: Type label used for logging, stats, and the cost
                estimate lookup.
            params: Extra form fields merged into the submit request.

        Returns:
            The ``request`` field of the successful poll response.

        Raises:
            RuntimeError: The API rejected the submission or reported an
                error while polling.
            TimeoutError: No answer arrived within ``MAX_POLLS`` polls.
            requests.RequestException, ValueError: Transport failure or a
                non-JSON response; re-raised unchanged after being audited.
        """
        start = time.time()

        # Submit
        payload = {"key": self.api_key, "json": 1, **params}
        try:
            submitted = requests.post(
                self.SUBMIT_URL, data=payload, timeout=30,
            ).json()
        except (requests.RequestException, ValueError) as exc:
            # Without this, network/parse failures would leave no audit
            # record at all. The exception class name keeps the error
            # buckets in the stats low-cardinality.
            self._audit_failure(
                None, captcha_type, self._elapsed_ms(start), type(exc).__name__,
            )
            raise

        if submitted.get("status") != 1:
            error = submitted.get("request", "UNKNOWN")
            self._audit_failure(None, captcha_type, 0, error)
            raise RuntimeError(f"Submit error: {error}")

        task_id = submitted["request"]
        self.log.log_submit(task_id, captcha_type, params)

        # Poll
        time.sleep(self.INITIAL_DELAY_S)
        for _ in range(self.MAX_POLLS):
            try:
                polled = requests.get(self.RESULT_URL, params={
                    "key": self.api_key, "action": "get",
                    "id": task_id, "json": 1,
                }, timeout=15).json()
            except (requests.RequestException, ValueError) as exc:
                self._audit_failure(
                    task_id, captcha_type, self._elapsed_ms(start),
                    type(exc).__name__,
                )
                raise

            if polled.get("status") == 1:
                duration_ms = self._elapsed_ms(start)
                cost = self.COST_ESTIMATES.get(captcha_type, self.DEFAULT_COST)

                self.log.log_result(task_id, True, duration_ms)
                self.log.log_cost(task_id, captcha_type, cost)
                self.stats.record_success(captcha_type, duration_ms, cost)

                return polled["request"]

            # .get() so a response lacking "request" is audited as UNKNOWN
            # instead of escaping as an unlogged KeyError.
            error = polled.get("request", "UNKNOWN")
            if error != "CAPCHA_NOT_READY":
                self._audit_failure(
                    task_id, captcha_type, self._elapsed_ms(start), error,
                )
                raise RuntimeError(error)

            time.sleep(self.POLL_INTERVAL_S)

        self._audit_failure(
            task_id, captcha_type, self._elapsed_ms(start), "TIMEOUT",
        )
        raise TimeoutError("Solve timeout")


class SolveStats:
    """In-memory counters for solve outcomes, cost, and latency.

    Attributes:
        successes: Number of successful solves recorded.
        failures: Number of failed solves recorded.
        total_cost: Sum of the costs of successful solves.
        total_duration_ms: Sum of the durations of successful solves.
        errors: Mapping of error value -> occurrence count.
        by_type: Mapping of CAPTCHA type -> ``{"success", "fail", "cost"}``.
    """

    def __init__(self):
        """Start with all counters at zero and no per-type data."""
        self.successes = 0
        self.failures = 0
        self.total_cost = 0.0
        self.total_duration_ms = 0
        self.errors = {}
        self.by_type = {}

    def record_success(self, captcha_type, duration_ms, cost):
        """Count one successful solve of ``captcha_type``."""
        self.successes += 1
        self.total_cost += cost
        self.total_duration_ms += duration_ms

        # Create the per-type bucket on first sight, then update it.
        bucket = self.by_type.setdefault(
            captcha_type, {"success": 0, "fail": 0, "cost": 0.0}
        )
        bucket["success"] += 1
        bucket["cost"] += cost

    def record_failure(self, captcha_type, error):
        """Count one failed solve of ``captcha_type`` caused by ``error``."""
        self.failures += 1
        seen_before = self.errors.get(error, 0)
        self.errors[error] = seen_before + 1

        bucket = self.by_type.setdefault(
            captcha_type, {"success": 0, "fail": 0, "cost": 0.0}
        )
        bucket["fail"] += 1

    def get_summary(self):
        """Return a dict snapshot of the aggregate figures.

        ``success_rate`` and ``total_cost`` are pre-formatted strings;
        ``avg_duration_ms`` is the integer mean over successful solves
        (0 when there are none). ``errors`` and ``by_type`` are the live
        internal dicts, not copies.
        """
        attempts = self.successes + self.failures

        if attempts:
            success_rate = f"{self.successes / attempts * 100:.1f}%"
        else:
            success_rate = "N/A"

        # Avoid dividing by zero when nothing has succeeded yet.
        divisor = self.successes or 1

        summary = {}
        summary["total_solves"] = attempts
        summary["success_rate"] = success_rate
        summary["total_cost"] = f"${self.total_cost:.4f}"
        summary["avg_duration_ms"] = self.total_duration_ms // divisor
        summary["errors"] = self.errors
        summary["by_type"] = self.by_type
        return summary

File-Based Audit Log

import csv
import os
from datetime import datetime, timezone


class AuditLog:
    """Append-only CSV audit log for compliance.

    One file per UTC calendar date is used, named
    ``captcha_audit_YYYY-MM-DD.csv`` inside ``log_dir``. The date is taken
    when the object is constructed and the path stays fixed for the
    lifetime of the instance.

    Attributes:
        filepath: Path of the CSV file records are appended to.
    """

    # Column names, in the order record() writes its values.
    HEADER = (
        "timestamp", "event", "task_id", "captcha_type",
        "success", "duration_ms", "cost_usd", "error",
    )

    def __init__(self, log_dir="logs"):
        """Create ``log_dir`` if needed and prepare today's log file.

        Args:
            log_dir: Directory that holds the audit CSV files.
        """
        os.makedirs(log_dir, exist_ok=True)
        date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        self.filepath = os.path.join(log_dir, f"captcha_audit_{date}.csv")
        self._ensure_header()

    def _ensure_header(self):
        """Write CSV header if the file is missing or still empty."""
        # Append mode creates the file when absent but never truncates it,
        # so existing records survive even if another process created the
        # file between a check and the open. Testing the size (rather than
        # mere existence) also repairs a pre-existing zero-byte file.
        with open(self.filepath, "a", newline="", encoding="utf-8") as f:
            if os.path.getsize(self.filepath) == 0:
                csv.writer(f).writerow(self.HEADER)

    def record(self, event, task_id, captcha_type, success=None,
               duration_ms=None, cost_usd=None, error=None):
        """Append a record to the audit log.

        Args:
            event: Event name (first data column after the timestamp).
            task_id: Identifier of the task.
            captcha_type: CAPTCHA type label.
            success: Optional outcome flag.
            duration_ms: Optional duration in milliseconds.
            cost_usd: Optional cost value.
            error: Optional error description.

        ``None`` values are written as empty CSV fields. The row is stamped
        with the current UTC time in ISO-8601 format.
        """
        # UTF-8 is pinned so non-ASCII error text is written the same way
        # on every platform instead of depending on the locale encoding.
        with open(self.filepath, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            # Re-create the header if the file was emptied or removed after
            # construction, so data rows are never left without one.
            if os.path.getsize(self.filepath) == 0:
                writer.writerow(self.HEADER)
            writer.writerow([
                datetime.now(timezone.utc).isoformat(),
                event, task_id, captcha_type,
                success, duration_ms, cost_usd, error,
            ])


# Usage: record a submission, then its successful result, for one task.
audit = AuditLog()
audit.record(event="submit", task_id="12345", captcha_type="recaptcha_v2")
audit.record(
    event="result",
    task_id="12345",
    captcha_type="recaptcha_v2",
    success=True,
    duration_ms=15200,
    cost_usd=0.003,
)

Usage Reporting

def generate_daily_report(stats):
    """Render a plain-text usage report from a statistics object.

    Args:
        stats: Object whose ``get_summary()`` returns a dict with the keys
            ``total_solves``, ``success_rate``, ``total_cost``,
            ``avg_duration_ms``, ``by_type`` and ``errors``.

    Returns:
        The report as a single newline-joined string: a banner, the overall
        figures, then optional per-type and per-error sections.
    """
    summary = stats.get_summary()
    banner = "=" * 50

    lines = [
        banner,
        "CaptchaAI Daily Usage Report",
        banner,
        f"Total solves: {summary['total_solves']}",
        f"Success rate: {summary['success_rate']}",
        f"Total cost: {summary['total_cost']}",
        f"Avg duration: {summary['avg_duration_ms']}ms",
        "",
    ]

    per_type = summary["by_type"]
    if per_type:
        lines.append("By CAPTCHA type:")
        for ctype, counts in per_type.items():
            attempts = counts["success"] + counts["fail"]
            if attempts:
                rate = counts["success"] / attempts * 100
            else:
                rate = 0
            lines.append(
                f"  {ctype}: {attempts} solves, {rate:.0f}% success, ${counts['cost']:.4f}"
            )

    error_counts = summary["errors"]
    if error_counts:
        lines += ["", "Errors:"]
        # Most frequent errors first; ties keep their original order.
        ranked = sorted(error_counts.items(), key=lambda pair: -pair[1])
        lines.extend(f"  {error}: {count}" for error, count in ranked)

    return "\n".join(lines)