SQLite needs no server setup, ships with Python, and handles thousands of writes per second. It's the fastest way to add local solve tracking, token caching, and request deduplication to a CAPTCHA solving workflow.
When to Use SQLite
| Use Case | SQLite | Better Alternative |
|---|---|---|
| Single-machine development | ✅ | — |
| Small-scale production (< 1K solves/hr) | ✅ | — |
| Test result tracking | ✅ | — |
| Multi-server production | ❌ | PostgreSQL, MongoDB |
| High-throughput distributed | ❌ | Redis, DynamoDB |
| Real-time analytics dashboard | ❌ | TimescaleDB, InfluxDB |
Schema Design
CREATE TABLE IF NOT EXISTS captcha_solves (
id INTEGER PRIMARY KEY AUTOINCREMENT,
captcha_id TEXT,
type TEXT NOT NULL,
sitekey TEXT,
pageurl TEXT,
status TEXT NOT NULL DEFAULT 'submitted',
solution TEXT,
error TEXT,
submitted_at TEXT NOT NULL DEFAULT (datetime('now')),
solved_at TEXT,
elapsed_ms INTEGER,
polls INTEGER DEFAULT 0,
project TEXT
);
CREATE INDEX IF NOT EXISTS idx_submitted_at ON captcha_solves(submitted_at);
CREATE INDEX IF NOT EXISTS idx_type_status ON captcha_solves(type, status);
CREATE INDEX IF NOT EXISTS idx_sitekey ON captcha_solves(sitekey);
-- Token cache for reuse within TTL
CREATE TABLE IF NOT EXISTS token_cache (
sitekey TEXT NOT NULL,
pageurl TEXT NOT NULL,
token TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
expires_at TEXT NOT NULL,
used INTEGER DEFAULT 0,
PRIMARY KEY (sitekey, pageurl, token)
);
CREATE INDEX IF NOT EXISTS idx_cache_lookup
ON token_cache(sitekey, pageurl, used, expires_at);
Python Implementation
Database Setup
import os
import time
import sqlite3
from datetime import datetime, timedelta, timezone
import requests
DB_PATH = os.environ.get("CAPTCHA_DB", "captcha_solves.db")
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
def get_db():
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL") # Better concurrent read performance
conn.execute("PRAGMA busy_timeout=5000")
return conn
def init_db():
conn = get_db()
conn.executescript("""
CREATE TABLE IF NOT EXISTS captcha_solves (
id INTEGER PRIMARY KEY AUTOINCREMENT,
captcha_id TEXT,
type TEXT NOT NULL,
sitekey TEXT,
pageurl TEXT,
status TEXT NOT NULL DEFAULT 'submitted',
solution TEXT,
error TEXT,
submitted_at TEXT NOT NULL DEFAULT (datetime('now')),
solved_at TEXT,
elapsed_ms INTEGER,
polls INTEGER DEFAULT 0,
project TEXT
);
CREATE INDEX IF NOT EXISTS idx_submitted_at ON captcha_solves(submitted_at);
CREATE INDEX IF NOT EXISTS idx_type_status ON captcha_solves(type, status);
CREATE TABLE IF NOT EXISTS token_cache (
sitekey TEXT NOT NULL,
pageurl TEXT NOT NULL,
token TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
expires_at TEXT NOT NULL,
used INTEGER DEFAULT 0,
PRIMARY KEY (sitekey, pageurl, token)
);
CREATE INDEX IF NOT EXISTS idx_cache_lookup
ON token_cache(sitekey, pageurl, used, expires_at);
""")
conn.close()
init_db()
Solve and Track
def solve_recaptcha(sitekey, pageurl, project=None):
conn = get_db()
# Check cache first
cached = get_cached_token(conn, sitekey, pageurl)
if cached:
conn.close()
return cached
# Insert tracking record
now = datetime.now(timezone.utc).isoformat()
cursor = conn.execute(
"INSERT INTO captcha_solves (type, sitekey, pageurl, submitted_at, project) "
"VALUES (?, ?, ?, ?, ?)",
("recaptcha_v2", sitekey, pageurl, now, project)
)
row_id = cursor.lastrowid
conn.commit()
# Submit to CaptchaAI
resp = requests.post("https://ocr.captchaai.com/in.php", data={
"key": API_KEY,
"method": "userrecaptcha",
"googlekey": sitekey,
"pageurl": pageurl,
"json": 1
})
data = resp.json()
if data.get("status") != 1:
conn.execute(
"UPDATE captcha_solves SET status=?, error=? WHERE id=?",
("error", data.get("request"), row_id)
)
conn.commit()
conn.close()
return None
captcha_id = data["request"]
conn.execute(
"UPDATE captcha_solves SET captcha_id=?, status=? WHERE id=?",
(captcha_id, "polling", row_id)
)
conn.commit()
# Poll
polls = 0
for _ in range(60):
time.sleep(5)
polls += 1
result = requests.get("https://ocr.captchaai.com/res.php", params={
"key": API_KEY, "action": "get",
"id": captcha_id, "json": 1
}).json()
if result.get("status") == 1:
solved_at = datetime.now(timezone.utc).isoformat()
submitted = datetime.fromisoformat(now)
elapsed = int((datetime.now(timezone.utc) - submitted).total_seconds() * 1000)
conn.execute(
"UPDATE captcha_solves SET status=?, solution=?, solved_at=?, "
"elapsed_ms=?, polls=? WHERE id=?",
("solved", result["request"], solved_at, elapsed, polls, row_id)
)
# Cache the token
cache_token(conn, sitekey, pageurl, result["request"])
conn.commit()
conn.close()
return result["request"]
if result.get("request") != "CAPCHA_NOT_READY":
conn.execute(
"UPDATE captcha_solves SET status=?, error=?, polls=? WHERE id=?",
("error", result.get("request"), polls, row_id)
)
conn.commit()
conn.close()
return None
conn.execute(
"UPDATE captcha_solves SET status=?, polls=? WHERE id=?",
("timeout", polls, row_id)
)
conn.commit()
conn.close()
return None
Token Caching
def cache_token(conn, sitekey, pageurl, token, ttl_seconds=90):
expires_at = (datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds)).isoformat()
conn.execute(
"INSERT OR REPLACE INTO token_cache (sitekey, pageurl, token, expires_at) "
"VALUES (?, ?, ?, ?)",
(sitekey, pageurl, token, expires_at)
)
def get_cached_token(conn, sitekey, pageurl):
now = datetime.now(timezone.utc).isoformat()
row = conn.execute(
"SELECT token FROM token_cache "
"WHERE sitekey=? AND pageurl=? AND used=0 AND expires_at > ? "
"ORDER BY expires_at ASC LIMIT 1",
(sitekey, pageurl, now)
).fetchone()
if row:
conn.execute(
"UPDATE token_cache SET used=1 WHERE token=?",
(row["token"],)
)
conn.commit()
return row["token"]
return None
Analytics Queries
def get_stats(hours=24):
conn = get_db()
cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
total = conn.execute(
"SELECT COUNT(*) FROM captcha_solves WHERE submitted_at >= ?", (cutoff,)
).fetchone()[0]
solved = conn.execute(
"SELECT COUNT(*) FROM captcha_solves WHERE submitted_at >= ? AND status='solved'",
(cutoff,)
).fetchone()[0]
avg_time = conn.execute(
"SELECT AVG(elapsed_ms) FROM captcha_solves "
"WHERE submitted_at >= ? AND status='solved'",
(cutoff,)
).fetchone()[0]
conn.close()
return {
"total": total,
"solved": solved,
"success_rate": (solved / total * 100) if total else 0,
"avg_time_ms": round(avg_time) if avg_time else 0
}
def cleanup_old_records(days=30):
conn = get_db()
cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
conn.execute("DELETE FROM captcha_solves WHERE submitted_at < ?", (cutoff,))
conn.execute("DELETE FROM token_cache WHERE expires_at < ?",
(datetime.now(timezone.utc).isoformat(),))
conn.execute("VACUUM")
conn.commit()
conn.close()
JavaScript Implementation
const Database = require("better-sqlite3");
const axios = require("axios");
const db = new Database(process.env.CAPTCHA_DB || "captcha_solves.db");
const API_KEY = process.env.CAPTCHAAI_API_KEY;
db.pragma("journal_mode = WAL");
db.exec(`
CREATE TABLE IF NOT EXISTS captcha_solves (
id INTEGER PRIMARY KEY AUTOINCREMENT,
captcha_id TEXT, type TEXT NOT NULL, sitekey TEXT, pageurl TEXT,
status TEXT DEFAULT 'submitted', solution TEXT, error TEXT,
submitted_at TEXT DEFAULT (datetime('now')),
solved_at TEXT, elapsed_ms INTEGER, polls INTEGER DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_submitted ON captcha_solves(submitted_at);
`);
async function solveAndStore(sitekey, pageurl) {
const submittedAt = new Date().toISOString();
const insert = db.prepare(
"INSERT INTO captcha_solves (type, sitekey, pageurl, submitted_at) VALUES (?, ?, ?, ?)"
);
const { lastInsertRowid } = insert.run("recaptcha_v2", sitekey, pageurl, submittedAt);
const submit = await axios.post("https://ocr.captchaai.com/in.php", null, {
params: { key: API_KEY, method: "userrecaptcha", googlekey: sitekey, pageurl, json: 1 },
});
if (submit.data.status !== 1) {
db.prepare("UPDATE captcha_solves SET status=?, error=? WHERE id=?")
.run("error", submit.data.request, lastInsertRowid);
return null;
}
const captchaId = submit.data.request;
db.prepare("UPDATE captcha_solves SET captcha_id=?, status=? WHERE id=?")
.run(captchaId, "polling", lastInsertRowid);
let polls = 0;
for (let i = 0; i < 60; i++) {
await new Promise((r) => setTimeout(r, 5000));
polls++;
const poll = await axios.get("https://ocr.captchaai.com/res.php", {
params: { key: API_KEY, action: "get", id: captchaId, json: 1 },
});
if (poll.data.status === 1) {
const elapsed = Date.now() - new Date(submittedAt).getTime();
db.prepare(
"UPDATE captcha_solves SET status=?, solution=?, solved_at=?, elapsed_ms=?, polls=? WHERE id=?"
).run("solved", poll.data.request, new Date().toISOString(), elapsed, polls, lastInsertRowid);
return poll.data.request;
}
if (poll.data.request !== "CAPCHA_NOT_READY") {
db.prepare("UPDATE captcha_solves SET status=?, error=?, polls=? WHERE id=?")
.run("error", poll.data.request, polls, lastInsertRowid);
return null;
}
}
db.prepare("UPDATE captcha_solves SET status=?, polls=? WHERE id=?")
.run("timeout", polls, lastInsertRowid);
return null;
}
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
database is locked |
Concurrent writes without WAL mode | Add PRAGMA journal_mode=WAL |
| Database file grows large | No cleanup routine | Run cleanup_old_records() daily |
| Slow queries on large datasets | Missing indexes | Add indexes on submitted_at and type |
| Token cache stale | Expired tokens not cleaned | Cleanup expired entries before lookup |
FAQ
Can SQLite handle high-volume CAPTCHA solving?
SQLite handles up to ~1,000 writes/second with WAL mode enabled. For single-machine setups doing fewer than 1,000 solves/hour, it's reliable. Beyond that, consider PostgreSQL or MongoDB.
Should I store the full solution token?
Store it briefly for debugging, then let cleanup routines remove old records. Tokens expire in 90–120 seconds, so they're useless after that.
How do I migrate from SQLite to a production database?
Export with sqlite3 captcha_solves.db ".dump" and import into PostgreSQL or MongoDB. The schema translates directly — SQLite is a great development-to-production stepping stone.
Related Articles
Next Steps
Add lightweight solve tracking to your pipeline — get your CaptchaAI API key and start logging results today.
Related guides:
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign InNo comments yet.