DynamoDB fits serverless CAPTCHA workflows naturally — no connection pooling headaches, built-in TTL for automatic cleanup, and consistent performance at any scale. This guide covers table design, item structure, and query patterns for tracking CAPTCHA solves in Lambda-based architectures.
Table Design
Single-Table Pattern
One DynamoDB table handles solve history, active tasks, and aggregated stats:
| Partition Key (PK) | Sort Key (SK) | Purpose |
|---|---|---|
| `SOLVE#{captcha_id}` | `META` | Solve record |
| `SITE#{sitekey}` | `SOLVE#{timestamp}` | Per-site solve history |
| `STATS#{date}` | `TYPE#{captcha_type}` | Daily aggregated stats |
| `ACTIVE#{captcha_id}` | `TASK` | In-flight task tracking |
Table Definition
{
"TableName": "CaptchaSolves",
"KeySchema": [
{ "AttributeName": "PK", "KeyType": "HASH" },
{ "AttributeName": "SK", "KeyType": "RANGE" }
],
"AttributeDefinitions": [
{ "AttributeName": "PK", "AttributeType": "S" },
{ "AttributeName": "SK", "AttributeType": "S" },
{ "AttributeName": "GSI1PK", "AttributeType": "S" },
{ "AttributeName": "GSI1SK", "AttributeType": "S" }
],
"GlobalSecondaryIndexes": [
{
"IndexName": "GSI1",
"KeySchema": [
{ "AttributeName": "GSI1PK", "KeyType": "HASH" },
{ "AttributeName": "GSI1SK", "KeyType": "RANGE" }
],
"Projection": { "ProjectionType": "ALL" }
}
],
"BillingMode": "PAY_PER_REQUEST",
"TimeToLiveSpecification": {
"AttributeName": "ttl",
"Enabled": true
}
}
Python Implementation
Setup
import os
import time
from datetime import datetime, timezone
import boto3
import requests
# Module-level DynamoDB resource handle, created once when the module is imported.
dynamodb = boto3.resource("dynamodb")
# Table name is read from DYNAMODB_TABLE and defaults to "CaptchaSolves".
table = dynamodb.Table(os.environ.get("DYNAMODB_TABLE", "CaptchaSolves"))
# Required setting: raises KeyError at import time if CAPTCHAAI_API_KEY is unset.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
Solve and Track
# Upper bound (seconds) for each CaptchaAI HTTP call so a stalled socket
# cannot hang the caller indefinitely.
_HTTP_TIMEOUT_S = 30


def solve_and_track(sitekey, pageurl, captcha_type="recaptcha_v2", project=None):
    """Submit a CAPTCHA to CaptchaAI, poll for the result, and record it in DynamoDB.

    Items written:
      * ``SITE#{sitekey}`` / ``SOLVE#{timestamp}`` -- per-site history entry
        (one per call, success or error).
      * ``ACTIVE#{captcha_id}`` / ``TASK`` -- in-flight marker, indexed in GSI1
        under ``STATUS#polling`` and deleted when polling ends.
      * ``SOLVE#{captcha_id}`` / ``META`` -- full solve record on success.

    Daily counters are updated through ``update_daily_stats`` once a task has
    been accepted by the API (not for submit-time rejections).

    Args:
        sitekey: Site key, sent to the API as ``googlekey``.
        pageurl: URL of the page hosting the CAPTCHA.
        captcha_type: Label stored on records and used as the stats sort key.
            The API request itself always uses ``method=userrecaptcha``.
        project: Optional project label; stored as ``"default"`` when falsy.

    Returns:
        ``{"solution": <token>}`` on success, otherwise ``{"error": <code>}``
        where ``<code>`` is the API's ``request`` value, or ``"TIMEOUT"`` after
        60 polls at 5-second intervals.

    Raises:
        requests.RequestException: On network failure or HTTP timeout.
    """
    now = datetime.now(timezone.utc)
    timestamp = now.isoformat()
    ttl_90_days = int(now.timestamp()) + (90 * 24 * 3600)
    site_key = {"PK": f"SITE#{sitekey}", "SK": f"SOLVE#{timestamp}"}

    # Submit to CaptchaAI
    resp = requests.post(
        "https://ocr.captchaai.com/in.php",
        data={
            "key": API_KEY,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": pageurl,
            "json": 1,
        },
        timeout=_HTTP_TIMEOUT_S,
    )
    data = resp.json()

    if data.get("status") != 1:
        # Submission rejected: store an error record in the site history.
        table.put_item(Item={
            **site_key,
            "captcha_type": captcha_type,
            "pageurl": pageurl,
            "status": "error",
            "error": data.get("request"),
            "submitted_at": timestamp,
            "project": project or "default",
            "ttl": ttl_90_days,
            "GSI1PK": "STATUS#error",
            "GSI1SK": timestamp,
        })
        return {"error": data.get("request")}

    captcha_id = data["request"]
    active_key = {"PK": f"ACTIVE#{captcha_id}", "SK": "TASK"}

    # Track the in-flight task. The GSI1 keys are what make it visible to a
    # GSI1 query on "STATUS#polling"; without them the item is not indexed.
    table.put_item(Item={
        **active_key,
        "sitekey": sitekey,
        "pageurl": pageurl,
        "captcha_type": captcha_type,
        "submitted_at": timestamp,
        "ttl": int(now.timestamp()) + 600,  # Auto-clean in 10 min
        "GSI1PK": "STATUS#polling",
        "GSI1SK": timestamp,
    })

    try:
        # Poll for result: up to 60 attempts, 5 seconds apart.
        for polls in range(1, 61):
            time.sleep(5)
            result = requests.get(
                "https://ocr.captchaai.com/res.php",
                params={
                    "key": API_KEY,
                    "action": "get",
                    "id": captcha_id,
                    "json": 1,
                },
                timeout=_HTTP_TIMEOUT_S,
            ).json()

            if result.get("status") == 1:
                # Take one clock reading so solved_at and elapsed_ms agree.
                solved = datetime.now(timezone.utc)
                elapsed_ms = int((solved - now).total_seconds() * 1000)

                # Store success record
                table.put_item(Item={
                    "PK": f"SOLVE#{captcha_id}",
                    "SK": "META",
                    "captcha_type": captcha_type,
                    "sitekey": sitekey,
                    "pageurl": pageurl,
                    "status": "solved",
                    "submitted_at": timestamp,
                    "solved_at": solved.isoformat(),
                    "elapsed_ms": elapsed_ms,
                    "polls": polls,
                    "project": project or "default",
                    "ttl": ttl_90_days,
                    "GSI1PK": "STATUS#solved",
                    "GSI1SK": timestamp,
                })
                # Also store in site history. No GSI keys here: the SOLVE#
                # record above already represents this solve in GSI1.
                table.put_item(Item={
                    **site_key,
                    "captcha_id": captcha_id,
                    "status": "solved",
                    "elapsed_ms": elapsed_ms,
                    "ttl": ttl_90_days,
                })
                update_daily_stats(captcha_type, True, elapsed_ms)
                return {"solution": result["request"]}

            if result.get("request") != "CAPCHA_NOT_READY":
                # Terminal API error. This history item is the only record of
                # the failure, so it carries the STATUS#error GSI keys just
                # like the submit-time error record does.
                table.put_item(Item={
                    **site_key,
                    "captcha_id": captcha_id,
                    "status": "error",
                    "error": result.get("request"),
                    "ttl": ttl_90_days,
                    "GSI1PK": "STATUS#error",
                    "GSI1SK": timestamp,
                })
                update_daily_stats(captcha_type, False, 0)
                return {"error": result.get("request")}

        update_daily_stats(captcha_type, False, 0)
        return {"error": "TIMEOUT"}
    finally:
        # Remove the active-task marker on every exit path, including
        # exceptions raised while polling or writing records.
        table.delete_item(Key=active_key)
def update_daily_stats(captcha_type, success, elapsed_ms):
    """Increment today's (UTC) counters for one CAPTCHA type.

    Always bumps ``total_solves``. On success it also bumps ``successful`` and
    adds ``elapsed_ms`` to ``total_elapsed``; otherwise it bumps ``failed``.
    Counters that do not exist yet start from zero.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    def bump(attr, by=":one"):
        # One SET clause that initialises the counter to :zero when absent.
        return f"{attr} = if_not_exists({attr}, :zero) + {by}"

    clauses = [bump("total_solves")]
    values = {":zero": 0, ":one": 1}

    if success:
        clauses.append(bump("successful"))
        clauses.append(bump("total_elapsed", ":elapsed"))
        values[":elapsed"] = elapsed_ms
    else:
        clauses.append(bump("failed"))

    table.update_item(
        Key={"PK": f"STATS#{today}", "SK": f"TYPE#{captcha_type}"},
        UpdateExpression="SET " + ", ".join(clauses),
        ExpressionAttributeValues=values,
    )
Query Patterns
def get_site_history(sitekey, limit=50):
    """Return up to ``limit`` history items for ``sitekey``, in descending sort-key order."""
    query_args = {
        "KeyConditionExpression": "PK = :pk",
        "ExpressionAttributeValues": {":pk": f"SITE#{sitekey}"},
        # Descending sort-key order; sort keys embed the submission timestamp.
        "ScanIndexForward": False,
        "Limit": limit,
    }
    page = table.query(**query_args)
    return page["Items"]
def get_daily_stats(date_str=None):
    """Return the stats items for ``date_str`` (``YYYY-MM-DD``); defaults to today in UTC."""
    day = date_str or datetime.now(timezone.utc).strftime("%Y-%m-%d")
    partition = f"STATS#{day}"
    page = table.query(
        KeyConditionExpression="PK = :pk",
        ExpressionAttributeValues={":pk": partition},
    )
    return page["Items"]
def get_active_tasks():
    """List all currently active CAPTCHA tasks.

    Queries GSI1 for items whose ``GSI1PK`` is ``"STATUS#polling"``; only
    items that carry that attribute appear in the result. A single Query
    call returns at most one page, so this follows ``LastEvaluatedKey``
    until the index partition is exhausted.

    Returns:
        A list of item dicts (empty when nothing is in flight).
    """
    query_kwargs = {
        "IndexName": "GSI1",
        "KeyConditionExpression": "GSI1PK = :pk",
        "ExpressionAttributeValues": {":pk": "STATUS#polling"},
    }
    items = []
    while True:
        response = table.query(**query_kwargs)
        items.extend(response["Items"])
        last_key = response.get("LastEvaluatedKey")
        if not last_key:
            return items
        # More pages remain: resume right after the last evaluated key.
        query_kwargs["ExclusiveStartKey"] = last_key
JavaScript Implementation
const { DynamoDBClient } = require("@aws-sdk/client-dynamodb");
const { DynamoDBDocumentClient, PutCommand, QueryCommand, UpdateCommand } = require("@aws-sdk/lib-dynamodb");
const axios = require("axios");
// Document client built on a default-configured DynamoDBClient; shared by all calls in this module.
const client = DynamoDBDocumentClient.from(new DynamoDBClient({}));
// Table name comes from DYNAMODB_TABLE and defaults to "CaptchaSolves".
const TABLE = process.env.DYNAMODB_TABLE || "CaptchaSolves";
// CaptchaAI API key; undefined when CAPTCHAAI_API_KEY is not set (no check is made here).
const API_KEY = process.env.CAPTCHAAI_API_KEY;
/**
 * Submit a CAPTCHA to CaptchaAI, poll for the result, and record the outcome.
 *
 * Items written:
 *   - SITE#{sitekey} / SOLVE#{timestamp}: per-site history entry for every
 *     terminal API outcome (submit error, solved, poll error). This is the
 *     partition getSiteHistory() reads.
 *   - SOLVE#{captchaId} / META: full solve record on success, indexed in GSI1
 *     under STATUS#solved.
 *
 * @param {string} sitekey  Site key, sent to the API as `googlekey`.
 * @param {string} pageurl  URL of the page hosting the CAPTCHA.
 * @param {string} [type="recaptcha_v2"]  Label stored as `captcha_type`; the
 *   API request itself always uses method "userrecaptcha".
 * @returns {Promise<{solution: string} | {error: string}>} The solved token,
 *   or the API's error code, or "TIMEOUT" after 60 polls at 5-second intervals.
 */
async function solveAndTrack(sitekey, pageurl, type = "recaptcha_v2") {
  const now = new Date();
  const timestamp = now.toISOString();
  const ttl = Math.floor(now.getTime() / 1000) + 90 * 24 * 3600;
  const siteKey = { PK: `SITE#${sitekey}`, SK: `SOLVE#${timestamp}` };

  // Write one item to the tracking table.
  const put = (item) => client.send(new PutCommand({ TableName: TABLE, Item: item }));

  const submit = await axios.post("https://ocr.captchaai.com/in.php", null, {
    params: { key: API_KEY, method: "userrecaptcha", googlekey: sitekey, pageurl, json: 1 },
  });

  if (submit.data.status !== 1) {
    await put({ ...siteKey, status: "error", error: submit.data.request, ttl });
    return { error: submit.data.request };
  }

  const captchaId = submit.data.request;

  for (let polls = 1; polls <= 60; polls++) {
    await new Promise((resolve) => setTimeout(resolve, 5000));

    const poll = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: captchaId, json: 1 },
    });

    if (poll.data.status === 1) {
      // Take one clock reading so solved_at and elapsed_ms agree.
      const solvedAt = new Date();
      const elapsed = solvedAt.getTime() - now.getTime();

      await put({
        PK: `SOLVE#${captchaId}`, SK: "META", captcha_type: type,
        sitekey, pageurl, status: "solved", submitted_at: timestamp,
        solved_at: solvedAt.toISOString(), elapsed_ms: elapsed, polls, ttl,
        project: "default",
        GSI1PK: "STATUS#solved", GSI1SK: timestamp,
      });
      // Also store in site history; without this item, getSiteHistory()
      // would never return a successful solve.
      await put({ ...siteKey, captcha_id: captchaId, status: "solved", elapsed_ms: elapsed, ttl });

      return { solution: poll.data.request };
    }

    if (poll.data.request !== "CAPCHA_NOT_READY") {
      // Terminal API error: record it so the failure shows up in site history.
      await put({ ...siteKey, captcha_id: captchaId, status: "error", error: poll.data.request, ttl });
      return { error: poll.data.request };
    }
  }

  return { error: "TIMEOUT" };
}
/**
 * Fetch up to `limit` history items for a site key, in descending sort-key order.
 *
 * @param {string} sitekey  Site key whose SITE# partition is queried.
 * @param {number} [limit=50]  Maximum number of items to request.
 * @returns {Promise<object[]>} The `Items` array from the query response.
 */
async function getSiteHistory(sitekey, limit = 50) {
  const partition = `SITE#${sitekey}`;
  const command = new QueryCommand({
    TableName: TABLE,
    KeyConditionExpression: "PK = :pk",
    ExpressionAttributeValues: { ":pk": partition },
    // Descending sort-key order; sort keys embed the submission timestamp.
    ScanIndexForward: false,
    Limit: limit,
  });
  const { Items } = await client.send(command);
  return Items;
}
Cost Optimization
| Strategy | Impact |
|---|---|
| Use on-demand billing for variable workloads | No over-provisioning |
| Enable TTL for automatic record cleanup | Reduces storage costs |
| Project only needed attributes in queries | Lower read unit consumption |
| Batch writes with `BatchWriteItem` | Fewer API calls |
| Use DynamoDB Streams for analytics | Offload aggregation to Lambda |
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| `ProvisionedThroughputExceededException` | Too many writes per second | Switch to on-demand billing or increase WCU |
| TTL items not deleted immediately | DynamoDB TTL deletion is eventual (~48 hours) | Don't rely on TTL for real-time cleanup; filter expired items in queries |
| Hot partition on `STATS#{date}` | All workers writing to same partition | Use randomized suffix: `STATS#{date}#shard{0-9}` |
| Query returns too many items | Broad partition key | Add SK conditions to narrow results |
FAQ
Why DynamoDB instead of RDS for serverless CAPTCHA tracking?
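DynamoDB is accessed through a stateless HTTPS API, so there is no database connection pool to exhaust — a good fit for Lambda, where many concurrent invocations each make their own requests. RDS, by contrast, requires connection pooling (RDS Proxy), which adds cost and complexity.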
How much does DynamoDB cost for CAPTCHA tracking?
With on-demand billing: ~$1.25 per million writes and ~$0.25 per million reads. At 10,000 solves/day, expect under $1/month for storage and access.
Can I query across all CAPTCHA types?
Use the GSI1 index to query by status across types. For cross-type analytics, aggregate using DynamoDB Streams and a Lambda function that writes to the STATS# partition.
Next Steps
Build serverless CAPTCHA tracking that scales automatically — get your CaptchaAI API key.
Related guides:
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.