Correctly encoding CAPTCHA images in base64 is the first step to reliable solving. Wrong encoding means wrong answers or errors. This guide covers the right way to do it.
The Base64 Submission Format
CaptchaAI accepts image CAPTCHAs in base64 format via the method=base64 parameter:
import requests
import base64
import os
def submit_image_captcha(image_base64):
    """Send a base64-encoded CAPTCHA image to CaptchaAI and return its reply.

    Args:
        image_base64: Base64 text of the image; sent as the ``body`` form field.

    Returns:
        The JSON-decoded response body.

    Raises:
        KeyError: If the ``CAPTCHAAI_API_KEY`` environment variable is not set.
    """
    # The API key is read from the environment so it never lives in source.
    form_fields = {
        "key": os.environ["CAPTCHAAI_API_KEY"],
        "method": "base64",
        "body": image_base64,
        "json": 1,
    }
    response = requests.post(
        "https://ocr.captchaai.com/in.php",
        data=form_fields,
        timeout=30,
    )
    return response.json()
Encoding from File
# from_file.py
import base64
def encode_from_file(filepath):
    """Return the contents of the file at *filepath* as a base64 ASCII string.

    The file is opened in binary mode so the image bytes are encoded exactly
    as stored on disk.
    """
    with open(filepath, "rb") as image_file:
        encoded_bytes = base64.b64encode(image_file.read())
    return encoded_bytes.decode("ascii")
# Usage
b64 = encode_from_file("captcha.png")
print(f"Encoded length: {len(b64)} chars")
Encoding from URL
# from_url.py
import requests
import base64
def encode_from_url(image_url):
    """Fetch *image_url* and return the response body as a base64 ASCII string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the Content-Type header does not start with ``image/``.
    """
    response = requests.get(image_url, timeout=15)
    response.raise_for_status()

    # Reject HTML error pages and other non-image payloads before encoding.
    mime_type = response.headers.get("Content-Type", "")
    is_image = mime_type.startswith("image/")
    if not is_image:
        raise ValueError(f"Not an image: {mime_type}")

    encoded_bytes = base64.b64encode(response.content)
    return encoded_bytes.decode("ascii")
# Usage
b64 = encode_from_url("https://example.com/captcha.png")
Encoding from Selenium Screenshot
# from_selenium.py
import base64
from selenium.webdriver.common.by import By
def encode_from_element(driver, selector):
    """Return the base64 screenshot of the element matching a CSS selector.

    The value comes straight from the element's ``screenshot_as_base64``
    attribute, so it is returned as-is without any further encoding.
    """
    return driver.find_element(By.CSS_SELECTOR, selector).screenshot_as_base64
def encode_from_page_crop(driver, selector):
    """Crop the element matching *selector* out of a driver screenshot.

    Takes a screenshot via the driver, crops it to the element's reported
    location and size, re-saves the crop as PNG, and returns it as a base64
    ASCII string.
    """
    from PIL import Image
    import io

    target = driver.find_element(By.CSS_SELECTOR, selector)
    origin = target.location
    dimensions = target.size

    # Screenshot taken by the driver, loaded into Pillow for cropping.
    page_image = Image.open(io.BytesIO(driver.get_screenshot_as_png()))

    # NOTE(review): this assumes element coordinates map 1:1 onto screenshot
    # pixels (no device-pixel-ratio scaling, no scroll offset) — confirm for
    # the target browser.
    x0, y0 = origin["x"], origin["y"]
    crop_box = (x0, y0, x0 + dimensions["width"], y0 + dimensions["height"])

    # Serialize the cropped region to PNG in memory, then base64-encode it.
    png_out = io.BytesIO()
    page_image.crop(crop_box).save(png_out, format="PNG")
    return base64.b64encode(png_out.getvalue()).decode("ascii")
Common Encoding Mistakes
Mistake 1: Including the Data URI Prefix
# WRONG — includes data URI prefix
bad = "data:image/png;base64,iVBORw0KGgo..."
# RIGHT — raw base64 only
good = "iVBORw0KGgo..."
# Fix: Strip the prefix
def clean_base64(b64_string):
    """Return the part of *b64_string* after its first comma, if it has one.

    A data URI such as ``data:image/png;base64,<payload>`` is reduced to
    ``<payload>``; a string without a comma is returned unchanged.
    """
    _prefix, comma, payload = b64_string.partition(",")
    return payload if comma else b64_string
Mistake 2: Double Encoding
# WRONG — encoding an already-encoded string
already_b64 = element.screenshot_as_base64
double_encoded = base64.b64encode(already_b64.encode()).decode() # BAD
# RIGHT — use as-is
correct = element.screenshot_as_base64 # Already base64
Mistake 3: Encoding Text Instead of Bytes
# WRONG — reading as text
with open("captcha.png", "r") as f: # Text mode
content = f.read() # Corrupted binary data
# RIGHT — reading as bytes
with open("captcha.png", "rb") as f: # Binary mode
content = f.read()
encoded = base64.b64encode(content).decode("ascii")
Validation Before Submission
# validate.py
import base64
import io
def validate_captcha_image(b64_string):
    """Validate a base64 image string before submitting it to CaptchaAI.

    Args:
        b64_string: Base64 text of the image. A ``data:`` URI is accepted but
            reported as an error, because the prefix must be stripped before
            submission.

    Returns:
        A dict that always has these four keys:
        ``valid`` (True when no problems were found), ``format`` (``"PNG"``,
        ``"JPEG"``, ``"GIF"``, ``"WEBP"`` or ``"unknown"``), ``size_kb``
        (decoded size in KB, rounded to one decimal) and ``errors`` (a list
        of human-readable problem descriptions).
    """
    errors = []
    fmt = "unknown"

    # Check for data URI prefix
    if b64_string.startswith("data:"):
        errors.append("Contains data URI prefix — strip it")
        _header, comma, b64_string = b64_string.partition(",")
        if not comma:
            # "data:..." with no comma carries no payload to inspect.
            errors.append("Malformed data URI — no ',' before the payload")
            return {"valid": False, "format": fmt, "size_kb": 0.0, "errors": errors}

    # Try decoding. binascii.Error is a ValueError subclass, so this also
    # covers bad padding as well as non-ASCII input.
    try:
        decoded = base64.b64decode(b64_string)
    except ValueError as e:
        errors.append(f"Invalid base64: {e}")
        return {"valid": False, "format": fmt, "size_kb": 0.0, "errors": errors}

    # Check size
    size_kb = len(decoded) / 1024
    if size_kb < 1:
        errors.append(f"Image too small ({size_kb:.1f} KB) — likely corrupt")
    if size_kb > 500:
        errors.append(f"Image large ({size_kb:.1f} KB) — consider resizing")

    # Check image format via magic bytes
    if decoded.startswith(b"\x89PNG\r\n\x1a\n"):
        fmt = "PNG"
    elif decoded.startswith(b"\xff\xd8\xff"):
        fmt = "JPEG"
    elif decoded.startswith(b"GIF8"):
        fmt = "GIF"
    elif decoded[:4] == b"RIFF" and decoded[8:12] == b"WEBP":
        # RIFF is a generic container (WAV and AVI use it too); only the
        # "WEBP" tag at bytes 8-11 identifies a WebP image.
        fmt = "WEBP"
    else:
        errors.append("Unknown image format")

    return {
        "valid": not errors,
        "format": fmt,
        "size_kb": round(size_kb, 1),
        "errors": errors,
    }
# Usage
result = validate_captcha_image(b64_string)
if not result["valid"]:
print(f"Issues: {result['errors']}")
else:
print(f"Valid {result['format']}, {result['size_kb']} KB")
Image Format Recommendations
| Format | Best For | Size | Quality |
|---|---|---|---|
| PNG | Text CAPTCHAs, screenshots | Larger | Lossless |
| JPEG | Photo-based CAPTCHAs | Smaller | Lossy (use quality ≥ 85) |
| GIF | Animated CAPTCHAs | Variable | Limited colors |
| WEBP | Modern browsers | Smallest | Good quality |
Recommendation: Use PNG for text CAPTCHAs. The lossless compression preserves character edges, improving solve accuracy.
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| ERROR_WRONG_FILE_EXTENSION | Invalid base64 data | Validate with validate_captcha_image() |
| ERROR_TOO_BIG_CAPTCHA_FILESIZE | Image over 600 KB | Resize or compress before encoding |
| ERROR_ZERO_CAPTCHA_FILESIZE | Empty or corrupt image | Check download succeeded |
| Wrong solve result | Over-compressed JPEG | Use PNG or JPEG quality ≥ 85 |
FAQ
What's the maximum image size CaptchaAI accepts?
600 KB for base64 body. Resize large screenshots before encoding.
Should I use PNG or JPEG for text CAPTCHAs?
PNG. JPEG compression can blur character edges, reducing accuracy. PNG preserves exact pixels.
Can I submit SVG images?
No. Convert SVG to PNG first using a library like cairosvg; Pillow cannot open SVG files directly.
Related Guides
Encode CAPTCHAs correctly — start with CaptchaAI.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.