Preprocessing CAPTCHA images before submitting to CaptchaAI can improve solve rates on challenging images. These Python techniques clean up noise, improve contrast, and help the solver focus on the text.
When to Preprocess
Most CAPTCHAs solve fine without preprocessing. Consider it when:
- Solve rate drops below 80% on a specific site
- CAPTCHA images have heavy background noise
- Text is very faint or low contrast
- Background and text colors are similar
Grayscale Conversion
# grayscale.py
from PIL import Image
import base64
import io
def to_grayscale(image_path):
    """Load an image from disk and return a grayscale version of it.

    Args:
        image_path: Location of the image, passed straight to ``Image.open``.

    Returns:
        A new PIL image in mode ``"L"`` (8-bit grayscale).
    """
    # Image.open is lazy and keeps the underlying file open. convert() reads
    # the pixels into a new image, so the handle can be released right after.
    with Image.open(image_path) as img:
        return img.convert("L")
def image_to_base64(img):
    """Encode a PIL image as PNG and return the bytes as base64 text.

    Args:
        img: Object exposing ``save(fp, format=...)`` (a PIL image).

    Returns:
        ASCII ``str`` containing the base64 form of the PNG data.
    """
    with io.BytesIO() as png_stream:
        img.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("ascii")
Contrast Enhancement
# contrast.py
from PIL import ImageEnhance
def enhance_contrast(img, factor=2.0):
    """Return ``img`` with its contrast scaled so the text stands out.

    Args:
        img: PIL image to enhance.
        factor: Contrast multiplier. 1.0 keeps the original image, 2.0
            doubles the contrast, 3.0 triples it.
    """
    return ImageEnhance.Contrast(img).enhance(factor)
def enhance_sharpness(img, factor=2.0):
    """Return ``img`` with blurred text edges sharpened.

    Args:
        img: PIL image to enhance.
        factor: Sharpness multiplier handed to ``ImageEnhance.Sharpness``.
    """
    return ImageEnhance.Sharpness(img).enhance(factor)
Noise Removal
# noise.py
from PIL import ImageFilter
def remove_dots(img, min_neighbors=5, size=3):
    """Remove isolated dot noise with a median filter.

    Args:
        img: PIL image to clean.
        min_neighbors: Currently unused. It is kept in the signature so that
            existing callers passing it keep working.
        size: Side length, in pixels, of the square median window. It should
            be odd. A larger window (e.g. 5) removes bigger specks but also
            erodes thin character strokes.

    Returns:
        The filtered image.
    """
    return img.filter(ImageFilter.MedianFilter(size=size))
def remove_lines(img):
    """Reduce thin line noise with a 3x3 mode filter.

    Each pixel is replaced by the most frequent value found in its 3x3
    neighbourhood, so strokes thinner than the surrounding region tend to be
    absorbed by it.

    Args:
        img: PIL image to clean.

    Returns:
        The filtered image.
    """
    # The mode filter is the only operation applied. No array conversion is
    # needed, so this function does not depend on NumPy.
    return img.filter(ImageFilter.ModeFilter(size=3))
def smooth_edges(img):
    """Soften jagged character outlines using Pillow's ``SMOOTH`` filter."""
    smoothing_filter = ImageFilter.SMOOTH
    return img.filter(smoothing_filter)
Binarization (Thresholding)
# binarize.py
from PIL import Image
def binarize_simple(img, threshold=128):
    """Return a pure black-and-white version of ``img``.

    Args:
        img: PIL image; it is converted to grayscale before thresholding.
        threshold: Pixels strictly brighter than this value become white
            (255); every other pixel becomes black (0).
    """

    def to_black_or_white(level):
        if level > threshold:
            return 255
        return 0

    return img.convert("L").point(to_black_or_white)
def binarize_adaptive(img, block_size=11, offset=10):
    """Binarize with a local (adaptive) threshold to cope with uneven lighting.

    Every pixel is compared with the mean of the square window centred on it.
    The window is clipped at the image borders.

    Args:
        img: PIL image; it is converted to grayscale first.
        block_size: Nominal window width in pixels. The window actually
            spans ``block_size // 2`` pixels on each side of the centre.
        offset: Amount subtracted from the local mean before comparing. A
            pixel becomes white (255) when it is brighter than
            ``local_mean - offset`` and black (0) otherwise.

    Returns:
        A new mode ``"L"`` PIL image containing only 0 and 255.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
    """
    import numpy as np

    if block_size < 1:
        raise ValueError("block_size must be a positive integer")

    arr = np.array(img.convert("L"), dtype=np.float64)
    h, w = arr.shape
    half = block_size // 2

    # Summed-area table with a leading row and column of zeros, so the sum of
    # any rectangle is four lookups instead of a fresh slice-and-mean per
    # pixel. Values are whole numbers held in float64, so the sums are exact.
    integral = np.zeros((h + 1, w + 1), dtype=np.float64)
    integral[1:, 1:] = arr.cumsum(axis=0).cumsum(axis=1)

    # Window bounds per row and column, clipped to the image (end exclusive).
    rows = np.arange(h)
    cols = np.arange(w)
    y1 = np.maximum(rows - half, 0)[:, None]
    y2 = np.minimum(rows + half + 1, h)[:, None]
    x1 = np.maximum(cols - half, 0)[None, :]
    x2 = np.minimum(cols + half + 1, w)[None, :]

    window_sum = (
        integral[y2, x2] - integral[y1, x2] - integral[y2, x1] + integral[y1, x1]
    )
    window_area = (y2 - y1) * (x2 - x1)
    local_mean = window_sum / window_area

    result = np.where(arr > local_mean - offset, 255, 0).astype(np.uint8)
    return Image.fromarray(result)
def auto_threshold(img):
    """Binarize ``img`` at the threshold selected by Otsu's method.

    The grayscale histogram is scanned once. For each candidate level the
    pixels are split into "at or below" and "above" groups, and the level
    that maximises the between-group variance is passed to
    ``binarize_simple``.
    """
    import numpy as np

    pixels = np.array(img.convert("L"))
    histogram = np.histogram(pixels.flatten(), bins=256, range=(0, 256))[0]
    pixel_count = pixels.size
    intensity_total = sum(level * histogram[level] for level in range(256))

    chosen_level = 0
    top_score = 0
    below_count = 0
    below_total = 0
    for level, count in enumerate(histogram):
        below_count += count
        if below_count == 0:
            # No pixels at or below this level yet, so nothing to split.
            continue

        above_count = pixel_count - below_count
        if above_count == 0:
            # Every pixel is now in the lower group; later levels add nothing.
            break

        below_total += level * count
        mean_below = below_total / below_count
        mean_above = (intensity_total - below_total) / above_count

        score = below_count * above_count * (mean_below - mean_above) ** 2
        if score > top_score:
            top_score = score
            chosen_level = level

    return binarize_simple(img, chosen_level)
Color Isolation
Extract text by isolating specific color channels:
# color_isolation.py
from PIL import Image
import numpy as np
def isolate_dark_text(img, max_brightness=100):
    """Keep only dark pixels, which are likely text on a light background.

    Args:
        img: PIL image; it is converted to grayscale first.
        max_brightness: Pixels strictly darker than this value become black
            (0); every other pixel becomes white (255).

    Returns:
        A new PIL image built from the resulting 8-bit array.
    """
    luminance = np.array(img.convert("L"))
    is_dark = luminance < max_brightness
    black_on_white = np.where(is_dark, 0, 255).astype(np.uint8)
    return Image.fromarray(black_on_white)
def isolate_color_channel(img, channel="red"):
    """Extract a single colour channel from an image.

    Args:
        img: PIL image in any mode. It is converted to RGB first, so RGBA,
            palette and grayscale inputs work as well.
        channel: ``"red"``, ``"green"`` or ``"blue"``. Any other value falls
            back to the red channel.

    Returns:
        The selected channel as a single-band image.
    """
    # split() yields one band per channel. Unpacking into exactly three names
    # only works for 3-band images, so normalise to RGB first.
    r, g, b = img.convert("RGB").split()
    channels = {"red": r, "green": g, "blue": b}
    return channels.get(channel, r)
def isolate_colored_text(img, target_rgb, tolerance=50):
    """Keep only the pixels whose colour is close to ``target_rgb``.

    Closeness is the sum of the absolute per-channel differences.

    Args:
        img: PIL image; it is converted to RGB first.
        target_rgb: Sequence of three channel values to look for.
        tolerance: Pixels whose summed difference is strictly below this
            value become black (0); every other pixel becomes white (255).

    Returns:
        A new PIL image built from the resulting 8-bit array.
    """
    pixels = np.array(img.convert("RGB")).astype(int)
    wanted = np.array(target_rgb).astype(int)
    distance = np.abs(pixels - wanted).sum(axis=2)
    is_match = distance < tolerance
    black_on_white = np.where(is_match, 0, 255).astype(np.uint8)
    return Image.fromarray(black_on_white)
Complete Preprocessing Pipeline
# pipeline.py
from PIL import Image, ImageEnhance, ImageFilter
import base64
import io
def preprocess_captcha(image_path, config=None):
    """Run the full preprocessing pipeline and return a base64-encoded PNG.

    Steps run in a fixed order: grayscale, denoise, contrast, sharpen,
    binarize. Each step is controlled by one key of ``config``.

    Args:
        image_path: Location of the CAPTCHA image, passed to ``Image.open``.
        config: Optional dict of settings. Recognised keys:

            * ``"grayscale"``: truthy to convert to grayscale first.
            * ``"denoise"``: truthy to apply a 3x3 median filter.
            * ``"contrast"``: contrast factor; 1.0 or missing skips the step.
            * ``"sharpen"``: sharpness factor; 1.0 or missing skips the step.
            * ``"threshold"``: binarization cut-off; a missing or falsy
              value skips the step.

            When ``config`` is ``None``, all five steps run with the default
            values defined below.

    Returns:
        ASCII ``str`` containing the base64 form of the processed PNG.
    """
    if config is None:
        config = {
            "grayscale": True,
            "contrast": 2.0,
            "sharpen": 1.5,
            "denoise": True,
            "threshold": 128,
        }

    # Image.open is lazy and holds the file open. Copy the pixels into an
    # independent image inside the context manager so the handle is released.
    with Image.open(image_path) as source:
        # Step 1: Grayscale
        if config.get("grayscale"):
            img = source.convert("L")
        elif source.mode == "P":
            # Pillow cannot filter palette images, and thresholding palette
            # indices is meaningless, so expand them to real colours.
            img = source.convert("RGB")
        else:
            img = source.copy()

    # Step 2: Denoise
    if config.get("denoise"):
        img = img.filter(ImageFilter.MedianFilter(size=3))

    # Step 3: Contrast (always computed on a grayscale version of the image)
    contrast = config.get("contrast", 1.0)
    if contrast != 1.0:
        img = ImageEnhance.Contrast(img.convert("L")).enhance(contrast)

    # Step 4: Sharpen
    sharpen = config.get("sharpen", 1.0)
    if sharpen != 1.0:
        img = ImageEnhance.Sharpness(img).enhance(sharpen)

    # Step 5: Binarize
    threshold = config.get("threshold")
    if threshold:
        # Threshold a single grayscale band. Applying point() to a colour
        # image would threshold each channel separately and leave up to eight
        # colours instead of black and white.
        img = img.convert("L").point(lambda p: 255 if p > threshold else 0)

    # Encode
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("ascii")
# Usage: stronger contrast and sharpening, and a slightly lower threshold,
# than the built-in defaults.
usage_config = {
    "grayscale": True,
    "contrast": 2.5,
    "sharpen": 2.0,
    "denoise": True,
    "threshold": 120,
}
b64 = preprocess_captcha("captcha.png", config=usage_config)
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
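| Text disappears after binarization | Threshold does not separate text from background | Light text on a dark background: lower threshold (try 90-110); faint dark text on a light background: raise the threshold instead |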
| Noise remains after filtering | Filter too weak | Increase MedianFilter size to 5 |
| Characters merge | Contrast too high | Reduce contrast factor |
| Preprocessing makes results worse | Image was already clean | Skip preprocessing for clean CAPTCHAs |
FAQ
Should I always preprocess?
No. Try without preprocessing first. Only add it when solve rates are below expectations on a specific CAPTCHA type.
Does preprocessing increase API cost?
No. Preprocessing happens locally before submission. The API cost is the same regardless.
Which step has the most impact?
Contrast enhancement and binarization typically have the biggest impact on noisy, low-contrast CAPTCHAs.
Related Guides
Preprocess for better results — start with CaptchaAI.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.