Smartproxy provides 55M+ residential IPs with a simple rotating gateway. Combined with CaptchaAI for CAPTCHA solving, you get a reliable pipeline for accessing CAPTCHA-protected sites through clean residential IPs.
Smartproxy Proxy Types
| Type | Pool | Best For | CAPTCHA Rate |
|---|---|---|---|
| Residential | 55M+ IPs | General scraping | Low |
| Datacenter | 100K+ IPs | High-speed data | Medium-high |
| Mobile | 10M+ IPs | Mobile-specific sites | Very low |
| ISP | Residential-grade static | Session-heavy work | Low |
Python Setup
Basic Requests
import requests
import time
# Smartproxy gateway login and endpoint (sample values used throughout this guide).
SMARTPROXY_USER = "spuser"
SMARTPROXY_PASS = "sppassword"
SMARTPROXY_HOST = "gate.smartproxy.com"
SMARTPROXY_PORT = 10001

# CaptchaAI API key and base URL used by the solver calls.
CAPTCHAAI_KEY = "YOUR_API_KEY"
CAPTCHAAI_URL = "https://ocr.captchaai.com"

# requests-style proxy mapping: both schemes are routed through the same
# HTTP gateway URL.
proxies = dict.fromkeys(
    ("http", "https"),
    f"http://{SMARTPROXY_USER}:{SMARTPROXY_PASS}@{SMARTPROXY_HOST}:{SMARTPROXY_PORT}",
)
def fetch_page(url):
    """GET ``url`` through the module-level ``proxies`` mapping with a 30-second timeout."""
    request_options = {"proxies": proxies, "timeout": 30}
    return requests.get(url, **request_options)
def solve_captcha(site_url, sitekey, captcha_type="recaptcha_v2",
                  request_timeout=30, poll_interval=5, max_polls=60):
    """Submit a CAPTCHA to CaptchaAI and poll until a token is returned.

    Args:
        site_url: URL of the page showing the CAPTCHA (sent as ``pageurl``).
        sitekey: Site key taken from that page.
        captcha_type: ``"turnstile"`` submits with the ``turnstile`` method;
            any other value is submitted as ``userrecaptcha``.
        request_timeout: Seconds allowed for each HTTP call to CaptchaAI.
        poll_interval: Seconds to sleep before each result poll.
        max_polls: Number of polls to attempt before giving up.

    Returns:
        The value of the ``request`` field of the first response whose
        ``status`` is 1 (the solved token).

    Raises:
        RuntimeError: If the submit is rejected or the solver reports an error.
        TimeoutError: If no result is ready after ``max_polls`` polls.
    """
    submit_data = {
        "key": CAPTCHAAI_KEY,
        "pageurl": site_url,
        "json": 1,
    }
    if captcha_type == "turnstile":
        submit_data["method"] = "turnstile"
        submit_data["sitekey"] = sitekey
    else:
        submit_data["method"] = "userrecaptcha"
        submit_data["googlekey"] = sitekey

    # Without a timeout a stalled connection would block the caller forever.
    resp = requests.post(
        f"{CAPTCHAAI_URL}/in.php", data=submit_data, timeout=request_timeout
    )
    data = resp.json()
    if data.get("status") != 1:
        raise RuntimeError(f"Submit failed: {data.get('request')}")
    task_id = data["request"]

    poll_params = {
        "key": CAPTCHAAI_KEY,
        "action": "get",
        "id": task_id,
        "json": 1,
    }
    for _ in range(max_polls):
        time.sleep(poll_interval)
        resp = requests.get(
            f"{CAPTCHAAI_URL}/res.php", params=poll_params, timeout=request_timeout
        )
        data = resp.json()
        # "CAPCHA_NOT_READY" is the exact literal compared against; keep the spelling.
        if data.get("request") == "CAPCHA_NOT_READY":
            continue
        if data.get("status") == 1:
            return data["request"]
        # Any other answer is a solver-side error: stop polling immediately.
        raise RuntimeError(f"Solve: {data.get('request')}")
    raise TimeoutError("Timeout")
Sticky Sessions
import random
import string
def get_sticky_proxy(session_duration_minutes=10):
    """Build a requests proxy mapping tied to one randomly named sticky session.

    A fresh 8-character session id (lowercase letters and digits) and the
    requested duration are appended to the Smartproxy username as
    ``-session-<id>-sessionduration-<minutes>``.
    """
    alphabet = string.ascii_lowercase + string.digits
    session_id = "".join(random.choices(alphabet, k=8))
    username = (
        f"{SMARTPROXY_USER}-session-{session_id}"
        f"-sessionduration-{session_duration_minutes}"
    )
    gateway = f"http://{username}:{SMARTPROXY_PASS}@{SMARTPROXY_HOST}:{SMARTPROXY_PORT}"
    return {scheme: gateway for scheme in ("http", "https")}
# Use the same sticky proxy mapping for the entire CAPTCHA workflow
sticky = get_sticky_proxy(session_duration_minutes=10)

# Page load (30 s timeout, as in fetch_page, so a stalled proxy cannot hang the script)
resp = requests.get("https://target.com/form", proxies=sticky, timeout=30)

# Solve CAPTCHA
token = solve_captcha("https://target.com/form", "SITEKEY_HERE")

# Submit through the same proxy mapping
resp = requests.post(
    "https://target.com/submit",
    data={"g-recaptcha-response": token},
    proxies=sticky,
    timeout=30,
)
Country Targeting
# Smartproxy country targeting via username
def get_country_proxy(country_code):
    """Return a requests proxy mapping whose username carries ``-country-<country_code>``."""
    username = f"{SMARTPROXY_USER}-country-{country_code}"
    gateway = f"http://{username}:{SMARTPROXY_PASS}@{SMARTPROXY_HOST}:{SMARTPROXY_PORT}"
    return dict.fromkeys(("http", "https"), gateway)
# One proxy mapping per target country: US ("us"), UK ("gb"), Germany ("de")
us_proxy, uk_proxy, de_proxy = map(get_country_proxy, ("us", "gb", "de"))
Selenium Integration
from selenium import webdriver
from selenium.webdriver.common.by import By
def create_smartproxy_driver(country=None, sticky_session=None):
    """Start a Chrome driver pointed at the Smartproxy gateway.

    ``country`` and ``sticky_session``, when given, are appended to the proxy
    username as ``-country-<code>`` and ``-session-<id>``. The resulting
    credential string is only assembled here: Chrome itself receives just the
    gateway host and port through ``--proxy-server``.
    """
    username_parts = [SMARTPROXY_USER]
    if country:
        username_parts.append(f"-country-{country}")
    if sticky_session:
        username_parts.append(f"-session-{sticky_session}")
    proxy_user = "".join(username_parts)
    # Assembled for an auth-capable layer; not passed to Chrome below.
    proxy_url = f"{proxy_user}:{SMARTPROXY_PASS}@{SMARTPROXY_HOST}:{SMARTPROXY_PORT}"

    chrome_flags = (
        f"--proxy-server=http://{SMARTPROXY_HOST}:{SMARTPROXY_PORT}",
        "--disable-blink-features=AutomationControlled",
        "--window-size=1920,1080",
    )
    options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        options.add_argument(flag)
    # For authenticated proxies, use seleniumwire or extension
    return webdriver.Chrome(options=options)
def scrape_with_captcha(url, country="us"):
    """Load ``url`` in a proxied Chrome session, solve its CAPTCHA if present, return the HTML.

    A random 8-letter session id is generated and handed to
    ``create_smartproxy_driver`` together with ``country``. When an element
    with a ``data-sitekey`` attribute is found, the site key is solved through
    ``solve_captcha``, the token is written into ``#g-recaptcha-response`` and
    the first form on the page is submitted.

    Args:
        url: Page to open.
        country: Country code forwarded to the driver factory.

    Returns:
        ``driver.page_source`` after the optional solve-and-submit step.
    """
    session_id = "".join(random.choices(string.ascii_lowercase, k=8))
    driver = create_smartproxy_driver(country=country, sticky_session=session_id)
    try:
        driver.get(url)
        time.sleep(3)  # fixed pause before looking for the widget
        sitekey = driver.execute_script(
            "return document.querySelector('[data-sitekey]')?.getAttribute('data-sitekey')"
        )
        if sitekey:
            token = solve_captcha(url, sitekey)
            # Pass the token as a script argument rather than interpolating it
            # into the JavaScript source, so quotes or backslashes in the value
            # cannot break the script or inject code into it.
            driver.execute_script(
                "document.querySelector('#g-recaptcha-response').value = arguments[0];",
                token,
            )
            driver.find_element(By.CSS_SELECTOR, "form").submit()
            time.sleep(3)  # fixed pause after submitting
        return driver.page_source
    finally:
        # Always release the browser, even if a step above raised.
        driver.quit()
Node.js Integration
const axios = require("axios");
// Use the named export: it exists in https-proxy-agent v5 and is the only
// way to get the class in newer major versions, where the module object
// itself is no longer a constructor.
const { HttpsProxyAgent } = require("https-proxy-agent");
const CAPTCHAAI_KEY = "YOUR_API_KEY";
/**
 * Build a proxy agent for the Smartproxy gateway.
 *
 * Optional `options.country` and `options.session` values are appended to
 * the username as `-country-<code>` and `-session-<id>`.
 */
function getSmartproxyAgent(options = {}) {
  const { country, session } = options;
  const userParts = ["spuser"];
  if (country) userParts.push(`-country-${country}`);
  if (session) userParts.push(`-session-${session}`);
  const proxyUrl = `http://${userParts.join("")}:sppassword@gate.smartproxy.com:10001`;
  return new HttpsProxyAgent(proxyUrl);
}
/**
 * Fetch `url` through a US sticky Smartproxy session, then solve its
 * reCAPTCHA with CaptchaAI.
 *
 * @param {string} url - Page that shows the CAPTCHA.
 * @param {string} sitekey - reCAPTCHA site key from that page.
 * @returns {Promise<string>} The solved token.
 * @throws {Error} When the submit is rejected, the solver reports an error,
 *   or no result is ready after 60 polls (5 seconds apart).
 */
async function scrapeWithCaptcha(url, sitekey) {
  const REQUEST_TIMEOUT_MS = 30000;
  const agent = getSmartproxyAgent({
    country: "us",
    session: `sess-${Date.now()}`,
  });

  // Fetch page through proxy. `proxy: false` keeps axios from applying
  // environment-derived proxy settings on top of the custom agent.
  await axios.get(url, {
    httpsAgent: agent,
    proxy: false,
    timeout: REQUEST_TIMEOUT_MS,
  });

  // Solve CAPTCHA via CaptchaAI (no proxy needed)
  const submitResp = await axios.post(
    "https://ocr.captchaai.com/in.php",
    null,
    {
      params: {
        key: CAPTCHAAI_KEY,
        method: "userrecaptcha",
        googlekey: sitekey,
        pageurl: url,
        json: 1,
      },
      timeout: REQUEST_TIMEOUT_MS,
    }
  );
  // A rejected submit carries an error code in `request`, not a task id;
  // fail now instead of polling with it.
  if (submitResp.data.status !== 1) {
    throw new Error(`Submit failed: ${submitResp.data.request}`);
  }
  const taskId = submitResp.data.request;

  // Poll for result
  for (let i = 0; i < 60; i++) {
    await new Promise((r) => setTimeout(r, 5000));
    const result = await axios.get("https://ocr.captchaai.com/res.php", {
      params: {
        key: CAPTCHAAI_KEY,
        action: "get",
        id: taskId,
        json: 1,
      },
      timeout: REQUEST_TIMEOUT_MS,
    });
    if (result.data.request === "CAPCHA_NOT_READY") continue;
    if (result.data.status === 1) return result.data.request;
    // Any other answer is a solver error: surface it rather than polling
    // until the timeout.
    throw new Error(`Solve: ${result.data.request}`);
  }
  throw new Error("Timeout");
}
Concurrent Scraping Pipeline
from concurrent.futures import ThreadPoolExecutor, as_completed
def process_url(url):
    """Fetch ``url`` through a sticky proxy and solve its CAPTCHA when one is detected.

    Args:
        url: Page to fetch.

    Returns:
        A dict with ``url`` and ``status``:

        * ``"solved"`` – a ``data-sitekey`` attribute was found and solved;
          ``token`` holds the first 30 characters of the returned token.
        * ``"no_captcha"`` – no ``data-sitekey`` attribute was found.
        * ``"error"`` – an exception occurred; ``error`` holds its text.

    Any exception raised while fetching or solving is caught and reported as
    an ``"error"`` result instead of propagating.
    """
    import re

    # get_sticky_proxy() generates its own session id, so none is built here.
    proxy = get_sticky_proxy(10)
    try:
        resp = requests.get(url, proxies=proxy, timeout=30)
        # Simplified detection: a data-sitekey attribute quoted with either
        # double or single quotes.
        match = re.search(r"""data-sitekey=(?:"([^"]+)"|'([^']+)')""", resp.text)
        if match:
            sitekey = match.group(1) or match.group(2)
            token = solve_captcha(url, sitekey)
            return {"url": url, "status": "solved", "token": token[:30]}
        return {"url": url, "status": "no_captcha"}
    except Exception as e:
        return {"url": url, "status": "error", "error": str(e)}
# Sample targets for the pipeline
urls = [
    "https://site1.com/page",
    "https://site2.com/page",
    "https://site3.com/page",
]

# Fan the URLs out over up to five worker threads and print each result as
# soon as its future completes (completion order, not submission order).
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = {
        executor.submit(process_url, u): u
        for u in urls
    }
    for future in as_completed(futures):
        result = future.result()
        status_label = f"[{result['status']}]"
        print(f"{status_label} {result['url']}")
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| 407 Proxy Auth Required | Wrong username/password format | Check Smartproxy dashboard for credentials |
| IP rotates mid-session | Not using sticky sessions | Add `-session-<ID>` to the proxy username |
| CAPTCHA on every request | Using datacenter endpoint | Switch to residential gateway |
| Slow connections | Congested geo-target | Try different country or city |
| Token rejected after solve | IP changed between load and submit | Use longer sticky session duration |
FAQ
Can I pass Smartproxy IPs to CaptchaAI?
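Yes. CaptchaAI accepts a proxy parameter in the submit request, which makes CaptchaAI solve from the same IP as your browser. The examples above do not send it; add it to the submit data if you need the solve and the page load to share one IP.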
Which Smartproxy plan works best for CAPTCHA workflows?
The residential plan with sticky sessions. Pay-as-you-go is cost-effective for moderate volumes. Enterprise plans offer guaranteed bandwidth.
How long should sticky sessions be?
10 minutes covers most CAPTCHA workflows (page load → solve → submit). Extend to 30 minutes for multi-step forms.
Related Guides
Pair Smartproxy's residential network with CaptchaAI — get your API key and start solving.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.