Oxylabs offers 100M+ residential IPs and 2M+ datacenter IPs — known for reliability and enterprise-grade infrastructure. This guide shows how to integrate Oxylabs proxies with CaptchaAI for CAPTCHA-protected scraping workflows.
Oxylabs Proxy Products
| Product | IPs | Speed | Use Case |
|---|---|---|---|
| Datacenter Proxies | 2M+ | Very fast | High-volume, speed-first |
| Residential Proxies | 100M+ | Medium | CAPTCHA-sensitive sites |
| ISP Proxies | Residential-grade | Fast | E-commerce, social |
| Mobile Proxies | 20M+ | Slow | Mobile-specific targets |
| SERP Scraper API | Managed | Fast | Google/Bing results |
Python Integration
Requests
import requests
import time
# Oxylabs account credentials and proxy entry point. The values below are
# placeholders and must be replaced with real account details.
OXYLABS_USER = "customer-USERNAME"
OXYLABS_PASS = "PASSWORD"
OXYLABS_ENDPOINT = "pr.oxylabs.io:7777"

# CaptchaAI API key and the base URL the in.php / res.php calls are sent to.
CAPTCHAAI_KEY = "YOUR_API_KEY"
CAPTCHAAI_URL = "https://ocr.captchaai.com"

# Residential rotating proxy: both URL schemes are routed through the same
# authenticated HTTP proxy URL.
proxies = dict.fromkeys(
    ("http", "https"),
    f"http://{OXYLABS_USER}:{OXYLABS_PASS}@{OXYLABS_ENDPOINT}",
)
def fetch_page(url, country=None):
    """GET *url* through the Oxylabs proxy endpoint.

    When *country* is truthy, ``-cc-<country>`` is appended to the proxy
    username. The request is sent with a fixed desktop Chrome User-Agent and
    a 30-second timeout.

    Returns:
        The ``requests`` response object for the GET.
    """
    username = f"{OXYLABS_USER}-cc-{country}" if country else OXYLABS_USER
    proxy_url = f"http://{username}:{OXYLABS_PASS}@{OXYLABS_ENDPOINT}"
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36"
    )
    return requests.get(
        url,
        proxies={"http": proxy_url, "https": proxy_url},
        headers={"User-Agent": user_agent},
        timeout=30,
    )
def solve_recaptcha(site_url, sitekey, request_timeout=30):
    """Solve a reCAPTCHA through CaptchaAI and return the solution.

    Submits the task to ``in.php`` and then polls ``res.php`` every
    5 seconds, up to 60 times.

    Args:
        site_url: URL of the page showing the CAPTCHA (sent as ``pageurl``).
        sitekey: The page's reCAPTCHA site key (sent as ``googlekey``).
        request_timeout: Timeout in seconds applied to every HTTP call, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The ``request`` field of the first poll response whose ``status``
        is 1.

    Raises:
        RuntimeError: CaptchaAI rejected the submission or reported an error
            while solving.
        TimeoutError: No result was available after all polling attempts.
    """
    submit = requests.post(
        f"{CAPTCHAAI_URL}/in.php",
        data={
            "key": CAPTCHAAI_KEY,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": site_url,
            "json": 1,
        },
        timeout=request_timeout,
    )
    data = submit.json()
    if data["status"] != 1:
        raise RuntimeError(f"Submit: {data['request']}")
    task_id = data["request"]

    for _ in range(60):
        # Wait before every poll, including the first one.
        time.sleep(5)
        poll = requests.get(
            f"{CAPTCHAAI_URL}/res.php",
            params={
                "key": CAPTCHAAI_KEY,
                "action": "get",
                "id": task_id,
                "json": 1,
            },
            timeout=request_timeout,
        )
        data = poll.json()
        # "CAPCHA_NOT_READY" is the literal value compared against; keep the
        # spelling exactly as-is.
        if data["request"] == "CAPCHA_NOT_READY":
            continue
        if data["status"] == 1:
            return data["request"]
        raise RuntimeError(f"Solve: {data['request']}")

    raise TimeoutError("Timeout")
Sticky Sessions
def get_oxylabs_sticky_proxy(session_id, country=None):
    """Build a ``requests`` proxies mapping for an Oxylabs sticky session.

    The proxy username is ``OXYLABS_USER`` followed by
    ``-sessid-<session_id>`` and, when *country* is truthy,
    ``-cc-<country>``. Per this guide, reusing one session id keeps
    requests on the same IP.

    Returns:
        A dict with ``"http"`` and ``"https"`` keys pointing at the same
        proxy URL.
    """
    username_parts = [OXYLABS_USER, f"-sessid-{session_id}"]
    if country:
        username_parts.append(f"-cc-{country}")
    username = "".join(username_parts)
    return dict.fromkeys(
        ("http", "https"),
        f"http://{username}:{OXYLABS_PASS}@{OXYLABS_ENDPOINT}",
    )
# CAPTCHA workflow with sticky IP: one random session id is generated and the
# resulting proxy mapping is reused for both the form fetch and the submit.
import random, string
# Random 8-letter lowercase session id.
session = "".join(random.choices(string.ascii_lowercase, k=8))
proxy = get_oxylabs_sticky_proxy(session, country="us")
# All requests use the same IP
resp = requests.get("https://target.com/form", proxies=proxy)
# ... solve CAPTCHA ...
# NOTE(review): `token` is not assigned anywhere in this snippet. It must be
# set to the solved CAPTCHA token (e.g. the return value of solve_recaptcha)
# before the POST below runs, otherwise this line raises NameError.
resp = requests.post("https://target.com/submit", proxies=proxy, data={
"g-recaptcha-response": token,
})
Oxylabs Web Scraper API + CaptchaAI
For sites where the Web Scraper API's built-in rendering still returns a page containing a CAPTCHA, pass the CAPTCHA to CaptchaAI:
def scrape_with_oxylabs_api(url, timeout=180):
    """Render *url* with the Oxylabs Web Scraper API, then solve any reCAPTCHA.

    Posts a ``universal`` query with HTML rendering and a 3-second wait
    instruction, reads the rendered HTML from the first result, and looks
    for a ``data-sitekey`` attribute. If one is found, the site key is
    passed to ``solve_recaptcha``.

    Args:
        url: Page to render.
        timeout: Timeout in seconds for the Oxylabs request, so a stalled
            connection cannot block the caller indefinitely.

    Returns:
        ``{"html": ..., "captcha_solved": True, "token": ...}`` when a site
        key was found and solved, otherwise
        ``{"html": ..., "captcha_solved": False}``.
    """
    # Kept as a local import, as in the original snippet.
    import re

    # NOTE(review): 180 s is a deliberately generous default because the
    # request asks for a rendered page plus a 3 s wait; tune as needed.
    resp = requests.post(
        "https://realtime.oxylabs.io/v1/queries",
        auth=(OXYLABS_USER, OXYLABS_PASS),
        json={
            "source": "universal",
            "url": url,
            "render": "html",
            "browser_instructions": [
                {"type": "wait", "wait_time_s": 3},
            ],
        },
        timeout=timeout,
    )
    html = resp.json()["results"][0]["content"]

    # Check for CAPTCHA in rendered HTML.
    sitekey_match = re.search(r'data-sitekey="([^"]+)"', html)
    if sitekey_match is None:
        return {"html": html, "captcha_solved": False}

    token = solve_recaptcha(url, sitekey_match.group(1))
    return {"html": html, "captcha_solved": True, "token": token}
Datacenter vs Residential for CAPTCHA
# Datacenter proxy mapping. Per this guide: fast, but a higher CAPTCHA rate.
DC_PROXY = dict.fromkeys(
    ("http", "https"),
    f"http://{OXYLABS_USER}:{OXYLABS_PASS}@dc.pr.oxylabs.io:10000",
)

# Residential proxy mapping. Per this guide: slower, but a lower CAPTCHA rate.
RES_PROXY = dict.fromkeys(
    ("http", "https"),
    f"http://{OXYLABS_USER}:{OXYLABS_PASS}@pr.oxylabs.io:7777",
)
def smart_proxy_selection(url, captcha_sensitive=True):
    """Choose between the residential and datacenter proxy mappings.

    Returns ``RES_PROXY`` when *captcha_sensitive* is truthy and
    ``DC_PROXY`` otherwise. *url* is accepted but not used by the current
    logic.
    """
    # Residential is the choice for CAPTCHA-sensitive targets; datacenter is
    # the faster fallback, with CaptchaAI left to handle any CAPTCHAs.
    return RES_PROXY if captcha_sensitive else DC_PROXY
Concurrent Pipeline
from concurrent.futures import ThreadPoolExecutor, as_completed
def process_url(url):
    """Fetch *url* through a fresh sticky session and solve its reCAPTCHA.

    A random 8-letter session id is generated per call. The page is fetched
    through that session's proxy with a 30-second timeout and searched for a
    ``data-sitekey`` attribute.

    Returns:
        One of:
        ``{"url", "status": "solved", "token"}`` when a site key was found
        and solved (the token is returned so the caller can submit it);
        ``{"url", "status": "no_captcha"}`` when no site key was found;
        ``{"url", "status": "error", "error"}`` when any exception occurred.
    """
    # Local imports keep the function usable regardless of where in the file
    # the module-level imports of these names appear.
    import random
    import re
    import string

    session = "".join(random.choices(string.ascii_lowercase, k=8))
    proxy = get_oxylabs_sticky_proxy(session, country="us")
    try:
        resp = requests.get(url, proxies=proxy, timeout=30)
        match = re.search(r'data-sitekey="([^"]+)"', resp.text)
        if match is None:
            return {"url": url, "status": "no_captcha"}
        token = solve_recaptcha(url, match.group(1))
        return {"url": url, "status": "solved", "token": token}
    except Exception as e:
        # Worker boundary: report the failure as data so that one bad URL
        # does not raise out of future.result() in the caller.
        return {"url": url, "status": "error", "error": str(e)}
# Example targets for the concurrent pipeline.
urls = [
    "https://site1.com",
    "https://site2.com",
    "https://site3.com",
]

# Run process_url on up to 5 worker threads and print each result as soon as
# its future finishes (completion order, not submission order).
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = {
        executor.submit(process_url, target): target
        for target in urls
    }
    for future in as_completed(futures):
        print(future.result())
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| 403 from proxy | IP blacklisted | Switch to residential or rotate |
| Auth failed | Wrong endpoint | Datacenter: dc.pr.oxylabs.io:10000, Residential: pr.oxylabs.io:7777 |
| Sticky session expires | Default 10 min | Create new session ID |
| CAPTCHA on every page | Datacenter IP recognized | Use residential proxy |
| Slow response | Residential network latency | Accept or switch to ISP proxy |
FAQ
Which Oxylabs product should I use with CaptchaAI?
Residential for CAPTCHA-heavy sites (fewer challenges). Datacenter for speed-first workflows where CaptchaAI handles all CAPTCHAs.
Can Oxylabs solve CAPTCHAs itself?
Oxylabs' Web Scraper API handles some JavaScript challenges but doesn't solve CAPTCHAs. Use CaptchaAI for reCAPTCHA, Turnstile, and other CAPTCHA types.
Is the pricing per GB or per request?
Residential: per GB. Datacenter: per IP. Check the Oxylabs pricing page for current rates.
Related Guides
Combine Oxylabs' proxy infrastructure with CaptchaAI solving — get your API key.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.