Basic Puppeteer + CAPTCHA solving is a solved problem. This guide covers the advanced patterns: stealth mode, request interception, iframe handling, multi-page form flows, and parallel browser solving.
Prerequisites
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
Stealth Puppeteer setup
// Plugin-capable Puppeteer entry point; `puppeteer.launch` below goes through it.
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
// Register the stealth plugin once, before any browser is launched.
puppeteer.use(StealthPlugin());
// CaptchaAI API key, sent as the `key` field of every solver request.
// Placeholder value: replace it before running.
const API_KEY = "YOUR_API_KEY";
/**
 * Launch a browser through the module-level `puppeteer` instance.
 *
 * The browser runs in "new" headless mode with the sandbox, web security and
 * site isolation switched off through Chromium command-line flags.
 *
 * @returns {Promise<object>} Whatever `puppeteer.launch` resolves to.
 */
async function createBrowser() {
  const launchFlags = [
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-web-security",
    "--disable-features=IsolateOrigins,site-per-process",
  ];
  return puppeteer.launch({ headless: "new", args: launchFlags });
}
CaptchaAI solver helper
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Submit a CAPTCHA task to CaptchaAI and poll until a result is available.
 *
 * @param {string} method - Value of the `method` form field (this file uses
 *   "userrecaptcha", "turnstile", "geetest" and "base64").
 * @param {object} params - Extra form fields for the task; they are spread
 *   after `key`, `method` and `json`, so they win on a name clash.
 * @param {object} [options]
 * @param {number} [options.pollIntervalMs=5000] - Delay before each poll.
 * @param {number} [options.maxAttempts=30] - Number of polls before giving up.
 * @returns {Promise<string>} The `request` field of the first poll reply whose
 *   `status` is 1.
 * @throws {Error} "Submit: <code>" when the submit reply has `status !== 1`;
 *   "Unsolvable" for ERROR_CAPTCHA_UNSOLVABLE; "Solve: <code>" for any other
 *   ERROR_* poll reply; "Timed out" when every poll came back not-ready.
 */
async function solveCaptcha(
  method,
  params,
  { pollIntervalMs = 5000, maxAttempts = 30 } = {}
) {
  const submitResp = await fetch("https://ocr.captchaai.com/in.php", {
    method: "POST",
    body: new URLSearchParams({
      key: API_KEY,
      method,
      json: "1",
      ...params,
    }),
  });
  const submitData = await submitResp.json();
  if (submitData.status !== 1) {
    throw new Error(`Submit: ${submitData.request}`);
  }
  const taskId = submitData.request;

  // The poll URL never changes for a given task, so build it once.
  const pollQuery = new URLSearchParams({
    key: API_KEY,
    action: "get",
    id: taskId,
    json: "1",
  });
  const pollUrl = `https://ocr.captchaai.com/res.php?${pollQuery}`;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    await sleep(pollIntervalMs);
    const pollResp = await fetch(pollUrl);
    const data = await pollResp.json();
    if (data.status === 1) return data.request;
    if (data.request === "ERROR_CAPTCHA_UNSOLVABLE") {
      throw new Error("Unsolvable");
    }
    // Any other ERROR_* reply is final: polling again cannot succeed, so fail
    // now instead of burning the whole polling budget and reporting a
    // misleading timeout.
    if (typeof data.request === "string" && data.request.startsWith("ERROR")) {
      throw new Error(`Solve: ${data.request}`);
    }
  }
  throw new Error("Timed out");
}
reCAPTCHA in iframes
reCAPTCHA v2 renders inside an iframe, but the sitekey and the response field live on the main page. To handle this correctly:
/**
 * Solve the reCAPTCHA on `page` through CaptchaAI and inject the token.
 *
 * The sitekey is read from the main document: first from a `data-sitekey`
 * attribute, otherwise from the `k=` query parameter of the reCAPTCHA iframe
 * URL. The token is written into every `g-recaptcha-response` field on the
 * main page (not inside the iframe) and passed to any widget callback found
 * in `___grecaptcha_cfg.clients`.
 *
 * @param {object} page - Puppeteer page that already shows the widget.
 * @returns {Promise<string>} The token returned by `solveCaptcha`.
 * @throws {Error} "Sitekey not found" when neither source yields a sitekey;
 *   also propagates `waitForSelector` timeouts and `solveCaptcha` errors.
 */
async function solveRecaptchaInIframe(page) {
  // Wait for the reCAPTCHA iframe to load
  await page.waitForSelector('iframe[src*="recaptcha"]', { timeout: 10000 });

  // Get the sitekey from the main page
  const sitekey = await page.evaluate(() => {
    const keyed = document.querySelector("[data-sitekey]");
    if (keyed) return keyed.getAttribute("data-sitekey");

    const frame = document.querySelector('iframe[src*="recaptcha"]');
    const match = frame ? frame.src.match(/k=([A-Za-z0-9_-]{40})/) : null;
    return match ? match[1] : null;
  });
  if (!sitekey) throw new Error("Sitekey not found");

  // Solve via CaptchaAI
  const token = await solveCaptcha("userrecaptcha", {
    googlekey: sitekey,
    pageurl: page.url(),
  });

  // Inject token into the main page (not the iframe)
  await page.evaluate((t) => {
    // Match by name and by id prefix so every widget's field is filled, and
    // iterate so that a page without any such field does not throw.
    const fields = document.querySelectorAll(
      '[name="g-recaptcha-response"], [id^="g-recaptcha-response"]'
    );
    for (const field of fields) {
      field.value = t;
      field.style.display = "block";
    }

    // Trigger the callback
    if (typeof ___grecaptcha_cfg !== "undefined") {
      const clients = ___grecaptcha_cfg.clients;
      for (const key in clients) {
        const client = clients[key];
        for (const prop in client) {
          try {
            if (client[prop] && typeof client[prop].callback === "function") {
              client[prop].callback(t);
            }
          } catch {
            // Best effort: a throwing site callback must not undo the
            // injection that already happened.
          }
        }
      }
    }
  }, token);

  return token;
}
Request interception for CAPTCHA detection
/**
 * Navigate `page` to `url`, infer the CAPTCHA type from the network traffic
 * seen during the load, and solve it through CaptchaAI.
 *
 * Detection rules:
 *  - a request URL containing "recaptcha/api2" marks reCAPTCHA; the sitekey is
 *    taken from its `k=` parameter;
 *  - a request to "challenges.cloudflare.com/turnstile" marks Turnstile; the
 *    sitekey is then read from a `data-sitekey` attribute in the DOM;
 *  - a request URL containing "geetest" or carrying a `gt` query parameter
 *    marks GeeTest; `gt` and `challenge` are read from a JSON response whose
 *    URL contains "geetest" or "register".
 * When several rules match, the last matching request decides the type.
 *
 * Note that this function calls `page.goto`, so any state of the page that
 * was loaded before the call is discarded.
 *
 * @param {object} page - Puppeteer page to drive.
 * @param {string} url - URL to load; also sent to the solver as `pageurl`.
 * @returns {Promise<string|null>} The solver result, or null when no CAPTCHA
 *   was recognised or its parameters could not be collected.
 */
async function interceptAndSolve(page, url) {
  const captchaData = {};

  // Intercept requests to detect CAPTCHA type
  const onRequest = async (request) => {
    const requestUrl = request.url();
    if (requestUrl.includes("recaptcha/api2")) {
      captchaData.type = "recaptcha";
      const match = requestUrl.match(/k=([A-Za-z0-9_-]{40})/);
      if (match) captchaData.sitekey = match[1];
    }
    if (requestUrl.includes("challenges.cloudflare.com/turnstile")) {
      captchaData.type = "turnstile";
    }
    // Require `gt` to be a whole query-parameter name: a bare "gt=" substring
    // also matches e.g. "?target=" and would overwrite a type detected above.
    if (requestUrl.includes("geetest") || /[?&]gt=/.test(requestUrl)) {
      captchaData.type = "geetest";
    }

    // Another interception handler may already have resolved this request.
    if (request.isInterceptResolutionHandled?.()) return;
    try {
      await request.continue();
    } catch {
      // The request can no longer be continued (e.g. it was aborted or
      // interception is being torn down). Detection is observational, so
      // there is nothing to recover and the process must not crash on it.
    }
  };

  // Intercept responses for CAPTCHA parameters
  const onResponse = async (response) => {
    const responseUrl = response.url();
    if (!responseUrl.includes("geetest") && !responseUrl.includes("register")) {
      return;
    }
    try {
      const data = await response.json();
      if (data.gt && data.challenge) {
        captchaData.gt = data.gt;
        captchaData.challenge = data.challenge;
      }
    } catch {
      // Body is not JSON or is unavailable: no GeeTest parameters here.
    }
  };

  await page.setRequestInterception(true);
  page.on("request", onRequest);
  page.on("response", onResponse);
  try {
    await page.goto(url, { waitUntil: "networkidle2" });
  } finally {
    // Undo everything this call installed. Without this, a second call on the
    // same page would run two request handlers and continue each request
    // twice. Interception is switched off before the handler is detached so
    // that no intercepted request is left without anyone to continue it.
    try {
      await page.setRequestInterception(false);
    } finally {
      page.off("request", onRequest);
      page.off("response", onResponse);
    }
  }

  // Now solve based on detected type
  if (captchaData.type === "recaptcha" && captchaData.sitekey) {
    return await solveCaptcha("userrecaptcha", {
      googlekey: captchaData.sitekey,
      pageurl: url,
    });
  }

  if (captchaData.type === "turnstile") {
    const sitekey = await page.evaluate(() => {
      const el = document.querySelector("[data-sitekey]");
      return el ? el.getAttribute("data-sitekey") : null;
    });
    if (sitekey) {
      return await solveCaptcha("turnstile", { sitekey, pageurl: url });
    }
  }

  if (captchaData.type === "geetest" && captchaData.gt) {
    return await solveCaptcha("geetest", {
      gt: captchaData.gt,
      challenge: captchaData.challenge,
      pageurl: url,
    });
  }

  return null;
}
Multi-page form with CAPTCHA
/**
 * Look for a CAPTCHA widget in the DOM of the page as currently loaded.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @returns {Promise<{method: (string|null), sitekey: (string|null)}|null>}
 *   null when no widget marker is present; `{ method, sitekey }` for a
 *   `.cf-turnstile` or `.g-recaptcha` element carrying `data-sitekey`;
 *   `{ method: null, sitekey: null }` when a marker exists but no sitekey can
 *   be read from those elements.
 */
async function detectCaptchaWidget(page) {
  return page.evaluate(() => {
    const sitekeyOf = (selector) => {
      const el = document.querySelector(selector);
      return el ? el.getAttribute("data-sitekey") : null;
    };

    const turnstileKey = sitekeyOf(".cf-turnstile[data-sitekey]");
    if (turnstileKey) return { method: "turnstile", sitekey: turnstileKey };

    const recaptchaKey = sitekeyOf(".g-recaptcha[data-sitekey]");
    if (recaptchaKey) return { method: "userrecaptcha", sitekey: recaptchaKey };

    const marker =
      document.querySelector("[data-sitekey]") ||
      document.querySelector(".cf-turnstile") ||
      document.querySelector(".geetest_holder");
    return marker ? { method: null, sitekey: null } : null;
  });
}

/**
 * Drive a multi-step form, solving a CAPTCHA on any step that shows one.
 *
 * For every step the function types into `step.fields`, sets `step.selects`,
 * solves a detected CAPTCHA, and finally clicks `step.submit` and waits for
 * the resulting navigation. `startUrl` is loaded before the first step only.
 *
 * @param {object} browser - Browser used to open the page.
 * @param {string} startUrl - URL of the first form page.
 * @param {Array<object>} formSteps - Steps shaped like
 *   `{ description, fields?, selects?, submit? }`, where `fields` and
 *   `selects` map CSS selectors to values and `submit` is a CSS selector.
 * @returns {Promise<object>} The page, left open on the last step's result.
 * @throws Re-throws any step failure after closing the page.
 */
async function multiPageFormFlow(browser, startUrl, formSteps) {
  const page = await browser.newPage();
  try {
    for (const [index, step] of formSteps.entries()) {
      console.log(`Step ${index + 1}: ${step.description}`);

      if (index === 0) {
        await page.goto(startUrl, { waitUntil: "networkidle2" });
      }

      // Fill form fields
      for (const [selector, value] of Object.entries(step.fields ?? {})) {
        await page.waitForSelector(selector, { visible: true });
        await page.click(selector, { clickCount: 3 }); // Select all
        await page.type(selector, value, { delay: 50 }); // Human-like typing
      }

      // Handle dropdowns
      for (const [selector, value] of Object.entries(step.selects ?? {})) {
        await page.select(selector, value);
      }

      // Check for CAPTCHA
      const widget = await detectCaptchaWidget(page);
      if (widget) {
        console.log("CAPTCHA detected, solving...");
        const pageurl = page.url();
        let token;
        if (widget.method === "turnstile") {
          token = await solveCaptcha("turnstile", {
            sitekey: widget.sitekey,
            pageurl,
          });
        } else if (widget.method === "userrecaptcha") {
          token = await solveCaptcha("userrecaptcha", {
            googlekey: widget.sitekey,
            pageurl,
          });
        } else {
          // No sitekey readable from the DOM: fall back to network-based
          // detection. interceptAndSolve reloads the page, so values typed
          // above are lost on this path; the in-DOM paths avoid that reload.
          token = await interceptAndSolve(page, pageurl);
        }

        if (token) {
          await page.evaluate((t) => {
            const el = document.getElementById("g-recaptcha-response");
            if (el) el.value = t;
            const cf = document.querySelector('[name="cf-turnstile-response"]');
            if (cf) cf.value = t;
          }, token);
        }
      }

      // Click next/submit
      if (step.submit) {
        // Start waiting before the click: a navigation that finishes before
        // waitForNavigation is registered would otherwise never be observed.
        await Promise.all([
          page.waitForNavigation({ waitUntil: "networkidle2" }),
          page.click(step.submit),
        ]);
      }
    }
    return page;
  } catch (error) {
    // The caller never receives the page on failure, so close it here. A
    // failing close must not replace the error that caused it.
    await page.close().catch(() => {});
    throw error;
  }
}
// Usage
// Wrapped in an async function because this file loads its dependencies with
// `require`, and top-level `await` is not available in CommonJS modules. The
// browser is closed in `finally` so a failed flow does not leave Chromium
// running.
async function runRegistrationExample() {
  const browser = await createBrowser();
  try {
    const page = await multiPageFormFlow(browser, "https://example.com/register", [
      {
        description: "Personal info",
        fields: { "#name": "John Doe", "#email": "john@example.com" },
        submit: "#next-btn",
      },
      {
        description: "Address",
        fields: { "#address": "123 Main St", "#city": "New York" },
        selects: { "#state": "NY" },
        submit: "#next-btn",
      },
      {
        description: "Verification (has CAPTCHA)",
        fields: {},
        submit: "#submit-btn",
      },
    ]);
    console.log(`Finished on ${page.url()}`);
  } finally {
    await browser.close();
  }
}

runRegistrationExample().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Parallel browser solving
/**
 * Visit several URLs in one browser and solve the CAPTCHA found on each.
 *
 * Each URL is opened in its own page. A `data-sitekey` attribute on the page
 * is treated as a reCAPTCHA sitekey and sent to `solveCaptcha`. Failures are
 * recorded per URL and never abort the batch. Falsy entries in `urls` are
 * skipped.
 *
 * @param {string[]} urls - Pages to process.
 * @param {number} [maxConcurrent=3] - Upper bound on pages open at once;
 *   values below 1 (or NaN) are treated as 1.
 * @returns {Promise<Array<object>>} One entry per processed URL, in completion
 *   order: `{ url, status: "solved", token }`, `{ url, status: "no-captcha" }`
 *   or `{ url, status: "error", error }`.
 */
async function parallelSolve(urls, maxConcurrent = 3) {
  const browser = await createBrowser();
  const results = [];

  const processUrl = async (url) => {
    let page;
    try {
      // Opening the page is inside the try so that a failure here becomes an
      // "error" entry instead of rejecting the whole batch.
      page = await browser.newPage();
      await page.goto(url, { waitUntil: "networkidle2" });

      // Detect and extract sitekey
      const sitekey = await page.evaluate(() => {
        const el = document.querySelector("[data-sitekey]");
        return el ? el.getAttribute("data-sitekey") : null;
      });

      if (sitekey) {
        const token = await solveCaptcha("userrecaptcha", {
          googlekey: sitekey,
          pageurl: url,
        });
        results.push({ url, status: "solved", token });
      } else {
        results.push({ url, status: "no-captcha" });
      }
    } catch (error) {
      results.push({ url, status: "error", error: error?.message ?? String(error) });
    } finally {
      // The outcome is already recorded; a page that fails to close must not
      // turn it into a batch-wide rejection.
      if (page) await page.close().catch(() => {});
    }
  };

  try {
    // Process with concurrency limit
    const queue = [...urls];
    const requested = Math.max(1, Math.floor(maxConcurrent) || 1);
    const workerCount = Math.min(requested, queue.length);
    const workers = Array.from({ length: workerCount }, async () => {
      while (queue.length > 0) {
        const url = queue.shift();
        if (url) await processUrl(url);
      }
    });
    await Promise.all(workers);
  } finally {
    // Always release the browser, even if a worker rejected.
    await browser.close();
  }
  return results;
}
Screenshot-based CAPTCHA solving
/**
 * Solve an image CAPTCHA by screenshotting its element and typing the answer.
 *
 * @param {object} page - Puppeteer page showing the CAPTCHA.
 * @param {string} captchaSelector - CSS selector of the CAPTCHA image element.
 * @param {string} [inputSelector] - CSS selector of the answer input. Defaults
 *   to inputs named "captcha" or "code", or with class "captcha-input".
 * @returns {Promise<string>} The answer returned by `solveCaptcha`. It is
 *   returned even when no input matched, in which case nothing is typed.
 * @throws {Error} "CAPTCHA element not found" when `captchaSelector` matches
 *   nothing; also propagates `solveCaptcha` errors.
 */
async function solveScreenshotCaptcha(
  page,
  captchaSelector,
  inputSelector = 'input[name="captcha"], input[name="code"], input.captcha-input'
) {
  const element = await page.$(captchaSelector);
  if (!element) throw new Error("CAPTCHA element not found");

  // Take a screenshot of just the CAPTCHA element
  const screenshot = await element.screenshot({ encoding: "base64" });

  // Solve via CaptchaAI
  const answer = await solveCaptcha("base64", { body: screenshot });

  // Type the answer
  const input = await page.$(inputSelector);
  if (input) {
    await input.click({ clickCount: 3 }); // select any existing text first
    await input.type(answer, { delay: 30 });
  }
  return answer;
}
Troubleshooting
| Symptom | Cause | Fix |
|---|---|---|
| Puppeteer detected as bot | Not using stealth plugin | Add puppeteer-extra-plugin-stealth |
| reCAPTCHA token injection fails | Multiple reCAPTCHA widgets | Find the correct g-recaptcha-response by index |
| Iframe switch doesn't work | Cross-origin iframe | Solve via API instead of clicking inside iframe |
| page.goto hangs | Page has infinite loading | Use timeout option and networkidle2 |
| Parallel pages crash browser | Out of memory | Lower concurrency or use --disable-dev-shm-usage |
Frequently asked questions
Should I use Puppeteer or Playwright?
Puppeteer has a larger ecosystem and more tutorials. Playwright has better API design and multi-browser support. Both work equally well with CaptchaAI.
Does stealth mode affect CAPTCHA solving?
No. CaptchaAI solves on its own workers. Stealth mode prevents the site from detecting Puppeteer, which is a separate concern from CAPTCHA solving.
How many parallel pages can I run?
Depends on available RAM. Each page uses ~50-100MB. On 8GB RAM, limit to 5-10 concurrent pages.
Summary
Advanced Puppeteer + CaptchaAI: use stealth mode for detection evasion, request interception for CAPTCHA discovery, iframe-aware token injection, multi-page form handling, and parallel browser solving for throughput.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.