Puppeteer is the go-to headless browser for Node.js automation. When target sites serve CAPTCHAs, CaptchaAI's API solves them externally — Puppeteer extracts the parameters, CaptchaAI returns the token, and Puppeteer injects it back.
Requirements
| Requirement | Details |
|---|---|
| Node.js 16+ | With npm |
| Puppeteer | npm install puppeteer |
| axios | npm install axios |
| CaptchaAI API key | From captchaai.com |
How It Works
- Puppeteer navigates to the page with the CAPTCHA
- Your script extracts the CAPTCHA site key from the DOM
- CaptchaAI solves the challenge server-side
- Your script injects the token and submits the form
Step 1: Create the Solver Module
// solver.js
const axios = require("axios");
const API_KEY = "YOUR_API_KEY";
const POLL_INTERVAL = 5000;
const MAX_ATTEMPTS = 60;
/**
 * Submits a reCAPTCHA v2 task to CaptchaAI and polls until a result arrives.
 *
 * @param {string} siteKey - Sent as the `googlekey` request parameter.
 * @param {string} pageUrl - Sent as the `pageurl` request parameter.
 * @returns {Promise<string>} The second `|`-separated field of the "OK|…" result.
 * @throws {Error} "Submit failed: …" when the submit response does not start
 *   with "OK|"; "Solve failed: …" when a poll returns anything other than
 *   "CAPCHA_NOT_READY" or "OK|…"; "Solve timed out" after MAX_ATTEMPTS polls.
 */
async function solveRecaptchaV2(siteKey, pageUrl) {
  const pause = () =>
    new Promise((resolve) => {
      setTimeout(resolve, POLL_INTERVAL);
    });

  // Hand the task over to the service.
  const { data: submitBody } = await axios.get(
    "https://ocr.captchaai.com/in.php",
    {
      params: {
        key: API_KEY,
        method: "userrecaptcha",
        googlekey: siteKey,
        pageurl: pageUrl,
      },
    }
  );
  if (!submitBody.startsWith("OK|")) {
    throw new Error(`Submit failed: ${submitBody}`);
  }

  const [, taskId] = submitBody.split("|");
  console.log(`Task submitted: ${taskId}`);

  // Ask for the outcome, always sleeping before each request.
  let attempt = 0;
  while (attempt < MAX_ATTEMPTS) {
    attempt += 1;
    await pause();

    const { data: pollBody } = await axios.get(
      "https://ocr.captchaai.com/res.php",
      { params: { key: API_KEY, action: "get", id: taskId } }
    );

    if (pollBody !== "CAPCHA_NOT_READY") {
      if (!pollBody.startsWith("OK|")) {
        throw new Error(`Solve failed: ${pollBody}`);
      }
      return pollBody.split("|")[1];
    }
  }

  throw new Error("Solve timed out");
}
/**
 * Submits a Cloudflare Turnstile task to CaptchaAI and polls for the token.
 *
 * @param {string} siteKey - Sent as the `sitekey` request parameter.
 * @param {string} pageUrl - Sent as the `pageurl` request parameter.
 * @returns {Promise<string>} The second `|`-separated field of the "OK|…" result.
 * @throws {Error} "Submit failed: …", "Solve failed: …" or "Solve timed out",
 *   mirroring the submit, poll and attempt-limit failure paths.
 */
async function solveTurnstile(siteKey, pageUrl) {
  const submission = await axios.get("https://ocr.captchaai.com/in.php", {
    params: {
      key: API_KEY,
      method: "turnstile",
      sitekey: siteKey,
      pageurl: pageUrl,
    },
  });
  const submitBody = submission.data;
  const accepted = submitBody.startsWith("OK|");
  if (accepted === false) {
    throw new Error(`Submit failed: ${submitBody}`);
  }
  const [, taskId] = submitBody.split("|");

  // Count down the remaining polls; each one is preceded by a pause.
  for (let remaining = MAX_ATTEMPTS; remaining > 0; remaining -= 1) {
    await new Promise((wake) => {
      setTimeout(wake, POLL_INTERVAL);
    });

    const poll = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: taskId },
    });
    const pollBody = poll.data;

    const stillPending = pollBody === "CAPCHA_NOT_READY";
    if (!stillPending) {
      if (pollBody.startsWith("OK|")) {
        return pollBody.split("|")[1];
      }
      throw new Error(`Solve failed: ${pollBody}`);
    }
  }

  throw new Error("Solve timed out");
}
module.exports = { solveRecaptchaV2, solveTurnstile };
Step 2: Set Up Puppeteer with Stealth
const puppeteer = require("puppeteer");
/**
 * Launches a headless browser and prepares one page with a custom user agent
 * and an overridden `navigator.webdriver` getter.
 *
 * If any step after the launch fails, the browser is closed before the error
 * is rethrown, because the caller never receives a handle it could close.
 *
 * @returns {Promise<{browser: object, page: object}>} The launched browser and
 *   its configured page. The caller is responsible for `browser.close()`.
 * @throws Whatever `puppeteer.launch` or the page setup calls reject with.
 */
async function createBrowser() {
  const browser = await puppeteer.launch({
    headless: "new",
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-blink-features=AutomationControlled",
    ],
  });

  try {
    const page = await browser.newPage();

    // NOTE(review): this UA string carries no Chrome/Safari product tokens —
    // confirm it is what the target site should see.
    await page.setUserAgent(
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    );

    // Hide automation indicators: runs in every new document before page scripts.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, "webdriver", { get: () => false });
    });

    return { browser, page };
  } catch (err) {
    // Best-effort cleanup; a failure to close must not mask the original error.
    await browser.close().catch(() => {});
    throw err;
  }
}
Step 3: Solve reCAPTCHA on a Page
const { solveRecaptchaV2 } = require("./solver");
/**
 * Opens `url`, solves the reCAPTCHA v2 widget found on it, submits the form
 * and returns the HTML of the page reached afterwards.
 *
 * @param {string} url - Page to open; also passed to the solver as the page URL.
 * @returns {Promise<string>} Result of `page.content()` after the navigation.
 * @throws {Error} If the `.g-recaptcha` element has no `data-sitekey`, or
 *   whatever the browser / solver calls reject with. The browser is closed in
 *   every case.
 */
async function scrapeWithCaptcha(url) {
  const { browser, page } = await createBrowser();
  try {
    await page.goto(url, { waitUntil: "networkidle2" });

    // The widget may be rendered after the initial load, so wait for it
    // instead of letting $eval fail on a missing element.
    await page.waitForSelector(".g-recaptcha");

    // Extract site key
    const siteKey = await page.$eval(".g-recaptcha", (el) =>
      el.getAttribute("data-sitekey")
    );
    if (!siteKey) {
      throw new Error("No data-sitekey found on .g-recaptcha element");
    }
    console.log("Site key:", siteKey);

    // Solve with CaptchaAI
    const token = await solveRecaptchaV2(siteKey, url);
    console.log("Token received:", token.substring(0, 50));

    // Inject token. `value` is set as well as `innerHTML` so the token is
    // submitted even if the textarea's value was already modified by a script.
    await page.evaluate((token) => {
      const field = document.getElementById("g-recaptcha-response");
      field.innerHTML = token;
      field.value = token;
      field.style.display = "";
    }, token);

    // Start waiting for the navigation BEFORE clicking: if the click is
    // awaited first, a fast navigation can finish before the wait begins and
    // the wait then times out.
    await Promise.all([
      page.waitForNavigation({ waitUntil: "networkidle2" }),
      page.click('button[type="submit"]'),
    ]);

    // Scrape the content
    const content = await page.content();
    console.log("Page loaded successfully");
    return content;
  } finally {
    await browser.close();
  }
}
Step 4: Handle Callbacks
Some sites use JavaScript callbacks instead of form submission:
// Trigger the reCAPTCHA callback
// Runs inside the page: walks the reCAPTCHA client registry, locates the
// site's registered callback and invokes it once per client with the token.
//
// NOTE(review): `___grecaptcha_cfg` is an undocumented reCAPTCHA internal.
// This presumes the widget options object stores the handler under a
// `callback` property (a function, or the name of a global function) —
// confirm against the target page.
await page.evaluate((token) => {
  if (typeof ___grecaptcha_cfg === "undefined") {
    return;
  }

  // Client objects can reference each other and DOM nodes, so remember what
  // was already visited to keep the walk finite.
  const visited = new WeakSet();

  const invokeCallback = (node) => {
    if (node === null || typeof node !== "object" || visited.has(node)) {
      return false;
    }
    // DOM nodes expose many inherited methods, none of which is the callback.
    if (typeof Node !== "undefined" && node instanceof Node) {
      return false;
    }
    visited.add(node);

    const candidate = node.callback;
    if (typeof candidate === "function") {
      candidate(token);
      return true;
    }
    if (typeof candidate === "string" && typeof window[candidate] === "function") {
      window[candidate](token);
      return true;
    }

    // Own keys only: inherited members are never the registered callback.
    return Object.keys(node).some((key) => invokeCallback(node[key]));
  };

  for (const client of Object.values(___grecaptcha_cfg.clients)) {
    invokeCallback(client);
  }
}, token);
Full Working Example
const puppeteer = require("puppeteer");
const axios = require("axios");
const API_KEY = "YOUR_API_KEY";
/**
 * Submits a reCAPTCHA v2 task to CaptchaAI and polls until the token is ready.
 *
 * @param {string} siteKey - Sent as the `googlekey` request parameter.
 * @param {string} pageUrl - Sent as the `pageurl` request parameter.
 * @param {object} [options]
 * @param {number} [options.pollInterval=5000] - Milliseconds to wait before each poll.
 * @param {number} [options.maxAttempts=60] - Polls to make before giving up.
 * @returns {Promise<string>} The second `|`-separated field of the "OK|…" result.
 * @throws {Error} "Submit failed: …" when the task is not accepted; an error
 *   whose message is the raw response when a poll reports a failure;
 *   "Solve timed out" when `maxAttempts` polls pass without a result.
 */
async function solveCaptcha(
  siteKey,
  pageUrl,
  { pollInterval = 5000, maxAttempts = 60 } = {}
) {
  const isOk = (body) => typeof body === "string" && body.startsWith("OK|");

  const submit = await axios.get("https://ocr.captchaai.com/in.php", {
    params: {
      key: API_KEY,
      method: "userrecaptcha",
      googlekey: siteKey,
      pageurl: pageUrl,
    },
  });
  // Without this check a rejected submit yields an undefined task id and the
  // error would only surface one poll interval later, from the wrong request.
  if (!isOk(submit.data)) {
    throw new Error(`Submit failed: ${submit.data}`);
  }
  const taskId = submit.data.split("|")[1];

  // Bounded polling: a task that never completes must not hang the script.
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    await new Promise((resolve) => {
      setTimeout(resolve, pollInterval);
    });
    const result = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: taskId },
    });
    if (result.data === "CAPCHA_NOT_READY") {
      continue;
    }
    if (isOk(result.data)) {
      return result.data.split("|")[1];
    }
    throw new Error(result.data);
  }
  throw new Error("Solve timed out");
}
// Entry point: open the login page, solve its reCAPTCHA, submit the form and
// log the URL reached afterwards. The browser is closed on every path.
(async () => {
  const browser = await puppeteer.launch({
    headless: "new",
    args: ["--disable-blink-features=AutomationControlled"],
  });
  try {
    // Created inside the try so a failure here still closes the browser.
    const page = await browser.newPage();
    await page.goto("https://example.com/login", {
      waitUntil: "networkidle2",
    });

    // Get the site key; the widget may render after the initial load.
    await page.waitForSelector(".g-recaptcha");
    const siteKey = await page.$eval(".g-recaptcha", (el) =>
      el.getAttribute("data-sitekey")
    );

    // Solve
    const token = await solveCaptcha(siteKey, page.url());

    // Inject and submit. `value` is set alongside `innerHTML` so the token is
    // what the form sends even if the textarea's value was already modified.
    await page.evaluate((t) => {
      const field = document.getElementById("g-recaptcha-response");
      field.innerHTML = t;
      field.value = t;
    }, token);

    // Arm the navigation wait before clicking; awaiting the click first can
    // miss a fast navigation and leave the wait to time out.
    await Promise.all([page.waitForNavigation(), page.click("#submit-btn")]);
    console.log("Done:", page.url());
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving the promise floating.
  console.error(err);
  process.exitCode = 1;
});
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| page.$eval fails | CAPTCHA loads after initial render | Use page.waitForSelector('.g-recaptcha') |
| Token doesn't work | Expired before submission | Inject immediately after receiving |
| Site detects Puppeteer | Missing stealth config | Use puppeteer-extra-plugin-stealth |
| Navigation timeout | Page didn't navigate after submit | Check if site uses AJAX instead of form post |
FAQ
Should I use headless or headed mode?
Headless mode works fine with CaptchaAI since the CAPTCHA is solved server-side. Use headed mode only for debugging.
Can I use Puppeteer with Cloudflare Turnstile?
Yes. Extract the data-sitekey from the .cf-turnstile div and use method=turnstile with CaptchaAI. See the solveTurnstile function above.
How do I handle multiple CAPTCHAs on one page?
Extract each site key separately and solve them in parallel using Promise.all().
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.