Semaphore Patterns for CAPTCHA Concurrency Control

Sending 100 CAPTCHA tasks simultaneously sounds fast — until you hit rate limits. Semaphores let you cap the number of concurrent API calls to a safe level while keeping throughput high. This guide covers semaphore patterns for controlling CaptchaAI concurrency in Python and Node.js.

Why semaphores

Without semaphore	With semaphore
100 simultaneous requests	20 at a time
429 rate limit errors	No rate limit errors
Unpredictable timing	Steady throughput
Memory spikes	Controlled resource usage

A semaphore is a counter that limits how many tasks can run at the same time. When a task acquires the semaphore, the count decreases. When it finishes, the count increases. If the count is zero, new tasks wait.

Python: asyncio.Semaphore

Basic pattern

import asyncio
import aiohttp

# Placeholder credential; replace with your own API key before running.
API_KEY = "YOUR_API_KEY"
# Endpoint that new solve requests are POSTed to.
SUBMIT_URL = "https://ocr.captchaai.com/in.php"
# Endpoint polled with GET for the result of a submitted task.
RESULT_URL = "https://ocr.captchaai.com/res.php"


async def solve_one(
    session, sem, sitekey, page_url, *, max_polls=24, poll_interval=5
):
    """Solve one CAPTCHA within the semaphore limit.

    The semaphore slot is held for the whole submit-and-poll cycle, so
    ``sem`` caps the number of tasks in flight rather than the number of
    individual HTTP requests.

    Args:
        session: Client session used for the submit and poll requests.
        sem: Semaphore shared by every task in the batch.
        sitekey: Site key, sent to the API as ``googlekey``.
        page_url: URL of the page hosting the CAPTCHA, sent as ``pageurl``.
        max_polls: Maximum number of result polls before giving up.
        poll_interval: Seconds to sleep before each poll.

    Returns:
        ``{"url": page_url, "token": ...}`` on success, otherwise
        ``{"url": page_url, "error": ...}`` where the error is the API's
        ``request`` field, or ``"TIMEOUT"`` once ``max_polls`` polls have
        all reported ``CAPCHA_NOT_READY``.
    """
    async with sem:
        # Submit
        async with session.post(SUBMIT_URL, data={
            "key": API_KEY,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": page_url,
            "json": "1",
        }) as resp:
            data = await resp.json()

        if data["status"] != 1:
            return {"url": page_url, "error": data["request"]}

        task_id = data["request"]

        # Poll (still within semaphore). Sleeping first gives the task time
        # to be worked on before the first result request.
        for _ in range(max_polls):
            await asyncio.sleep(poll_interval)
            async with session.get(RESULT_URL, params={
                "key": API_KEY, "action": "get", "id": task_id, "json": "1"
            }) as resp:
                result = await resp.json()

            if result["status"] == 1:
                return {"url": page_url, "token": result["request"]}
            # Anything other than "not ready yet" is a terminal API error.
            if result["request"] != "CAPCHA_NOT_READY":
                return {"url": page_url, "error": result["request"]}

        return {"url": page_url, "error": "TIMEOUT"}


async def solve_batch(tasks, max_concurrent=20):
    """Solve all ``tasks`` with at most ``max_concurrent`` running at once.

    Each task is a mapping with ``"sitekey"`` and ``"url"`` entries. One
    client session and one semaphore are shared by the whole batch. Prints
    a solved/total summary and returns the per-task result dicts in the
    same order as ``tasks``.
    """
    limiter = asyncio.Semaphore(max_concurrent)

    async with aiohttp.ClientSession() as http:
        results = await asyncio.gather(
            *(
                solve_one(http, limiter, job["sitekey"], job["url"])
                for job in tasks
            )
        )

    # A result counts as solved only when it carries a token.
    solved = len([r for r in results if "token" in r])
    print(f"Solved {solved}/{len(results)}")
    return results

Split submit and poll semaphores

Submitting is fast (< 1s). Polling takes 15-60s. Using separate semaphores lets you submit faster:

async def solve_split_sems(
    session, submit_sem, poll_sem, sitekey, page_url,
    *, max_polls=24, poll_interval=5
):
    """Solve one CAPTCHA using separate submit and poll semaphores.

    The submit slot is released as soon as the submit response has been
    read; the poll slot is then held for the whole polling loop. The actual
    concurrency limits are whatever the caller constructed the two
    semaphores with.

    Args:
        session: Client session used for the submit and poll requests.
        submit_sem: Semaphore gating the submit request.
        poll_sem: Semaphore gating the polling loop.
        sitekey: Site key, sent to the API as ``googlekey``.
        page_url: URL of the page hosting the CAPTCHA, sent as ``pageurl``.
        max_polls: Maximum number of result polls before giving up.
        poll_interval: Seconds to sleep before each poll.

    Returns:
        ``{"token": ...}`` on success, otherwise ``{"error": ...}`` where
        the error is the API's ``request`` field, or ``"TIMEOUT"`` once
        ``max_polls`` polls have all reported ``CAPCHA_NOT_READY``.
    """
    # Submit phase — short; gated by submit_sem only.
    async with submit_sem:
        async with session.post(SUBMIT_URL, data={
            "key": API_KEY,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": page_url,
            "json": "1",
        }) as resp:
            data = await resp.json()

    if data["status"] != 1:
        return {"error": data["request"]}

    task_id = data["request"]

    # Poll phase — longer; gated by poll_sem for the whole loop.
    async with poll_sem:
        for _ in range(max_polls):
            await asyncio.sleep(poll_interval)
            async with session.get(RESULT_URL, params={
                "key": API_KEY, "action": "get", "id": task_id, "json": "1"
            }) as resp:
                result = await resp.json()

            if result["status"] == 1:
                return {"token": result["request"]}
            # Anything other than "not ready yet" is a terminal API error.
            if result["request"] != "CAPCHA_NOT_READY":
                return {"error": result["request"]}

    return {"error": "TIMEOUT"}


async def main(tasks):
    """Run every task through ``solve_split_sems`` and gather the results.

    Uses one shared client session, a 30-slot submit semaphore and a
    50-slot poll semaphore. Each task is a mapping with ``"sitekey"`` and
    ``"url"`` entries; results come back in the same order as ``tasks``.
    """
    submit_limit = asyncio.Semaphore(30)  # concurrent submit requests
    poll_limit = asyncio.Semaphore(50)    # concurrent polling loops

    async with aiohttp.ClientSession() as http:

        def launch(job):
            return solve_split_sems(
                http, submit_limit, poll_limit, job["sitekey"], job["url"]
            )

        return await asyncio.gather(*map(launch, tasks))

Node.js: Custom semaphore

Node.js doesn't have a built-in semaphore, but it's simple to build:

/**
 * Counting semaphore for promise-based code.
 *
 * `current` is the number of permits handed out, `max` is the cap, and
 * `queue` holds the resolver of every caller waiting for a permit (FIFO).
 */
class Semaphore {
  constructor(max) {
    this.max = max;
    this.current = 0;
    this.queue = [];
  }

  /** Resolves once a permit is available; immediately if one is free. */
  acquire() {
    if (this.current < this.max) {
      this.current += 1;
      return Promise.resolve();
    }
    // No free permit: park the resolver until release() hands one over.
    return new Promise(wake => {
      this.queue.push(wake);
    });
  }

  /** Gives a permit back, passing it straight to the oldest waiter if any. */
  release() {
    const waiter = this.queue.shift();
    if (waiter === undefined) {
      // Nobody waiting: the permit simply becomes free again.
      this.current -= 1;
      return;
    }
    // Hand-off: the permit moves to the waiter, so `current` is unchanged.
    waiter();
  }
}

Using the semaphore

const axios = require('axios');

// Placeholder credential; replace with your own API key before running.
const API_KEY = 'YOUR_API_KEY';
// Shared by every solveOne call, so at most 20 of them run at the same time.
const sem = new Semaphore(20);

/**
 * Solve one CAPTCHA while holding a permit from the shared semaphore.
 *
 * Submits the task, then polls for the result up to 24 times with a
 * 5-second pause before each poll. Resolves to `{ url, token }` on success
 * or `{ url, error }` on an API error or timeout. The permit is always
 * released, even if a request throws.
 */
async function solveOne(sitekey, pageUrl) {
  const pause = ms => new Promise(done => setTimeout(done, ms));

  await sem.acquire();
  try {
    const { data: submitted } = await axios.post(
      'https://ocr.captchaai.com/in.php',
      null,
      {
        params: {
          key: API_KEY,
          method: 'userrecaptcha',
          googlekey: sitekey,
          pageurl: pageUrl,
          json: 1,
        },
      }
    );

    if (submitted.status !== 1) {
      return { url: pageUrl, error: submitted.request };
    }

    const taskId = submitted.request;
    let attemptsLeft = 24;

    while (attemptsLeft-- > 0) {
      await pause(5000);
      const { data: answer } = await axios.get('https://ocr.captchaai.com/res.php', {
        params: { key: API_KEY, action: 'get', id: taskId, json: 1 },
      });

      if (answer.status === 1) {
        return { url: pageUrl, token: answer.request };
      }
      // Anything other than "not ready yet" is a terminal API error.
      if (answer.request !== 'CAPCHA_NOT_READY') {
        return { url: pageUrl, error: answer.request };
      }
    }

    return { url: pageUrl, error: 'TIMEOUT' };
  } finally {
    sem.release();
  }
}

// Run every task through solveOne; the shared semaphore caps how many are
// in flight (e.g. 100 tasks, 20 at a time). Logs a solved/total summary and
// resolves to the per-task results in input order.
async function solveBatch(tasks) {
  const pending = tasks.map(({ sitekey, url }) => solveOne(sitekey, url));
  const results = await Promise.all(pending);

  // A result counts as solved only when it carries a truthy token.
  const solved = results.filter(r => r.token).length;
  console.log(`Solved: ${solved}/${results.length}`);
  return results;
}

Adaptive semaphore

Adjust concurrency based on error rates:

class AdaptiveSemaphore:
    """Semaphore whose limit moves up or down with the observed error rate.

    Every 20 releases the error rate of that window is evaluated: above 20%
    the limit drops by 5 (not below ``min_val``); below 5% it rises by 5
    (not above ``max_val``).

    The limit is changed by resizing the one underlying ``asyncio.Semaphore``
    in place. Swapping in a new semaphore object would strand tasks waiting
    on the old one and let in-flight tasks release permits into the new one,
    pushing it past the intended limit.

    Attributes:
        value: Current target concurrency limit.
        min_val: Lower bound for ``value``.
        max_val: Upper bound for ``value``.
        sem: The underlying semaphore (the same object for the whole
            lifetime of the instance).
        success_count: Successful releases in the current window.
        error_count: Failed releases in the current window.
    """

    def __init__(self, initial=20, min_val=5, max_val=50):
        self.value = initial
        self.min_val = min_val
        self.max_val = max_val
        self.sem = asyncio.Semaphore(initial)
        self.success_count = 0
        self.error_count = 0
        # Permits that still have to be withdrawn after a limit reduction.
        self._shrink_debt = 0

    async def acquire(self):
        """Wait for a permit from the underlying semaphore."""
        await self.sem.acquire()

    def release(self, success=True):
        """Return a permit and record whether the task succeeded.

        Args:
            success: ``True`` if the task holding the permit succeeded,
                ``False`` if it failed; feeds the error-rate window.
        """
        if success:
            self.success_count += 1
        else:
            self.error_count += 1

        # Counters are reset by _adjust, so this fires once per 20 releases.
        if (self.success_count + self.error_count) % 20 == 0:
            self._adjust()

        if self._shrink_debt > 0:
            # The limit was lowered: swallow this permit instead of handing
            # it back, so capacity shrinks as in-flight tasks finish.
            self._shrink_debt -= 1
        else:
            self.sem.release()

    def _adjust(self):
        """Re-evaluate the limit from the current window, then reset it."""
        total = self.success_count + self.error_count
        if total == 0:
            # Nothing recorded yet; avoid dividing by zero.
            return

        error_rate = self.error_count / total

        if error_rate > 0.2 and self.value > self.min_val:
            new_value = max(self.min_val, self.value - 5)
            # Capacity cannot be taken back from tasks that already hold a
            # permit, so remember how many upcoming releases to swallow.
            self._shrink_debt += self.value - new_value
            self.value = new_value
            print(f"Reduced concurrency to {self.value}")
        elif error_rate < 0.05 and self.value < self.max_val:
            new_value = min(self.max_val, self.value + 5)
            for _ in range(new_value - self.value):
                if self._shrink_debt > 0:
                    # Cancel a pending reduction before adding new permits.
                    self._shrink_debt -= 1
                else:
                    self.sem.release()
            self.value = new_value
            print(f"Increased concurrency to {self.value}")

        self.success_count = 0
        self.error_count = 0

Troubleshooting

Problem	Cause	Fix
All tasks blocked	Semaphore never released	Use `try/finally` to always release
Still getting 429s	Semaphore too high	Reduce max concurrent value
Too slow	Semaphore too low	Increase value or split submit/poll
Memory growing	Tasks queued forever	Put a timeout around `acquire()` (e.g. `asyncio.wait_for(sem.acquire(), timeout)`)