Colly is a popular Go web scraping framework. Here's how to integrate CaptchaAI to handle CAPTCHAs in your Go scrapers.
CaptchaAI Client in Go
package captchaai
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"time"
)
// Client talks to the CaptchaAI HTTP API.
type Client struct {
	// APIKey is sent as the "key" parameter on every request.
	APIKey string

	// HTTPClient performs the submit and poll requests.
	HTTPClient *http.Client
}
// apiResponse is the JSON envelope decoded from both the submit and the poll
// endpoints.
type apiResponse struct {
	// Status is 1 when the call succeeded; callers treat any other value as
	// "not successful (yet)".
	Status int `json:"status"`

	// Request carries the payload: the task ID after a successful submit,
	// the solved token after a successful poll, or otherwise a status/error
	// code such as "CAPCHA_NOT_READY".
	Request string `json:"request"`
}
// NewClient returns a Client that authenticates with apiKey and issues its
// requests through a dedicated http.Client with a 30-second timeout.
func NewClient(apiKey string) *Client {
	client := new(Client)
	client.APIKey = apiKey
	client.HTTPClient = &http.Client{Timeout: 30 * time.Second}
	return client
}
// SolveRecaptchaV2 submits a reCAPTCHA v2 task to CaptchaAI and blocks until
// the service returns a token, reports an error, or the polling budget is
// exhausted.
//
// sitekey and pageurl are sent as the "googlekey" and "pageurl" form fields.
// On success the returned string is the "request" field of the final poll
// response.
//
// The call waits 15 seconds before the first poll and then polls up to 24
// times, 5 seconds apart. Transport failures and undecodable responses while
// polling are retried; any API answer other than CAPCHA_NOT_READY aborts
// immediately with that answer in the error.
func (c *Client) SolveRecaptchaV2(sitekey, pageurl string) (string, error) {
	const (
		submitURL    = "https://ocr.captchaai.com/in.php"
		resultURL    = "https://ocr.captchaai.com/res.php"
		notReady     = "CAPCHA_NOT_READY"
		initialWait  = 15 * time.Second
		pollInterval = 5 * time.Second
		maxPolls     = 24
	)

	// Fall back to the default client so a Client built without NewClient
	// does not panic on a nil HTTPClient.
	httpClient := c.HTTPClient
	if httpClient == nil {
		httpClient = http.DefaultClient
	}

	// Submit task
	form := url.Values{
		"key":       {c.APIKey},
		"method":    {"userrecaptcha"},
		"googlekey": {sitekey},
		"pageurl":   {pageurl},
		"json":      {"1"},
	}
	submitResp, err := httpClient.PostForm(submitURL, form)
	if err != nil {
		return "", fmt.Errorf("submit error: %w", err)
	}
	var result apiResponse
	err = json.NewDecoder(submitResp.Body).Decode(&result)
	// Close now rather than with defer: polling can run for minutes and the
	// submit response is no longer needed.
	submitResp.Body.Close()
	if err != nil {
		return "", fmt.Errorf("decode error: %w", err)
	}
	if result.Status != 1 {
		return "", fmt.Errorf("submit failed: %s", result.Request)
	}
	taskID := result.Request

	// Build the poll URL once; url.Values escapes the key and task ID.
	query := url.Values{
		"key":    {c.APIKey},
		"action": {"get"},
		"id":     {taskID},
		"json":   {"1"},
	}
	pollURL := resultURL + "?" + query.Encode()

	// Poll for result
	time.Sleep(initialWait)
	var lastErr error
	for i := 0; i < maxPolls; i++ {
		// Sleep between attempts only, not after the final one.
		if i > 0 {
			time.Sleep(pollInterval)
		}

		pollResp, err := httpClient.Get(pollURL)
		if err != nil {
			lastErr = err
			continue
		}
		var pollResult apiResponse
		err = json.NewDecoder(pollResp.Body).Decode(&pollResult)
		pollResp.Body.Close()
		if err != nil {
			// A garbled response is treated like a transport failure: the
			// task may still complete, so keep polling.
			lastErr = fmt.Errorf("decode poll response: %w", err)
			continue
		}

		if pollResult.Status == 1 {
			return pollResult.Request, nil
		}
		if pollResult.Request != notReady {
			return "", fmt.Errorf("solve error: %s", pollResult.Request)
		}
		lastErr = nil
	}

	if lastErr != nil {
		return "", fmt.Errorf("solve timeout: %w", lastErr)
	}
	return "", errors.New("solve timeout")
}
Colly Integration
package main
import (
"fmt"
"log"
"os"
"strings"
"github.com/gocolly/colly/v2"
)
// main crawls https://example.com/data with Colly, solves any reCAPTCHA v2
// challenge it meets through CaptchaAI, and prints each table row it finds.
//
// The API key is read from the CAPTCHAAI_API_KEY environment variable. The
// solver is created with captchaai.NewClient, so the captchaai client package
// must be imported under whatever path it has in your module.
func main() {
	apiKey := os.Getenv("CAPTCHAAI_API_KEY")
	if apiKey == "" {
		// Fail fast instead of sending solve requests with an empty key.
		log.Fatal("CAPTCHAAI_API_KEY is not set")
	}
	solver := captchaai.NewClient(apiKey)

	c := colly.NewCollector(
		colly.AllowedDomains("example.com"),
		colly.MaxDepth(2),
	)

	// Detect CAPTCHA pages
	c.OnHTML("[data-sitekey]", func(e *colly.HTMLElement) {
		sitekey := e.Attr("data-sitekey")
		pageURL := e.Request.URL.String()
		log.Printf("CAPTCHA detected on %s, solving...", pageURL)

		token, err := solver.SolveRecaptchaV2(sitekey, pageURL)
		if err != nil {
			log.Printf("Solve failed: %v", err)
			return
		}
		log.Printf("CAPTCHA solved, token length: %d", len(token))

		// Post form with token. Posting through the originating request
		// (not the collector) carries its depth forward, so MaxDepth ends
		// the cycle if the site keeps answering with another CAPTCHA
		// instead of triggering solve after solve.
		err = e.Request.Post(pageURL, map[string]string{
			"g-recaptcha-response": token,
		})
		if err != nil {
			log.Printf("Form submit failed: %v", err)
		}
	})

	// Extract data
	c.OnHTML("table tr", func(e *colly.HTMLElement) {
		var cols []string
		e.ForEach("td", func(_ int, td *colly.HTMLElement) {
			cols = append(cols, strings.TrimSpace(td.Text))
		})
		// Rows without <td> cells (e.g. header rows) are skipped.
		if len(cols) > 0 {
			fmt.Printf("Row: %s\n", strings.Join(cols, " | "))
		}
	})

	c.OnError(func(r *colly.Response, err error) {
		log.Printf("Error %s: %v", r.Request.URL, err)
	})

	if err := c.Visit("https://example.com/data"); err != nil {
		log.Fatalf("Visit failed: %v", err)
	}
}
Colly with Rate Limiting
package main
import (
"time"
"github.com/gocolly/colly/v2"
)
// main builds a collector whose requests to every domain are serialized and
// spaced out, then starts a crawl at https://example.com.
func main() {
	collector := colly.NewCollector()

	// Throttle all domains: one request at a time, with a fixed 3-second
	// delay plus a random extra delay of up to 2 seconds.
	throttle := colly.LimitRule{
		DomainGlob:  "*",
		Parallelism: 1,
		Delay:       3 * time.Second,
		RandomDelay: 2 * time.Second,
	}
	collector.Limit(&throttle)

	// ... CAPTCHA handling as above ...

	collector.Visit("https://example.com")
}
Turnstile Solving in Go
// SolveTurnstile submits a Turnstile task to CaptchaAI and blocks until the
// service returns a token, reports an error, or the polling budget is
// exhausted.
//
// sitekey and pageurl are sent as the "sitekey" and "pageurl" form fields.
// On success the returned string is the "request" field of the final poll
// response.
//
// The call waits 5 seconds before the first poll and then polls up to 20
// times, 3 seconds apart. Transport failures and undecodable responses while
// polling are retried; any API answer other than CAPCHA_NOT_READY aborts
// immediately with that answer in the error.
func (c *Client) SolveTurnstile(sitekey, pageurl string) (string, error) {
	const (
		submitURL    = "https://ocr.captchaai.com/in.php"
		resultURL    = "https://ocr.captchaai.com/res.php"
		notReady     = "CAPCHA_NOT_READY"
		initialWait  = 5 * time.Second
		pollInterval = 3 * time.Second
		maxPolls     = 20
	)

	// Fall back to the default client so a Client built without NewClient
	// does not panic on a nil HTTPClient.
	httpClient := c.HTTPClient
	if httpClient == nil {
		httpClient = http.DefaultClient
	}

	// Submit task
	form := url.Values{
		"key":     {c.APIKey},
		"method":  {"turnstile"},
		"sitekey": {sitekey},
		"pageurl": {pageurl},
		"json":    {"1"},
	}
	submitResp, err := httpClient.PostForm(submitURL, form)
	if err != nil {
		return "", fmt.Errorf("submit error: %w", err)
	}
	var result apiResponse
	err = json.NewDecoder(submitResp.Body).Decode(&result)
	// Close now rather than with defer: polling can run for a minute and the
	// submit response is no longer needed.
	submitResp.Body.Close()
	if err != nil {
		// Without this check a non-JSON reply would surface as
		// "submit failed: " with an empty reason.
		return "", fmt.Errorf("decode error: %w", err)
	}
	if result.Status != 1 {
		return "", fmt.Errorf("submit failed: %s", result.Request)
	}

	// Build the poll URL once; url.Values escapes the key and task ID.
	query := url.Values{
		"key":    {c.APIKey},
		"action": {"get"},
		"id":     {result.Request},
		"json":   {"1"},
	}
	pollURL := resultURL + "?" + query.Encode()

	// Poll for result (same protocol as reCAPTCHA, shorter intervals)
	time.Sleep(initialWait)
	var lastErr error
	for i := 0; i < maxPolls; i++ {
		// Sleep between attempts only, not after the final one.
		if i > 0 {
			time.Sleep(pollInterval)
		}

		pollResp, err := httpClient.Get(pollURL)
		if err != nil {
			lastErr = err
			continue
		}
		var pr apiResponse
		err = json.NewDecoder(pollResp.Body).Decode(&pr)
		pollResp.Body.Close()
		if err != nil {
			// A garbled response is treated like a transport failure: the
			// task may still complete, so keep polling.
			lastErr = fmt.Errorf("decode poll response: %w", err)
			continue
		}

		if pr.Status == 1 {
			return pr.Request, nil
		}
		if pr.Request != notReady {
			return "", fmt.Errorf("error: %s", pr.Request)
		}
		lastErr = nil
	}

	if lastErr != nil {
		return "", fmt.Errorf("timeout: %w", lastErr)
	}
	return "", errors.New("timeout")
}
FAQ
Why use Colly over other Go scrapers?
Colly is the most popular Go scraping framework with built-in caching, rate limiting, and concurrent request handling. It pairs well with CaptchaAI's HTTP API.
Can I use Colly with headless browsers?
For pages requiring JavaScript rendering, use chromedp or rod alongside Colly. Use Colly for static pages and the headless browser for CAPTCHA-protected dynamic pages.
Is CaptchaAI's API compatible with Go?
Yes. CaptchaAI uses standard HTTP endpoints that work with Go's net/http package. No SDK is required.
Related Guides
Add CAPTCHA solving to your Go scrapers — get CaptchaAI.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.