Government GIS portals, county assessor systems, and mapping platforms protect geospatial queries with Image and OCR CAPTCHAs. These portals serve parcel boundaries, zoning designations, flood zones, and property assessments — data that's valuable for real estate analysis, urban planning, and environmental research. Here's how to handle the CAPTCHAs.
CAPTCHA Patterns on GIS Portals
| Portal type | CAPTCHA type | Trigger |
|---|---|---|
| County GIS/assessor | Image text CAPTCHA | Parcel search queries |
| State geospatial portals | Custom CAPTCHA | Data download requests |
| USGS data portals | reCAPTCHA v2 | Bulk data access |
| Municipal zoning maps | Image CAPTCHA | Repeated property lookups |
| Environmental databases | Math CAPTCHA | Report generation |
| Flood zone lookup | Image text CAPTCHA | Address queries |
GIS Data Extractor
import requests
import base64
import time
import re
class GISDataExtractor:
    """Query GIS portal pages and solve image CAPTCHAs that block the results.

    Portal requests go through one ``requests.Session`` so that the CAPTCHA
    image is downloaded with the same cookies as the page that displayed it.
    CAPTCHA images are sent to the CaptchaAI OCR endpoints for solving.
    """

    SOLVER_SUBMIT_URL = "https://ocr.captchaai.com/in.php"
    SOLVER_RESULT_URL = "https://ocr.captchaai.com/res.php"
    POLL_ATTEMPTS = 30
    POLL_INTERVAL = 3  # seconds between result polls

    # Output key -> CSS selector used by _parse_parcel_data.
    PARCEL_SELECTORS = {
        "parcel_id": ".parcel-id, #parcelId",
        "owner": ".owner, .owner-name",
        "address": ".address, .situs",
        "zoning": ".zoning, .zone-code",
        "acreage": ".acreage, .area",
        "assessed_value": ".assessed, .value",
        "land_use": ".land-use, .use-code",
    }

    def __init__(self, api_key, timeout=30):
        """Create an extractor.

        Args:
            api_key: CaptchaAI API key sent with every solver request.
            timeout: Per-request timeout in seconds for portal and solver
                HTTP calls, so a stalled server cannot hang the caller.
        """
        self.api_key = api_key
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        })

    def lookup_parcel(self, portal_url, parcel_id):
        """Look up parcel data by ID, solving CAPTCHAs as needed.

        Returns:
            dict with the keys of ``PARCEL_SELECTORS``; a value is ``None``
            when the page has no element matching that field's selector.
        """
        html = self._query(portal_url, "/parcel", {"id": parcel_id})
        return self._parse_parcel_data(html)

    def search_by_address(self, portal_url, address):
        """Search GIS records by street address.

        Returns:
            list of dicts with ``parcel_id``, ``address`` and ``owner`` keys,
            one per result row found on the page.
        """
        html = self._query(portal_url, "/search", {"address": address})
        return self._parse_search_results(html)

    def bulk_extract(self, portal_url, parcel_ids, delay=3):
        """Extract data for multiple parcels with rate limiting.

        A failure on one parcel is recorded as ``{"error": message}`` under
        that parcel's key instead of aborting the remaining lookups.

        Args:
            portal_url: Base URL of the portal.
            parcel_ids: Iterable of parcel identifiers.
            delay: Seconds to sleep after each lookup.

        Returns:
            dict mapping each parcel ID to its parsed data or error dict.
        """
        results = {}
        for parcel_id in parcel_ids:
            try:
                results[parcel_id] = self.lookup_parcel(portal_url, parcel_id)
            except Exception as e:  # deliberate best-effort: keep going
                results[parcel_id] = {"error": str(e)}
            time.sleep(delay)
        return results

    def _query(self, portal_url, path, fields):
        """GET ``portal_url + path``; if a CAPTCHA is shown, solve and POST.

        Returns the HTML of the final response (the GET response when no
        CAPTCHA marker was detected, otherwise the POST response).
        """
        endpoint = f"{portal_url}{path}"
        response = self.session.get(
            endpoint, params=fields, timeout=self.timeout
        )
        if self._has_image_captcha(response.text):
            captcha_url = self._extract_captcha_url(response.text, portal_url)
            captcha_text = self._solve_captcha(captcha_url)
            # Hidden inputs go first so an input that happens to be named
            # like one of our own fields cannot overwrite the real value.
            form = {
                **self._extract_hidden_fields(response.text),
                **fields,
                "captcha": captcha_text,
            }
            response = self.session.post(
                endpoint, data=form, timeout=self.timeout
            )
        return response.text

    def _has_image_captcha(self, html):
        """Return True when the HTML contains a CAPTCHA-related marker."""
        return bool(re.search(
            r'captcha|verification.?image|security.?code',
            html, re.IGNORECASE
        ))

    def _extract_captcha_url(self, html, base_url):
        """Return the absolute URL of the CAPTCHA ``<img>`` in ``html``.

        Tries, in order, an image whose ``src``, ``id`` or ``class`` contains
        "captcha" (case-insensitive).

        Raises:
            ValueError: if no such image with a ``src`` is found.
        """
        from bs4 import BeautifulSoup
        from urllib.parse import urljoin

        soup = BeautifulSoup(html, "html.parser")
        img = (
            soup.find("img", attrs={"src": lambda s: s and "captcha" in s.lower()}) or
            soup.find("img", {"id": re.compile(r"captcha", re.I)}) or
            soup.find("img", {"class": re.compile(r"captcha", re.I)})
        )
        if img and img.get("src"):
            return urljoin(base_url, img["src"])
        raise ValueError("CAPTCHA image not found")

    def _solve_captcha(self, captcha_url):
        """Download and solve image CAPTCHA.

        The image is fetched through ``self.session`` so it belongs to the
        same portal session as the page that displayed it.

        Raises:
            RuntimeError: if the solver rejects the submit or reports an
                error other than "not ready" while polling.
            TimeoutError: if no answer arrives within
                ``POLL_ATTEMPTS * POLL_INTERVAL`` seconds.
        """
        img_response = self.session.get(captcha_url, timeout=self.timeout)
        img_response.raise_for_status()
        img_base64 = base64.b64encode(img_response.content).decode("utf-8")

        submitted = requests.post(self.SOLVER_SUBMIT_URL, data={
            "key": self.api_key,
            "method": "base64",
            "body": img_base64,
            "json": 1
        }, timeout=self.timeout).json()
        # With status != 1 the "request" field carries an error code, not a
        # task id, so polling with it would only time out.
        if submitted.get("status") != 1:
            raise RuntimeError(
                f"CAPTCHA submit failed: {submitted.get('request')}"
            )
        task_id = submitted["request"]

        for _ in range(self.POLL_ATTEMPTS):
            time.sleep(self.POLL_INTERVAL)
            data = requests.get(self.SOLVER_RESULT_URL, params={
                "key": self.api_key,
                "action": "get",
                "id": task_id,
                "json": 1
            }, timeout=self.timeout).json()
            if data.get("status") == 1:
                return data["request"]
            # Anything other than a "not ready" notice is treated as a
            # permanent failure; fail fast instead of polling to timeout.
            if "NOT_READY" not in str(data.get("request", "")):
                raise RuntimeError(
                    f"CAPTCHA solve failed: {data.get('request')}"
                )
        raise TimeoutError("CAPTCHA solve timed out")

    def _extract_hidden_fields(self, html):
        """Return ``{name: value}`` for every named hidden ``<input>``."""
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, "html.parser")
        fields = {}
        for inp in soup.select("input[type='hidden']"):
            name = inp.get("name")
            if name:
                fields[name] = inp.get("value", "")
        return fields

    @staticmethod
    def _node_text(node):
        """Return the stripped text of a parsed element, or None if absent."""
        return node.text.strip() if node is not None else None

    def _parse_parcel_data(self, html):
        """Parse a parcel detail page into a dict keyed by field name."""
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, "html.parser")
        return {
            field: self._node_text(soup.select_one(selector))
            for field, selector in self.PARCEL_SELECTORS.items()
        }

    def _parse_search_results(self, html):
        """Parse a search results page into a list of per-row dicts."""
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, "html.parser")
        return [
            {
                "parcel_id": self._node_text(row.select_one(".parcel-id")),
                "address": self._node_text(row.select_one(".address")),
                "owner": self._node_text(row.select_one(".owner")),
            }
            for row in soup.select(".result-row, tr.parcel")
        ]
# Usage: one extractor instance serves every lookup against the same portal.
portal_url = "https://gis.county.example.gov"
parcel_ids = ["12-34-567-890", "12-34-567-891", "12-34-567-892"]

extractor = GISDataExtractor("YOUR_API_KEY")

# Single parcel lookup
parcel = extractor.lookup_parcel(portal_url, parcel_ids[0])
print(f"Owner: {parcel['owner']}, Zoning: {parcel['zoning']}")

# Bulk extraction (sleeps between lookups; see the delay parameter)
parcels = extractor.bulk_extract(portal_url, parcel_ids)
Coordinate-Based Extraction (JavaScript)
/**
 * Looks up GIS records by coordinate and solves image CAPTCHAs through the
 * CaptchaAI OCR endpoints when the portal shows one.
 *
 * NOTE(review): plain fetch() keeps no cookie jar between calls here, so the
 * CAPTCHA image and the follow-up POST may not share the session of the page
 * that displayed the CAPTCHA. Hidden form fields are not forwarded either.
 * Confirm against the target portal.
 */
class GISExtractor {
  /** @param {string} apiKey CaptchaAI API key. */
  constructor(apiKey) {
    this.apiKey = apiKey;
  }

  /**
   * Query the portal's /identify endpoint for a single point.
   * @returns {Promise<{parcelId?: string, zoning?: string, acreage?: string, landUse?: string}>}
   */
  async extractByCoordinates(portalUrl, lat, lng) {
    const url = `${portalUrl}/identify?lat=${lat}&lng=${lng}`;
    const response = await fetch(url);
    const html = await response.text();
    if (this.hasCaptcha(html)) {
      return this.solveAndExtract(portalUrl, html, { lat, lng });
    }
    return this.parseGISData(html);
  }

  /**
   * Sample a bounding box on a regular grid and collect every point that
   * resolves to a parcel. Failures at individual points are logged and skipped.
   * @param {{north: number, south: number, east: number, west: number}} bounds
   * @param {number} [gridSize=0.01] Grid spacing in degrees; must be > 0.
   * @throws {RangeError} when gridSize is not a positive number.
   */
  async extractRegion(portalUrl, bounds, gridSize = 0.01) {
    // A zero, negative or NaN step would never reach the far edge.
    if (!(gridSize > 0)) {
      throw new RangeError('gridSize must be a positive number');
    }
    const results = [];
    const { north, south, east, west } = bounds;
    // Derive each coordinate from an integer index instead of accumulating
    // `+= gridSize`, which drifts and can skip the last row or column.
    const EPS = 1e-9;
    const latSteps = Math.floor((north - south) / gridSize + EPS);
    const lngSteps = Math.floor((east - west) / gridSize + EPS);
    for (let i = 0; i <= latSteps; i++) {
      const lat = south + i * gridSize;
      for (let j = 0; j <= lngSteps; j++) {
        const lng = west + j * gridSize;
        try {
          const data = await this.extractByCoordinates(portalUrl, lat, lng);
          if (data.parcelId) results.push(data);
        } catch (error) {
          console.error(`Failed at ${lat},${lng}: ${error.message}`);
        }
        // Rate limit
        await new Promise(r => setTimeout(r, 2000));
      }
    }
    return results;
  }

  /** True when the HTML contains a CAPTCHA-related marker. */
  hasCaptcha(html) {
    return /captcha|verification.?image|security.?code/i.test(html);
  }

  /**
   * Solve the CAPTCHA image referenced in `html`, then re-submit `params`
   * together with the answer to the /identify endpoint.
   * @throws {Error} when the image is missing, the solver reports an error,
   *   or no answer arrives within 30 polls.
   */
  async solveAndExtract(portalUrl, html, params) {
    const imgMatch = html.match(/src="([^"]*captcha[^"]*)"/i);
    if (!imgMatch) throw new Error('CAPTCHA image not found');
    const imgUrl = new URL(imgMatch[1], portalUrl).href;
    const imgResp = await fetch(imgUrl);
    if (!imgResp.ok) {
      throw new Error(`CAPTCHA image download failed: HTTP ${imgResp.status}`);
    }
    const buffer = await imgResp.arrayBuffer();
    const base64 = Buffer.from(buffer).toString('base64');

    const submitResp = await fetch('https://ocr.captchaai.com/in.php', {
      method: 'POST',
      body: new URLSearchParams({
        key: this.apiKey,
        method: 'base64',
        body: base64,
        json: '1'
      })
    });
    const submitted = await submitResp.json();
    // With status !== 1 the `request` field is an error code, not a task id.
    if (submitted.status !== 1) {
      throw new Error(`CAPTCHA submit failed: ${submitted.request}`);
    }
    const taskId = submitted.request;

    for (let i = 0; i < 30; i++) {
      await new Promise(r => setTimeout(r, 3000));
      const pollQuery = new URLSearchParams({
        key: this.apiKey,
        action: 'get',
        id: taskId,
        json: '1'
      });
      const result = await fetch(`https://ocr.captchaai.com/res.php?${pollQuery}`);
      const data = await result.json();
      if (data.status === 1) {
        // Post back to the endpoint that served the CAPTCHA, not the portal root.
        const response = await fetch(`${portalUrl}/identify`, {
          method: 'POST',
          body: new URLSearchParams({
            ...params,
            captcha: data.request
          })
        });
        return this.parseGISData(await response.text());
      }
      // Anything other than a "not ready" notice is a permanent failure.
      if (!String(data.request).includes('NOT_READY')) {
        throw new Error(`CAPTCHA solve failed: ${data.request}`);
      }
    }
    throw new Error('CAPTCHA solve timed out');
  }

  /**
   * Pull parcel fields out of raw HTML with regexes. Each field is the text
   * after the first tag whose markup mentions the field name, or undefined.
   */
  parseGISData(html) {
    return {
      parcelId: html.match(/parcel.?id[^>]*>([^<]+)/i)?.[1]?.trim(),
      zoning: html.match(/zon(?:e|ing)[^>]*>([^<]+)/i)?.[1]?.trim(),
      // Group the alternation: without it, a bare "acreage" matches with no
      // capture and the field is always undefined.
      acreage: html.match(/(?:acreage|area)[^>]*>([^<]+)/i)?.[1]?.trim(),
      landUse: html.match(/land.?use[^>]*>([^<]+)/i)?.[1]?.trim()
    };
  }
}
// Usage (top-level await: run as an ES module)
const portal = 'https://gis.county.example.gov';
const gis = new GISExtractor('YOUR_API_KEY');

// Single coordinate lookup
const data = await gis.extractByCoordinates(portal, 34.0522, -118.2437);

// Extract entire region
const bounds = { north: 34.10, south: 34.00, east: -118.20, west: -118.30 };
const region = await gis.extractRegion(portal, bounds);
CAPTCHA Parameters for GIS Portals
| Parameter | Value | Use case |
|---|---|---|
| `method` | `base64` | Standard image CAPTCHA |
| `numeric` | `1` | Numeric-only CAPTCHAs |
| `min_len` | `4` | When character count is known |
| `max_len` | `6` | When character count is known |
| `language` | `0` | English/Latin characters |
| `textinstructions` | Custom | Math CAPTCHAs or formatted codes |
Troubleshooting
| Issue | Cause | Fix |
|---|---|---|
| CAPTCHA image loads broken | Session cookie required | Load search page first |
| Solved text rejected | Case sensitivity | Add case_sensitive=1 parameter |
| Portal returns different CAPTCHA | Session-specific CAPTCHA | Download and solve in same session |
| No parcel data after CAPTCHA | Missing hidden form fields | Extract all hidden inputs before submitting |
FAQ
Why do GIS portals use old-style image CAPTCHAs?
Government GIS systems are often built on legacy platforms that predate modern CAPTCHA services. Budget constraints and long procurement cycles mean these older CAPTCHAs persist.
How should I handle county-specific CAPTCHA formats?
Each county may use different CAPTCHA implementations. Use CaptchaAI's textinstructions parameter to describe the specific format — for example, "5 uppercase letters" or "solve the math equation."
Can I extract shapefile or GeoJSON data behind CAPTCHAs?
If the portal offers downloadable spatial data behind a CAPTCHA, solve the CAPTCHA to access the download link. CaptchaAI handles the CAPTCHA; then download the file normally.
Next Steps
Extract GIS data reliably — get your CaptchaAI API key and handle government portal CAPTCHAs automatically.
Discussions (0)
Join the conversation
Sign in to share your opinion.
Sign In
No comments yet.