How to Automate Product Screenshot Capture for E-Commerce Catalogs
E-commerce teams spend hours manually screenshotting product pages — for price audits, visual QA, competitor tracking, and catalog maintenance. A screenshot API automates this entirely.
The patterns are simple: iterate over a list of URLs, capture each page, store the result. What changes between use cases is the list source and what you do with the output.
Use Case 1: Your Own Product Catalog
Capture screenshots of your live product pages for visual QA — catching layout breaks, missing images, or incorrect pricing before customers see them.
import requests
import json
import os
from datetime import date
def audit_product_pages(product_urls: list[str], api_key: str, output_dir: str):
    """Capture a full-page screenshot of every product page.

    Files are written to *output_dir* as ``<ISO-date>-<slug>.png``, where the
    slug is the last path segment of the URL ("index" for root URLs).
    Returns a list of ``{"url", "status"}`` dicts for pages whose capture
    request did not return HTTP 200.
    """
    os.makedirs(output_dir, exist_ok=True)
    stamp = date.today().isoformat()
    failed = []
    for page_url in product_urls:
        # Use URL slug as filename; fall back to "index" for bare domains.
        name = page_url.rstrip("/").split("/")[-1] or "index"
        destination = os.path.join(output_dir, f"{stamp}-{name}.png")
        response = requests.get(
            "https://hermesforge.dev/api/screenshot",
            params={
                "url": page_url,
                "width": 1440,
                "format": "png",
                "full_page": "true",
                "wait_for": "networkidle",
                "key": api_key,
            },
            timeout=30,
        )
        if response.status_code != 200:
            failed.append({"url": page_url, "status": response.status_code})
            print(f"ERR {name} ({response.status_code})")
            continue
        with open(destination, "wb") as out:
            out.write(response.content)
        print(f"OK {name}")
    return failed
# Load URLs from your catalog export, one per line, skipping blanks.
with open("catalog_urls.txt") as catalog_file:
    urls = [line.strip() for line in catalog_file if line.strip()]

failures = audit_product_pages(urls, "YOUR_API_KEY", "screenshots/catalog")

# Summarize any pages that could not be captured.
if failures:
    print(f"\n{len(failures)} pages failed:")
    for failure in failures:
        print(f"  {failure['url']} → HTTP {failure['status']}")
Run this nightly. Compare screenshots between runs to catch visual regressions — images disappearing, price display bugs, layout shifts on new deploys.
Use Case 2: Competitor Price Monitoring
Track how competitor product pages look over time — catching price changes, promotional banners, and out-of-stock flags that a text scrape might miss.
import hashlib
import time
import requests
import os
# Competitor product pages to track, grouped per competitor.
COMPETITORS = [
    {
        "name": "competitor-a",
        "urls": [
            "https://competitor-a.com/products/widget-pro",
            "https://competitor-a.com/products/widget-lite",
        ],
    },
    {
        "name": "competitor-b",
        "urls": ["https://competitor-b.com/shop/widget-pro/"],
    },
]
def capture_competitor_page(url: str, competitor: str, api_key: str, output_dir: str):
    """Capture an above-the-fold screenshot of one competitor page.

    The file is named ``<competitor>-<hash>.webp``, where the hash is a short
    MD5 digest of the URL — stable across runs and safe for any URL characters.
    Returns the saved path, or None if the capture request failed.
    """
    # MD5 is fine here: it is a filename key, not a security boundary.
    url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
    slug = f"{competitor}-{url_hash}"
    # BUG FIX: the API is asked for WebP below, so the file must carry a
    # .webp extension — the original saved WebP bytes under a .png name.
    output_path = os.path.join(output_dir, f"{slug}.webp")
    resp = requests.get(
        "https://hermesforge.dev/api/screenshot",
        params={
            "url": url,
            "width": 1440,
            "format": "webp",  # Smaller files for storage
            "full_page": "false",  # Above-the-fold only
            "wait_for": "networkidle",
            "key": api_key,
        },
        timeout=30,
    )
    if resp.status_code == 200:
        with open(output_path, "wb") as f:
            f.write(resp.content)
        return output_path
    return None
api_key = "YOUR_API_KEY"
output_dir = "screenshots/competitors"
os.makedirs(output_dir, exist_ok=True)

# Walk every tracked URL for every competitor, throttling between requests.
for entry in COMPETITORS:
    for target_url in entry["urls"]:
        saved_path = capture_competitor_page(target_url, entry["name"], api_key, output_dir)
        if saved_path:
            print(f"Captured: {saved_path}")
        time.sleep(2)  # Polite crawl rate
Store captures with a timestamp in the filename to build a timeline. Weekly diffs show when competitors run sales or change pricing.
Use Case 3: Marketplace Listing Sync
If you sell on Amazon, Etsy, or other marketplaces, screenshot your listings to verify your content is displaying correctly after edits:
# Marketplace listings to verify; each entry carries its platform-native id.
MARKETPLACE_LISTINGS = [
    {
        "platform": "amazon",
        "url": "https://www.amazon.com/dp/B00EXAMPLE1",
        "asin": "B00EXAMPLE1",
    },
    {
        "platform": "etsy",
        "url": "https://www.etsy.com/listing/123456789/your-product",
        "id": "123456789",
    },
]
def capture_listing(listing: dict, api_key: str, output_dir: str):
    """Capture one marketplace listing; returns True when the save succeeds."""
    platform = listing["platform"]
    # Amazon rows carry "asin"; other platforms use a generic "id".
    listing_id = listing.get("asin") or listing.get("id")
    destination = os.path.join(output_dir, f"{platform}-{listing_id}.png")
    response = requests.get(
        "https://hermesforge.dev/api/screenshot",
        params={
            "url": listing["url"],
            "width": 1440,
            "format": "png",
            "full_page": "false",
            "wait_for": "networkidle",
            "key": api_key,
        },
        timeout=30,
    )
    if response.status_code != 200:
        return False
    with open(destination, "wb") as out:
        out.write(response.content)
    return True
Use Case 4: Catalog-Scale Batch Processing
For large catalogs (thousands of SKUs), use a rate-aware batch processor with retry logic:
import time
import requests
import os
from typing import Iterator
def batched(items: list, size: int) -> Iterator[list]:
    """Yield consecutive slices of *items*, each at most *size* long."""
    start = 0
    while start < len(items):
        yield items[start:start + size]
        start += size
def _request_screenshot(url: str, api_key: str):
    """Issue one full-page screenshot request for *url* and return the response."""
    return requests.get(
        "https://hermesforge.dev/api/screenshot",
        params={
            "url": url,
            "width": 1440,
            "format": "png",
            "full_page": "true",
            "key": api_key,
        },
        timeout=30,
    )

def capture_catalog_batch(
    urls: list[str],
    api_key: str,
    output_dir: str,
    batch_size: int = 10,
    delay_between_batches: float = 5.0,
):
    """Capture full-page screenshots for a large URL list in rate-aware batches.

    Processes *urls* in batches of *batch_size*, pausing
    *delay_between_batches* seconds between batches and 0.5 s between
    requests. Already-captured files are skipped, making reruns resumable.
    Returns a ``{"ok", "failed", "skipped"}`` count dict.
    """
    os.makedirs(output_dir, exist_ok=True)
    results = {"ok": 0, "failed": 0, "skipped": 0}
    total_batches = (len(urls) + batch_size - 1) // batch_size
    for i, batch in enumerate(batched(urls, batch_size)):
        print(f"Batch {i+1}/{total_batches}")
        for url in batch:
            # Fall back to "index" for root URLs with an empty last segment,
            # matching the slug handling used for the catalog audit.
            slug = url.rstrip("/").split("/")[-1] or "index"
            output_path = os.path.join(output_dir, f"{slug}.png")
            # Skip if already captured
            if os.path.exists(output_path):
                results["skipped"] += 1
                continue
            try:
                resp = _request_screenshot(url, api_key)
                if resp.status_code == 429:
                    # Rate limited: back off, then retry once with the SAME
                    # parameters (the original retry dropped full_page and
                    # would have captured a viewport-only screenshot).
                    print("Rate limit — waiting 60s")
                    time.sleep(60)
                    resp = _request_screenshot(url, api_key)
                if resp.status_code == 200:
                    with open(output_path, "wb") as f:
                        f.write(resp.content)
                    results["ok"] += 1
                else:
                    results["failed"] += 1
            except requests.RequestException as exc:
                # Covers timeouts AND connection errors — one unreachable URL
                # must not abort the whole catalog run.
                results["failed"] += 1
                print(f"Request failed: {url} ({exc})")
            time.sleep(0.5)  # 2 req/sec within batch
        # Pause between batches, but not after the final one. (The original
        # condition `i < len(urls) // batch_size` slept after the last batch
        # whenever len(urls) divided evenly by batch_size.)
        if i < total_batches - 1:
            time.sleep(delay_between_batches)
    print(f"\nDone: {results['ok']} captured, {results['skipped']} skipped, {results['failed']} failed")
    return results
Rate Limit Planning
| Catalog size | Captures/day needed | Recommended tier |
|---|---|---|
| < 50 SKUs | ≤ 50 | Free (50/day) |
| 50–200 SKUs | ≤ 200 | Starter ($4/30-day, 200/day) |
| 200–1,000 SKUs | ≤ 1,000 | Pro ($9/30-day, 1,000/day) |
| 1,000+ SKUs | ≤ 5,000 | Business ($29/30-day, 5,000/day) |
For catalogs larger than 5,000 SKUs: capture only changed or high-priority SKUs per day. Use a change-detection flag in your database to trigger recapture only on product updates.
Storing and Using Catalog Screenshots
For a simple internal workflow, store captures in a dated directory:
screenshots/
catalog/
2026-05-21-widget-pro.png
2026-05-21-widget-lite.png
competitors/
2026-05-21-competitor-a-abc12345.webp
For a production system, upload to S3 or similar object storage and store the URL in your product database alongside each SKU. This lets your CMS display the latest screenshot for any product page directly.
Hermesforge Screenshot API: grab an API key — the free tier requires no sign-up and includes 50 screenshots/day, enough to cover any catalog under 50 SKUs.