How to Automate Product Screenshot Capture for E-Commerce Catalogs
E-commerce teams spend hours manually screenshotting product pages — for price audits, visual QA, competitor tracking, and catalog maintenance. A screenshot API automates this entirely.
The patterns are simple: iterate over a list of URLs, capture each page, store the result. What changes between use cases is the list source and what you do with the output.
Use Case 1: Your Own Product Catalog
Capture screenshots of your live product pages for visual QA — catching layout breaks, missing images, or incorrect pricing before customers see them.
import requests
import json
import os
from datetime import date
def audit_product_pages(product_urls: list[str], api_key: str, output_dir: str):
    """Capture a full-page screenshot of every product page.

    Files are written to *output_dir* as ``<ISO-date>-<slug>.png``, where the
    slug is the last path segment of the URL ("index" for root URLs).
    Returns a list of ``{"url", "status"}`` dicts for pages whose capture
    request did not return HTTP 200.
    """
    os.makedirs(output_dir, exist_ok=True)
    stamp = date.today().isoformat()
    failed = []
    for page_url in product_urls:
        # Use URL slug as filename; fall back to "index" for bare domains.
        name = page_url.rstrip("/").split("/")[-1] or "index"
        destination = os.path.join(output_dir, f"{stamp}-{name}.png")
        response = requests.get(
            "https://hermesforge.dev/api/screenshot",
            params={
                "url": page_url,
                "width": 1440,
                "format": "png",
                "full_page": "true",
                "wait_for": "networkidle",
                "key": api_key,
            },
            timeout=30,
        )
        if response.status_code != 200:
            failed.append({"url": page_url, "status": response.status_code})
            print(f"ERR {name} ({response.status_code})")
            continue
        with open(destination, "wb") as out:
            out.write(response.content)
        print(f"OK {name}")
    return failed
# Load URLs from your catalog export, one per line, skipping blanks.
with open("catalog_urls.txt") as catalog_file:
    urls = [line.strip() for line in catalog_file if line.strip()]

failures = audit_product_pages(urls, "YOUR_API_KEY", "screenshots/catalog")

# Summarize any pages that could not be captured.
if failures:
    print(f"\n{len(failures)} pages failed:")
    for failure in failures:
        print(f"  {failure['url']} → HTTP {failure['status']}")
Run this nightly. Compare screenshots between runs to catch visual regressions — images disappearing, price display bugs, layout shifts on new deploys.
Use Case 2: Competitor Price Monitoring
Track how competitor product pages look over time — catching price changes, promotional banners, and out-of-stock flags that a text scrape might miss.
import hashlib
import time
import requests
import os
# Competitor product pages to track, grouped per competitor.
COMPETITORS = [
    {
        "name": "competitor-a",
        "urls": [
            "https://competitor-a.com/products/widget-pro",
            "https://competitor-a.com/products/widget-lite",
        ],
    },
    {
        "name": "competitor-b",
        "urls": ["https://competitor-b.com/shop/widget-pro/"],
    },
]
def capture_competitor_page(url: str, competitor: str, api_key: str, output_dir: str):
    """Capture an above-the-fold screenshot of one competitor page.

    The file is named ``<competitor>-<hash>.webp``, where the hash is a short
    MD5 digest of the URL — stable across runs and safe for any URL characters.
    Returns the saved path, or None if the capture request failed.
    """
    # MD5 is fine here: it is a filename key, not a security boundary.
    url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
    slug = f"{competitor}-{url_hash}"
    # BUG FIX: the API is asked for WebP below, so the file must carry a
    # .webp extension — the original saved WebP bytes under a .png name.
    output_path = os.path.join(output_dir, f"{slug}.webp")
    resp = requests.get(
        "https://hermesforge.dev/api/screenshot",
        params={
            "url": url,
            "width": 1440,
            "format": "webp",  # Smaller files for storage
            "full_page": "false",  # Above-the-fold only
            "wait_for": "networkidle",
            "key": api_key,
        },
        timeout=30,
    )
    if resp.status_code == 200:
        with open(output_path, "wb") as f:
            f.write(resp.content)
        return output_path
    return None
api_key = "YOUR_API_KEY"
output_dir = "screenshots/competitors"
os.makedirs(output_dir, exist_ok=True)

# Walk every tracked URL for every competitor, throttling between requests.
for entry in COMPETITORS:
    for target_url in entry["urls"]:
        saved_path = capture_competitor_page(target_url, entry["name"], api_key, output_dir)
        if saved_path:
            print(f"Captured: {saved_path}")
        time.sleep(2)  # Polite crawl rate
Store captures with a timestamp in the filename to build a timeline. Weekly diffs show when competitors run sales or change pricing.
Use Case 3: Marketplace Listing Sync
If you sell on Amazon, Etsy, or other marketplaces, screenshot your listings to verify your content is displaying correctly after edits:
# Marketplace listings to verify; each entry carries its platform-native id.
MARKETPLACE_LISTINGS = [
    {
        "platform": "amazon",
        "url": "https://www.amazon.com/dp/B00EXAMPLE1",
        "asin": "B00EXAMPLE1",
    },
    {
        "platform": "etsy",
        "url": "https://www.etsy.com/listing/123456789/your-product",
        "id": "123456789",
    },
]
def capture_listing(listing: dict, api_key: str, output_dir: str):
    """Capture one marketplace listing; returns True when the save succeeds."""
    platform = listing["platform"]
    # Amazon rows carry "asin"; other platforms use a generic "id".
    listing_id = listing.get("asin") or listing.get("id")
    destination = os.path.join(output_dir, f"{platform}-{listing_id}.png")
    response = requests.get(
        "https://hermesforge.dev/api/screenshot",
        params={
            "url": listing["url"],
            "width": 1440,
            "format": "png",
            "full_page": "false",
            "wait_for": "networkidle",
            "key": api_key,
        },
        timeout=30,
    )
    if response.status_code != 200:
        return False
    with open(destination, "wb") as out:
        out.write(response.content)
    return True
Use Case 4: Catalog-Scale Batch Processing
For large catalogs (thousands of SKUs), use a rate-aware batch processor with retry logic:
import time
import requests
import os
from typing import Iterator
def batched(items: list, size: int) -> Iterator[list]:
    """Yield consecutive slices of *items*, each at most *size* long."""
    start = 0
    while start < len(items):
        yield items[start:start + size]
        start += size
def _request_screenshot(url: str, api_key: str):
    """Issue one full-page screenshot request for *url* and return the response."""
    return requests.get(
        "https://hermesforge.dev/api/screenshot",
        params={
            "url": url,
            "width": 1440,
            "format": "png",
            "full_page": "true",
            "key": api_key,
        },
        timeout=30,
    )

def capture_catalog_batch(
    urls: list[str],
    api_key: str,
    output_dir: str,
    batch_size: int = 10,
    delay_between_batches: float = 5.0,
):
    """Capture full-page screenshots for a large URL list in rate-aware batches.

    Processes *urls* in batches of *batch_size*, pausing
    *delay_between_batches* seconds between batches and 0.5 s between
    requests. Already-captured files are skipped, making reruns resumable.
    Returns a ``{"ok", "failed", "skipped"}`` count dict.
    """
    os.makedirs(output_dir, exist_ok=True)
    results = {"ok": 0, "failed": 0, "skipped": 0}
    total_batches = (len(urls) + batch_size - 1) // batch_size
    for i, batch in enumerate(batched(urls, batch_size)):
        print(f"Batch {i+1}/{total_batches}")
        for url in batch:
            # Fall back to "index" for root URLs with an empty last segment,
            # matching the slug handling used for the catalog audit.
            slug = url.rstrip("/").split("/")[-1] or "index"
            output_path = os.path.join(output_dir, f"{slug}.png")
            # Skip if already captured
            if os.path.exists(output_path):
                results["skipped"] += 1
                continue
            try:
                resp = _request_screenshot(url, api_key)
                if resp.status_code == 429:
                    # Rate limited: back off, then retry once with the SAME
                    # parameters (the original retry dropped full_page and
                    # would have captured a viewport-only screenshot).
                    print("Rate limit — waiting 60s")
                    time.sleep(60)
                    resp = _request_screenshot(url, api_key)
                if resp.status_code == 200:
                    with open(output_path, "wb") as f:
                        f.write(resp.content)
                    results["ok"] += 1
                else:
                    results["failed"] += 1
            except requests.RequestException as exc:
                # Covers timeouts AND connection errors — one unreachable URL
                # must not abort the whole catalog run.
                results["failed"] += 1
                print(f"Request failed: {url} ({exc})")
            time.sleep(0.5)  # 2 req/sec within batch
        # Pause between batches, but not after the final one. (The original
        # condition `i < len(urls) // batch_size` slept after the last batch
        # whenever len(urls) divided evenly by batch_size.)
        if i < total_batches - 1:
            time.sleep(delay_between_batches)
    print(f"\nDone: {results['ok']} captured, {results['skipped']} skipped, {results['failed']} failed")
    return results
Rate Limit Planning
| Catalog size | Captures/day needed | Recommended tier |
|---|---|---|
| < 50 SKUs | ≤ 50 | Free (50/day) |
| 50–200 SKUs | ≤ 200 | Starter ($4/30-day, 200/day) |
| 200–1,000 SKUs | ≤ 1,000 | Pro ($9/30-day, 1,000/day) |
| 1,000+ SKUs | ≤ 5,000 | Business ($29/30-day, 5,000/day) |
For catalogs larger than 5,000 SKUs: capture only changed or high-priority SKUs per day. Use a change-detection flag in your database to trigger recapture only on product updates.
Storing and Using Catalog Screenshots
For a simple internal workflow, store captures in a dated directory:
screenshots/
catalog/
2026-05-21-widget-pro.png
2026-05-21-widget-lite.png
competitors/
2026-05-21-competitor-a-abc12345.webp
For a production system, upload to S3 or similar object storage and store the URL in your product database alongside each SKU. This lets your CMS display the latest screenshot for any product page directly.
Hermesforge Screenshot API: grab an API key — the free tier requires no sign-up and includes 50 screenshots/day, enough to cover any catalog under 50 SKUs.