Visual Regression Testing in CI/CD Pipelines with the Screenshot API

2026-05-10 | Tags: [use-case, cicd, testing, visual-regression, github-actions, screenshot-api, devops]

Visual regression testing catches UI changes that unit and integration tests miss: a CSS rule that collapses a layout, a font change that breaks alignment, a dependency upgrade that shifts a component. The Screenshot API makes this tractable in CI without running a browser in your pipeline — capture the deployed preview URL, compare against baseline, fail the build on unexpected changes.

The Core Pattern

Visual regression works in two phases:

  1. Baseline: capture the expected state (usually from main or a stable release branch)
  2. Comparison: capture the current state (from the PR branch or staging deploy) and diff against baseline
import requests
import hashlib
import os
from pathlib import Path

# API key sent as the X-API-Key header on every screenshot request.
# Read with os.environ[...], so a missing variable raises KeyError at import time.
HERMES_API_KEY = os.environ["HERMES_API_KEY"]


def capture_page(url: str, width: int = 1280, wait_ms: int = 2000) -> bytes:
    """Request a full-page PNG screenshot of ``url`` and return the raw bytes.

    ``width`` is forwarded as the API's ``width`` parameter and ``wait_ms`` as
    its ``wait`` parameter. ``raise_for_status()`` is called on the response,
    so an HTTP error status surfaces as an exception instead of being
    returned as image data.
    """
    query = {
        "url":       url,
        "format":    "png",
        "width":     width,
        "full_page": True,
        "wait":      wait_ms,
    }
    auth_headers = {"X-API-Key": HERMES_API_KEY}

    response = requests.get(
        "https://hermesforge.dev/api/screenshot",
        headers=auth_headers,
        params=query,
        timeout=90,
    )
    response.raise_for_status()
    return response.content


def pixel_diff(
    baseline: bytes,
    current: bytes,
    tolerance: int = 10,
    threshold_pct: float = 0.1,
) -> dict:
    """
    Compare two encoded images pixel-by-pixel.

    Both inputs are decoded with Pillow and converted to RGB before comparison.

    Args:
        baseline: Encoded bytes of the reference image.
        current: Encoded bytes of the image under test.
        tolerance: Per-channel absolute difference (0-255 scale) that is
            ignored; a pixel counts as changed only when at least one channel
            differs by more than this. The default of 10 absorbs antialiasing
            noise.
        threshold_pct: Percentage of changed pixels below which the images
            are considered a match.

    Returns:
        A dict with ``match``, ``diff_pct`` and ``dimension_match``. When the
        sizes differ, no pixel comparison is attempted: ``match`` is False,
        ``diff_pct`` is 100.0 and ``size_a`` / ``size_b`` report both sizes.
        Otherwise ``size`` reports the shared size and ``diff_pct`` is rounded
        to 4 decimals (``match`` is decided on the unrounded value).
    """
    from PIL import Image
    import io
    import numpy as np

    img_a = Image.open(io.BytesIO(baseline)).convert("RGB")
    img_b = Image.open(io.BytesIO(current)).convert("RGB")

    if img_a.size != img_b.size:
        return {
            "match":           False,
            "diff_pct":        100.0,
            "dimension_match": False,
            "size_a":          img_a.size,
            "size_b":          img_b.size,
        }

    # int16 so that the subtraction below cannot wrap around as uint8 would.
    arr_a = np.array(img_a, dtype=np.int16)
    arr_b = np.array(img_b, dtype=np.int16)

    diff     = np.abs(arr_a - arr_b)
    changed  = np.any(diff > tolerance, axis=2)   # collapse the channel axis
    diff_pct = float(changed.sum()) / changed.size * 100

    return {
        "match":           diff_pct < threshold_pct,
        "diff_pct":        round(diff_pct, 4),
        "dimension_match": True,
        "size":            img_a.size,
    }

Baseline Management

Store baselines in S3 or a shared directory, keyed by page name and branch:

import boto3
import json
from datetime import datetime, timezone

# Module-level S3 client shared by get_baseline() and save_baseline().
s3            = boto3.client("s3")
# When non-empty, baselines are read from / written to this S3 bucket;
# when empty (the default), the local BASELINE_DIR is used instead.
BASELINE_BUCKET = os.environ.get("BASELINE_BUCKET", "")
# Local directory for baselines; files are laid out as <dir>/<branch>/<page>.png.
BASELINE_DIR    = Path(os.environ.get("BASELINE_DIR", ".visual-baselines"))


def get_baseline(page_name: str, branch: str = "main") -> bytes | None:
    """Return the stored baseline PNG for ``page_name`` on ``branch``.

    Reads from the local baseline directory when no bucket is configured,
    otherwise from S3 under ``baselines/<branch>/<page_name>.png``.
    Returns None when no baseline has been stored yet.
    """
    if not BASELINE_BUCKET:
        local_file = BASELINE_DIR / branch / f"{page_name}.png"
        if local_file.exists():
            return local_file.read_bytes()
        return None

    object_key = f"baselines/{branch}/{page_name}.png"
    try:
        response = s3.get_object(Bucket=BASELINE_BUCKET, Key=object_key)
        return response["Body"].read()
    except s3.exceptions.NoSuchKey:
        # A missing object simply means "no baseline yet", not an error.
        return None


def save_baseline(page_name: str, image: bytes, branch: str = "main"):
    """Store ``image`` as the baseline for ``page_name`` on ``branch``.

    Writes to the local baseline directory (creating parent directories as
    needed) when no bucket is configured; otherwise uploads to S3 with the
    capture time, page name and branch attached as object metadata.
    """
    if not BASELINE_BUCKET:
        target = BASELINE_DIR / branch / f"{page_name}.png"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(image)
        return

    object_metadata = {
        "captured-at": datetime.now(timezone.utc).isoformat(),
        "page":        page_name,
        "branch":      branch,
    }
    s3.put_object(
        Bucket=BASELINE_BUCKET,
        Key=f"baselines/{branch}/{page_name}.png",
        Body=image,
        ContentType="image/png",
        Metadata=object_metadata,
    )

Running a Visual Regression Check

import sys


def check_page(
    page_name:   str,
    current_url: str,
    baseline_branch: str  = "main",
    threshold_pct: float  = 0.1,
    update_baseline: bool = False,
) -> dict:
    """
    Capture current URL and compare to baseline.

    Args:
        page_name: Key under which the baseline is stored.
        current_url: URL to capture for this run.
        baseline_branch: Branch whose baseline is read (or written).
        threshold_pct: Maximum percentage of changed pixels that still
            passes; the check passes only when diff_pct is strictly below it.
        update_baseline: if True, save current as new baseline (use for
            intentional updates) and skip the comparison.

    Returns:
        A dict with "page", "url" and "status". Status is "baseline_updated"
        or "baseline_created" when a baseline was written, otherwise "pass"
        or "fail", in which case "diff_pct", "threshold" and "dimension_ok"
        are included as well.
    """
    current = capture_page(current_url)

    if update_baseline:
        save_baseline(page_name, current, baseline_branch)
        return {
            "page":    page_name,
            "status":  "baseline_updated",
            "url":     current_url,
        }

    baseline = get_baseline(page_name, baseline_branch)

    if baseline is None:
        # First run — save as baseline
        save_baseline(page_name, current, baseline_branch)
        return {
            "page":    page_name,
            "status":  "baseline_created",
            "url":     current_url,
        }

    result = pixel_diff(baseline, current)
    dimension_ok = result.get("dimension_match", True)

    # Decide pass/fail against the caller's threshold. The "match" flag from
    # pixel_diff, as called here, reflects its own 0.1% cut-off, so relying
    # on it would silently ignore per-page thresholds.
    passed = dimension_ok and result["diff_pct"] < threshold_pct

    return {
        "page":         page_name,
        "url":          current_url,
        "status":       "pass" if passed else "fail",
        "diff_pct":     result["diff_pct"],
        "threshold":    threshold_pct,
        "dimension_ok": dimension_ok,
    }


def run_regression_suite(pages: list[dict], fail_fast: bool = False) -> int:
    """
    Check every page in ``pages`` and print one result line per page.

    Each entry needs "name" and "url"; an optional "threshold" (default 0.1)
    is forwarded to check_page. With ``fail_fast`` the loop stops after the
    first failing page.

    Returns a process exit code: 0 when nothing failed, 1 otherwise.
    """
    failed_results = []

    for entry in pages:
        name = entry["name"]
        outcome = check_page(
            page_name=name,
            current_url=entry["url"],
            threshold_pct=entry.get("threshold", 0.1),
        )
        state = outcome["status"]

        if state == "pass":
            print(f"  PASS  {name} ({outcome['diff_pct']}% diff)")
            continue

        if state in ("baseline_created", "baseline_updated"):
            # A freshly written baseline is neither a pass nor a failure.
            print(f"  INIT  {name} — {state}")
            continue

        print(
            f"  FAIL  {name} — "
            f"{outcome['diff_pct']}% diff (threshold: {outcome['threshold']}%)"
        )
        failed_results.append(outcome)
        if fail_fast:
            break

    if not failed_results:
        print("\nAll visual checks passed.")
        return 0

    print(f"\n{len(failed_results)} visual regression failure(s).")
    return 1

GitHub Actions Integration

# .github/workflows/visual-regression.yml
# Captures screenshots of a preview deployment and compares them against
# stored baselines by running scripts/visual_regression.py.
name: Visual Regression

# Runs on pull requests targeting main, or manually. The manual trigger
# exposes an update_baselines flag for accepting intentional UI changes.
on:
  pull_request:
    branches: [main]
  workflow_dispatch:
    inputs:
      update_baselines:
        description: 'Update baselines (for intentional UI changes)'
        type: boolean
        default: false

jobs:
  visual-regression:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: pip install requests Pillow numpy boto3

      # Placeholder: as written, this step only publishes a PREVIEW_URL output
      # built from the commit SHA; the real deploy command has to be added.
      - name: Deploy preview
        id: deploy
        run: |
          # Deploy your app to a preview URL
          # This step is specific to your hosting platform
          echo "PREVIEW_URL=https://preview-${{ github.sha }}.yourapp.com" >> $GITHUB_OUTPUT

      # Polls the preview URL up to 30 times, 10 seconds apart, and fails the
      # job if it never answers with HTTP 200.
      - name: Wait for preview to be ready
        run: |
          URL="${{ steps.deploy.outputs.PREVIEW_URL }}"
          for i in $(seq 1 30); do
            STATUS=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
            if [ "$STATUS" = "200" ]; then
              echo "Preview ready"
              exit 0
            fi
            echo "Waiting... ($STATUS)"
            sleep 10
          done
          echo "Preview did not become ready" && exit 1

      # UPDATE_BASELINES comes from the manual-dispatch input.
      # NOTE(review): presumably empty on pull_request events, which the script
      # treats the same as "false" — confirm.
      - name: Run visual regression
        env:
          HERMES_API_KEY: ${{ secrets.HERMES_API_KEY }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          BASELINE_BUCKET: ${{ secrets.BASELINE_BUCKET }}
          PREVIEW_URL: ${{ steps.deploy.outputs.PREVIEW_URL }}
          UPDATE_BASELINES: ${{ github.event.inputs.update_baselines }}
        run: python scripts/visual_regression.py

      # NOTE(review): the script shown for scripts/visual_regression.py does not
      # write anything to .visual-diffs/; diff-image generation has to be wired
      # in for this upload to contain files.
      - name: Upload diff artifacts
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: visual-diffs
          path: .visual-diffs/
          retention-days: 14

The Visual Regression Script

#!/usr/bin/env python3
# scripts/visual_regression.py

import os
import sys
import json

# Base URL of the deployed preview; the trailing slash is stripped so page
# paths (which start with "/") can be appended directly.
PREVIEW_URL      = os.environ["PREVIEW_URL"].rstrip("/")
# Only the string "true" (case-insensitive) switches on baseline-update mode.
UPDATE_BASELINES = os.environ.get("UPDATE_BASELINES", "false").lower() == "true"

# Pages to check: baseline name, path on the preview host, and the wait (ms)
# used when capturing in baseline-update mode.
PAGES = [
    {"name": "homepage",        "path": "/",                    "wait": 2000},
    {"name": "pricing",         "path": "/pricing",             "wait": 1500},
    {"name": "login",           "path": "/login",               "wait": 1500},
    {"name": "dashboard",       "path": "/app/dashboard",       "wait": 3000},
    {"name": "settings",        "path": "/app/settings",        "wait": 2000},
    {"name": "docs-quickstart", "path": "/docs/quickstart",     "wait": 1500},
]

# Same entries as PAGES, each extended with its absolute preview URL.
pages_with_urls = [dict(page, url=PREVIEW_URL + page["path"]) for page in PAGES]

if not UPDATE_BASELINES:
    # Normal mode: compare every page and exit with the suite's result code.
    sys.exit(run_regression_suite(pages_with_urls))

# Update mode: recapture every page and overwrite the "main" baselines.
print("Updating baselines for all pages...")
for entry in pages_with_urls:
    screenshot = capture_page(entry["url"], wait_ms=entry.get("wait", 2000))
    save_baseline(entry["name"], screenshot, "main")
    print(f"  Updated: {entry['name']}")
sys.exit(0)

Generating Visual Diff Images

When a check fails, generate a diff image to make the change visible:

def generate_diff_image(
    baseline: bytes,
    current:  bytes,
    output_path: str,
) -> str:
    """
    Generate a side-by-side diff image highlighting changed pixels.

    The output is a single image three panels wide: baseline | current |
    current with changed pixels painted red. A pixel counts as changed when
    any RGB channel differs by more than 10.

    Args:
        baseline: Encoded bytes of the reference image.
        current: Encoded bytes of the image under test.
        output_path: Where to save the composite; missing parent directories
            are created.

    Returns:
        The path to the diff image (``output_path`` unchanged).
    """
    from pathlib import Path

    from PIL import Image
    import io
    import numpy as np

    img_a = Image.open(io.BytesIO(baseline)).convert("RGB")
    img_b = Image.open(io.BytesIO(current)).convert("RGB")

    if img_a.size != img_b.size:
        # Arrays of different shapes cannot be subtracted, so the current
        # image is resampled to the baseline's size first.
        # NOTE(review): this stretches the capture, so the diff panel is only
        # a rough guide when the dimensions differ.
        img_b = img_b.resize(img_a.size, Image.LANCZOS)

    # int16 so that the subtraction below cannot wrap around as uint8 would.
    arr_a   = np.array(img_a, dtype=np.int16)
    arr_b   = np.array(img_b, dtype=np.int16)
    changed = np.any(np.abs(arr_a - arr_b) > 10, axis=2)

    # np.array() already yields an independent uint8 copy of the RGB image,
    # so it can be painted in place and converted straight back.
    diff_overlay = np.array(img_b)
    diff_overlay[changed] = [255, 0, 0]   # Red = changed
    diff_img = Image.fromarray(diff_overlay)

    # Side-by-side: baseline | current | diff
    w, h = img_a.size
    composite = Image.new("RGB", (w * 3, h))
    composite.paste(img_a,    (0,     0))
    composite.paste(img_b,    (w,     0))
    composite.paste(diff_img, (w * 2, 0))

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    composite.save(output_path)
    return output_path

Threshold Reference

| Page type | Recommended threshold | Rationale |
|---|---|---|
| Static marketing page | 0.05% | Should be pixel-perfect |
| Blog/docs page | 0.1% | Minor font rendering variation ok |
| Dashboard with data | 0.5% | Dynamic data values change |
| Page with animations | 1.0% | Capture timing variance |
| Map or canvas elements | 2.0% | Tile loading is non-deterministic |

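Set per-page thresholds in the PAGES list by adding a "threshold" key to each entry (run_regression_suite falls back to 0.1 when the key is absent) rather than using a global threshold. A login page and a data dashboard have fundamentally different stability characteristics.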

Handling Dynamic Content

Pages with timestamps, user names, or live data require masking before comparison:

def capture_with_masking(
    url:            str,
    css_selectors:  list[str],
    wait_ms:        int = 2000,
    width:          int = 1280,
) -> bytes:
    """
    Capture a page with dynamic elements masked out.

    Uses the Screenshot API's JS injection parameter: for every selector, the
    injected script sets visibility='hidden' and background='#ccc' on each
    matching element.

    Args:
        url: Page to capture.
        css_selectors: CSS selectors of the elements to mask. Selectors may
            contain quotes (e.g. "[data-testid='live-chart']").
        wait_ms: Forwarded as the API's ``wait`` parameter.
        width: Forwarded as the API's ``width`` parameter.

    Returns:
        The response body (PNG bytes). An HTTP error status raises via
        ``raise_for_status()``.
    """
    import json

    # json.dumps emits a double-quoted, fully escaped string literal that is
    # also valid JavaScript. Wrapping the raw selector in single quotes breaks
    # the script as soon as the selector itself contains a single quote.
    mask_js = "; ".join(
        f"document.querySelectorAll({json.dumps(sel)}).forEach(el => "
        f"{{ el.style.visibility='hidden'; el.style.background='#ccc'; }})"
        for sel in css_selectors
    )

    params = {
        "url":       url,
        "format":    "png",
        "width":     width,
        "full_page": True,
        "wait":      wait_ms,
    }
    if mask_js:
        # Nothing to inject when no selectors were given.
        params["js"] = mask_js

    resp = requests.get(
        "https://hermesforge.dev/api/screenshot",
        headers={"X-API-Key": HERMES_API_KEY},
        params=params,
        timeout=90,
    )
    resp.raise_for_status()
    return resp.content


# Example: capture the dashboard with its timestamp, greeting and live chart
# masked, so those regions do not show up as differences.
current = capture_with_masking(
    f"{PREVIEW_URL}/app/dashboard",
    [".timestamp", ".user-greeting", "[data-testid='live-chart']"],
)

Free API key at hermesforge.dev. 50 captures/day, no credit card required.