Visual Regression Testing in CI/CD Pipelines with the Screenshot API
Visual regression testing catches UI changes that unit and integration tests miss: a CSS rule that collapses a layout, a font change that breaks alignment, a dependency upgrade that shifts a component. The Screenshot API makes this tractable in CI without running a browser in your pipeline — capture the deployed preview URL, compare against baseline, fail the build on unexpected changes.
The Core Pattern
Visual regression works in two phases:
- Baseline: capture the expected state (usually from `main` or a stable release branch)
- Comparison: capture the current state (from the PR branch or staging deploy) and diff against baseline
import requests
import hashlib
import os
from pathlib import Path
# Screenshot API key, sent as the X-API-Key header on every capture request.
# Read with os.environ[...] so a missing variable raises KeyError at import time.
HERMES_API_KEY = os.environ["HERMES_API_KEY"]
def capture_page(url: str, width: int = 1280, wait_ms: int = 2000) -> bytes:
    """Fetch a full-page PNG screenshot of ``url`` from the Screenshot API.

    Args:
        url: Address of the page to capture.
        width: Value sent as the ``width`` query parameter.
        wait_ms: Value sent as the ``wait`` query parameter.

    Returns:
        The raw response body (the request asks for ``format=png``).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    query = {
        "url": url,
        "format": "png",
        "width": width,
        "full_page": True,
        "wait": wait_ms,
    }
    auth_headers = {"X-API-Key": HERMES_API_KEY}
    response = requests.get(
        "https://hermesforge.dev/api/screenshot",
        headers=auth_headers,
        params=query,
        timeout=90,
    )
    response.raise_for_status()
    return response.content
def pixel_diff(
    baseline: bytes,
    current: bytes,
    tolerance: int = 10,
    threshold_pct: float = 0.1,
) -> dict:
    """Compare two encoded images pixel-by-pixel.

    Both images are decoded and converted to RGB. A pixel counts as changed
    when any of its channels differs by more than ``tolerance``.

    Args:
        baseline: Encoded bytes of the reference image.
        current: Encoded bytes of the image under test.
        tolerance: Per-channel absolute difference (0-255 scale) that is
            ignored, to absorb antialiasing noise. Defaults to 10.
        threshold_pct: Percentage of changed pixels below which the images
            are considered a match. Defaults to 0.1.

    Returns:
        A dict with ``match``, ``diff_pct`` and ``dimension_match``. When the
        sizes agree it also carries ``size``; when they differ, ``match`` is
        False, ``diff_pct`` is 100.0 and ``size_a`` / ``size_b`` report the
        two sizes.
    """
    from PIL import Image
    import io
    import numpy as np

    img_a = Image.open(io.BytesIO(baseline)).convert("RGB")
    img_b = Image.open(io.BytesIO(current)).convert("RGB")

    # Differently sized captures cannot be compared element-wise; report a
    # total mismatch instead of raising.
    if img_a.size != img_b.size:
        return {
            "match": False,
            "diff_pct": 100.0,
            "dimension_match": False,
            "size_a": img_a.size,
            "size_b": img_b.size,
        }

    # int16 keeps the subtraction from wrapping around as uint8 would.
    arr_a = np.array(img_a, dtype=np.int16)
    arr_b = np.array(img_b, dtype=np.int16)
    diff = np.abs(arr_a - arr_b)

    # Collapse the channel axis: one boolean per pixel.
    changed = np.any(diff > tolerance, axis=2)
    diff_pct = float(changed.sum()) / changed.size * 100

    return {
        "match": diff_pct < threshold_pct,
        "diff_pct": round(diff_pct, 4),
        "dimension_match": True,
        "size": img_a.size,
    }
Baseline Management
Store baselines in S3 or a shared directory, keyed by page name and branch:
import boto3
import json
from datetime import datetime, timezone
# The S3 client is created at import time, whether or not a bucket is configured.
s3 = boto3.client("s3")
# Bucket holding baselines; an empty string (the default) selects the local directory instead.
BASELINE_BUCKET = os.environ.get("BASELINE_BUCKET", "")
# Local baseline root used when BASELINE_BUCKET is empty.
BASELINE_DIR = Path(os.environ.get("BASELINE_DIR", ".visual-baselines"))
def get_baseline(page_name: str, branch: str = "main") -> bytes | None:
    """Load the stored baseline image for ``page_name`` on ``branch``.

    Reads from S3 when ``BASELINE_BUCKET`` is set, otherwise from the local
    ``BASELINE_DIR`` tree.

    Returns:
        The image bytes, or None when no baseline exists (missing S3 key or
        missing local file).
    """
    if not BASELINE_BUCKET:
        local_file = BASELINE_DIR / branch / f"{page_name}.png"
        if local_file.exists():
            return local_file.read_bytes()
        return None

    object_key = f"baselines/{branch}/{page_name}.png"
    try:
        stored = s3.get_object(Bucket=BASELINE_BUCKET, Key=object_key)
        return stored["Body"].read()
    except s3.exceptions.NoSuchKey:
        return None
def save_baseline(page_name: str, image: bytes, branch: str = "main"):
    """Store ``image`` as the baseline for ``page_name`` on ``branch``.

    Writes to S3 when ``BASELINE_BUCKET`` is set (with capture time, page and
    branch recorded as object metadata); otherwise writes a PNG file under
    ``BASELINE_DIR``, creating parent directories as needed.
    """
    if not BASELINE_BUCKET:
        target = BASELINE_DIR / branch / f"{page_name}.png"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(image)
        return

    object_metadata = {
        "captured-at": datetime.now(timezone.utc).isoformat(),
        "page": page_name,
        "branch": branch,
    }
    s3.put_object(
        Bucket=BASELINE_BUCKET,
        Key=f"baselines/{branch}/{page_name}.png",
        Body=image,
        ContentType="image/png",
        Metadata=object_metadata,
    )
Running a Visual Regression Check
import sys
def check_page(
    page_name: str,
    current_url: str,
    baseline_branch: str = "main",
    threshold_pct: float = 0.1,
    update_baseline: bool = False,
) -> dict:
    """Capture ``current_url`` and compare it to the stored baseline.

    Args:
        page_name: Key under which the page's baseline is stored.
        current_url: URL to capture for this run.
        baseline_branch: Branch whose baseline is read (and written).
        threshold_pct: Maximum percentage of changed pixels that still
            counts as a pass; the check passes when the diff is strictly
            below this value.
        update_baseline: If True, save the current capture as the new
            baseline and skip comparison (use for intentional UI changes).

    Returns:
        A dict with ``page``, ``url`` and ``status``. ``status`` is one of
        ``"baseline_updated"``, ``"baseline_created"``, ``"pass"`` or
        ``"fail"``; the last two also carry ``diff_pct``, ``threshold`` and
        ``dimension_ok``.
    """
    current = capture_page(current_url)

    if update_baseline:
        save_baseline(page_name, current, baseline_branch)
        return {
            "page": page_name,
            "status": "baseline_updated",
            "url": current_url,
        }

    baseline = get_baseline(page_name, baseline_branch)
    if baseline is None:
        # First run — nothing to compare against, so seed the baseline.
        save_baseline(page_name, current, baseline_branch)
        return {
            "page": page_name,
            "status": "baseline_created",
            "url": current_url,
        }

    result = pixel_diff(baseline, current)
    diff_pct = result["diff_pct"]
    dimension_ok = result.get("dimension_match", True)

    # Decide against the caller's threshold. pixel_diff's own "match" flag
    # reflects its built-in default and would ignore per-page thresholds.
    passed = dimension_ok and diff_pct < threshold_pct

    return {
        "page": page_name,
        "url": current_url,
        "status": "pass" if passed else "fail",
        "diff_pct": diff_pct,
        "threshold": threshold_pct,
        "dimension_ok": dimension_ok,
    }
def run_regression_suite(pages: list[dict], fail_fast: bool = False) -> int:
    """Check every page in ``pages`` and report the results on stdout.

    Each entry must provide ``name`` and ``url``; an optional ``threshold``
    (default 0.1) is forwarded to ``check_page``.

    Args:
        pages: Page descriptors to check, in order.
        fail_fast: Stop after the first failing page.

    Returns:
        Process exit code: 0 when nothing failed, 1 otherwise.
    """
    failures = []

    for page in pages:
        result = check_page(
            page_name=page["name"],
            current_url=page["url"],
            threshold_pct=page.get("threshold", 0.1),
        )
        status = result["status"]

        # Baseline bookkeeping is reported but never counts as a failure.
        if status in ("baseline_created", "baseline_updated"):
            print(f" INIT {page['name']} — {status}")
            continue

        if status == "pass":
            print(f" PASS {page['name']} ({result['diff_pct']}% diff)")
            continue

        print(
            f" FAIL {page['name']} — "
            f"{result['diff_pct']}% diff (threshold: {result['threshold']}%)"
        )
        failures.append(result)
        if fail_fast:
            break

    if not failures:
        print(f"\nAll visual checks passed.")
        return 0

    print(f"\n{len(failures)} visual regression failure(s).")
    return 1
GitHub Actions Integration
# .github/workflows/visual-regression.yml
# Captures screenshots of a preview deployment and compares them to stored baselines.
name: Visual Regression
# Runs on pull requests targeting main, or manually with an optional
# "update_baselines" flag.
on:
pull_request:
branches: [main]
workflow_dispatch:
inputs:
update_baselines:
description: 'Update baselines (for intentional UI changes)'
type: boolean
default: false
jobs:
visual-regression:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install dependencies
run: pip install requests Pillow numpy boto3
# Placeholder step: only publishes a PREVIEW_URL output derived from the commit SHA.
- name: Deploy preview
id: deploy
run: |
# Deploy your app to a preview URL
# This step is specific to your hosting platform
echo "PREVIEW_URL=https://preview-${{ github.sha }}.yourapp.com" >> $GITHUB_OUTPUT
# Polls the preview URL up to 30 times, 10 seconds apart, until it returns HTTP 200.
- name: Wait for preview to be ready
run: |
URL="${{ steps.deploy.outputs.PREVIEW_URL }}"
for i in $(seq 1 30); do
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
if [ "$STATUS" = "200" ]; then
echo "Preview ready"
exit 0
fi
echo "Waiting... ($STATUS)"
sleep 10
done
echo "Preview did not become ready" && exit 1
# UPDATE_BASELINES carries the manual-dispatch input to the script.
- name: Run visual regression
env:
HERMES_API_KEY: ${{ secrets.HERMES_API_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
BASELINE_BUCKET: ${{ secrets.BASELINE_BUCKET }}
PREVIEW_URL: ${{ steps.deploy.outputs.PREVIEW_URL }}
UPDATE_BASELINES: ${{ github.event.inputs.update_baselines }}
run: python scripts/visual_regression.py
# Uploads .visual-diffs/ only when an earlier step failed.
# NOTE(review): the script shown in this article never writes to .visual-diffs/;
# presumably generate_diff_image is meant to be called on failures — confirm.
- name: Upload diff artifacts
if: failure()
uses: actions/upload-artifact@v4
with:
name: visual-diffs
path: .visual-diffs/
retention-days: 14
The Visual Regression Script
#!/usr/bin/env python3
# scripts/visual_regression.py
# CI entry point: either refreshes every baseline from the preview deployment
# (UPDATE_BASELINES=true) or runs the regression suite and exits with its code.
import os
import sys
import json

# Trailing slash is stripped so page paths (which start with "/") join cleanly.
PREVIEW_URL = os.environ["PREVIEW_URL"].rstrip("/")
UPDATE_BASELINES = os.environ.get("UPDATE_BASELINES", "false").lower() == "true"

# Define the pages to check
PAGES = [
    {"name": "homepage", "path": "/", "wait": 2000},
    {"name": "pricing", "path": "/pricing", "wait": 1500},
    {"name": "login", "path": "/login", "wait": 1500},
    {"name": "dashboard", "path": "/app/dashboard", "wait": 3000},
    {"name": "settings", "path": "/app/settings", "wait": 2000},
    {"name": "docs-quickstart", "path": "/docs/quickstart", "wait": 1500},
]

# Same entries as PAGES, each extended with its absolute preview URL.
pages_with_urls = []
for page in PAGES:
    pages_with_urls.append({**page, "url": f"{PREVIEW_URL}{page['path']}"})

if not UPDATE_BASELINES:
    sys.exit(run_regression_suite(pages_with_urls))

print("Updating baselines for all pages...")
for page in pages_with_urls:
    img = capture_page(page["url"], wait_ms=page.get("wait", 2000))
    save_baseline(page["name"], img, "main")
    print(f" Updated: {page['name']}")
sys.exit(0)
Generating Visual Diff Images
When a check fails, generate a diff image to make the change visible:
def generate_diff_image(
    baseline: bytes,
    current: bytes,
    output_path: str,
    tolerance: int = 10,
) -> str:
    """Write a side-by-side ``baseline | current | diff`` composite image.

    In the diff panel, every pixel whose channels differ by more than
    ``tolerance`` is painted solid red on top of the current capture.

    Args:
        baseline: Encoded bytes of the reference image.
        current: Encoded bytes of the image under test.
        output_path: Destination file; parent directories are created.
        tolerance: Per-channel absolute difference (0-255 scale) that is
            ignored. Defaults to 10.

    Returns:
        ``output_path``, unchanged.
    """
    from PIL import Image
    import io
    import numpy as np

    img_a = Image.open(io.BytesIO(baseline)).convert("RGB")
    img_b = Image.open(io.BytesIO(current)).convert("RGB")

    if img_a.size != img_b.size:
        # Arrays of different shapes cannot be subtracted, so scale the
        # current capture to the baseline's size first. The diff is then
        # only approximate.
        img_b = img_b.resize(img_a.size, Image.LANCZOS)

    # int16 keeps the subtraction from wrapping around as uint8 would.
    arr_a = np.array(img_a, dtype=np.int16)
    arr_b = np.array(img_b, dtype=np.int16)
    changed = np.any(np.abs(arr_a - arr_b) > tolerance, axis=2)

    # np.array() already returns a fresh uint8 copy of the RGB image.
    diff_overlay = np.array(img_b)
    diff_overlay[changed] = [255, 0, 0]  # Red = changed
    diff_img = Image.fromarray(diff_overlay)

    # Side-by-side: baseline | current | diff
    w, h = img_a.size
    composite = Image.new("RGB", (w * 3, h))
    for panel_index, panel in enumerate((img_a, img_b, diff_img)):
        composite.paste(panel, (w * panel_index, 0))

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    composite.save(output_path)
    return output_path
Threshold Reference
| Page type | Recommended threshold | Rationale |
|---|---|---|
| Static marketing page | 0.05% | Should be pixel-perfect |
| Blog/docs page | 0.1% | Minor font rendering variation ok |
| Dashboard with data | 0.5% | Dynamic data values change |
| Page with animations | 1.0% | Capture timing variance |
| Map or canvas elements | 2.0% | Tile loading is non-deterministic |
Set per-page thresholds in the PAGES list rather than using a global threshold. A login page and a data dashboard have fundamentally different stability characteristics.
Handling Dynamic Content
Pages with timestamps, user names, or live data require masking before comparison:
def capture_with_masking(
    url: str,
    css_selectors: list[str],
    wait_ms: int = 2000,
) -> bytes:
    """Capture a page with dynamic elements hidden before the screenshot.

    Builds a JavaScript snippet that, for every selector, sets
    ``visibility: hidden`` (and a ``#ccc`` background) on all matching
    elements, and sends it in the request's ``js`` parameter. Hidden
    elements keep their layout box, so the rest of the page does not shift.

    Args:
        url: Address of the page to capture.
        css_selectors: CSS selectors of the elements to hide. Selectors may
            contain quotes (e.g. ``[data-testid='live-chart']``).
        wait_ms: Value sent as the ``wait`` query parameter.

    Returns:
        The raw response body (the request asks for ``format=png``).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    import json

    # json.dumps yields a double-quoted, fully escaped string literal that is
    # also valid JavaScript. Wrapping the selector in '...' by hand breaks as
    # soon as the selector itself contains a single quote.
    mask_js = "; ".join(
        f"document.querySelectorAll({json.dumps(sel)}).forEach(el => "
        f"{{ el.style.visibility='hidden'; el.style.background='#ccc'; }})"
        for sel in css_selectors
    )

    resp = requests.get(
        "https://hermesforge.dev/api/screenshot",
        headers={"X-API-Key": HERMES_API_KEY},
        params={
            "url": url,
            "format": "png",
            "width": 1280,
            "full_page": True,
            "wait": wait_ms,
            "js": mask_js,
        },
        timeout=90,
    )
    resp.raise_for_status()
    return resp.content
# Example: mask timestamps and user-specific content before comparison
# NOTE(review): the last selector contains single quotes — confirm that
# capture_with_masking quotes selectors safely when it builds the injected JS.
current = capture_with_masking(
url=f"{PREVIEW_URL}/app/dashboard",
css_selectors=[
".timestamp",
".user-greeting",
"[data-testid='live-chart']",
],
)
Free API key at hermesforge.dev. 50 captures/day, no credit card required.