Use a Screenshot API for Visual Regression Testing
Your CSS change looks fine on your machine. But does it look fine on mobile? On a 4K monitor? In dark mode? Visual regression testing catches the bugs that unit tests can't — the ones your users see first.
Here's how to build a visual regression testing pipeline using a screenshot API and simple image comparison.
What Visual Regression Testing Catches
- Layout shifts — elements moving after a CSS change
- Typography changes — font size, weight, or spacing regressions
- Color changes — accidental overrides, dark mode breakage
- Responsive breakage — mobile layouts after desktop-focused changes
- Missing elements — components that fail to render after refactoring
- z-index issues — overlapping elements, hidden content
The Pipeline
PR opened → CI captures screenshots → Compare with baseline → Report diff → Pass/Fail
Architecture
┌──────────────┐     ┌───────────────┐     ┌──────────────┐
│ CI Pipeline  │────>│ Screenshot API│────>│ Compare      │
│ (on PR)      │     │ (multiple     │     │ with stored  │
│              │     │  viewports)   │     │ baselines    │
└──────────────┘     └───────────────┘     └──────┬───────┘
                                                  │
                                           ┌──────▼───────┐
                                           │ Diff > 0?    │
                                           │ Report it    │
                                           └──────────────┘
Step 1: Define Test Pages
Create a config listing the pages and viewports to test:
# visual_tests.py
# Pages covered by the visual regression run. Every entry needs "name",
# "url" and "viewports"; "dark_mode" and "delay" are optional extras.
TEST_PAGES = [
    {
        "name": "homepage",
        "url": "https://staging.example.com/",
        "viewports": ["desktop", "mobile", "tablet"],
    },
    {
        "name": "pricing",
        "url": "https://staging.example.com/pricing",
        "viewports": ["desktop", "mobile"],
    },
    {
        # Login is additionally captured with dark mode enabled.
        "name": "login",
        "url": "https://staging.example.com/login",
        "viewports": ["desktop", "mobile"],
        "dark_mode": True,
    },
    {
        "name": "dashboard",
        "url": "https://staging.example.com/dashboard",
        "viewports": ["desktop", "desktop_hd"],
        "delay": 3000,  # wait for charts to render
    },
]
Step 2: Capture Screenshots
#!/usr/bin/env python3
"""Capture screenshots for visual regression testing."""
import requests
import hashlib
from pathlib import Path
# Base URL of the screenshot service; "/screenshot" is appended per request.
API_BASE = "https://hermesforge.dev/api"
# Directory that receives the PNGs captured during the current run.
SCREENSHOT_DIR = Path("visual-tests/current")
def capture_page(page_config):
    """Capture one screenshot per configured viewport for a single page.

    Args:
        page_config: Mapping with "name", "url" and "viewports" keys. A
            truthy "dark_mode" enables dark mode and a truthy "delay" is
            forwarded unchanged to the API.

    Returns:
        A list with one dict per successful capture containing "name",
        "viewport", "file" (path of the saved PNG as a string) and
        "size_kb". Viewports whose request failed (network error or
        non-200 response) are reported on stdout and left out of the list.
    """
    # The clean-up script is the same for every viewport, so build it once.
    # It removes dynamic elements that would otherwise cause false positives.
    cleanup_js = """
// Remove cookie banners
document.querySelectorAll('[class*="cookie"], [class*="consent"]')
.forEach(el => el.remove());
// Remove live chat widgets
document.querySelectorAll('[class*="intercom"], [class*="drift"], [id*="hubspot"]')
.forEach(el => el.remove());
// Freeze animations
document.querySelectorAll('*').forEach(el => {
el.style.animation = 'none';
el.style.transition = 'none';
});
"""
    results = []
    SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True)

    for viewport in page_config["viewports"]:
        params = {
            "url": page_config["url"],
            "viewport": viewport,
            "format": "png",  # PNG for pixel-perfect comparison
            "block_ads": "true",
        }
        if page_config.get("dark_mode"):
            params["dark_mode"] = "true"
        if page_config.get("delay"):
            params["delay"] = page_config["delay"]
        params["js"] = cleanup_js

        # A timeout or connection error on one viewport must not abort the
        # remaining captures; treat it like any other failed request.
        try:
            resp = requests.get(f"{API_BASE}/screenshot",
                                params=params, timeout=30)
        except requests.RequestException as exc:
            print(f" FAIL: {page_config['name']}@{viewport} "
                  f"- {exc}")
            continue

        if resp.status_code != 200:
            print(f" FAIL: {page_config['name']}@{viewport} "
                  f"- HTTP {resp.status_code}")
            continue

        filename = f"{page_config['name']}_{viewport}.png"
        filepath = SCREENSHOT_DIR / filename
        filepath.write_bytes(resp.content)
        results.append({
            "name": page_config["name"],
            "viewport": viewport,
            "file": str(filepath),
            "size_kb": round(len(resp.content) / 1024, 1),
        })
        print(f" OK: {filename} ({results[-1]['size_kb']}KB)")

    return results
Step 3: Compare with Baselines
from PIL import Image
import numpy as np
# Directory holding the approved reference screenshots.
BASELINE_DIR = Path("visual-tests/baseline")
# Directory where highlighted diff images are saved for changed screenshots.
DIFF_DIR = Path("visual-tests/diff")
def compare_images(current_path, baseline_path, pixel_threshold=10):
    """Compare two images and return the percentage of changed pixels.

    Args:
        current_path: Path of the freshly captured image.
        baseline_path: Path of the reference image.
        pixel_threshold: Largest per-channel difference (on the 0-255
            scale) that is still treated as unchanged.

    Returns:
        A ``(diff_percentage, diff_image)`` tuple. ``diff_percentage`` is
        the share of pixels (0-100) where at least one RGB channel differs
        by more than ``pixel_threshold``. ``diff_image`` is a PIL image
        with changed pixels in red over a dimmed copy of the current
        image, or ``None`` when nothing changed or the sizes differ (in
        which case the percentage is 100.0).
    """
    # Normalise to RGB so grayscale, palette and RGBA PNGs all produce
    # (height, width, 3) arrays; the channel-axis maximum and the red
    # highlight below depend on exactly three channels. Alpha is ignored.
    with Image.open(current_path) as img:
        current = np.array(img.convert("RGB"))
    with Image.open(baseline_path) as img:
        baseline = np.array(img.convert("RGB"))

    # Handle size mismatches (viewport changes)
    if current.shape != baseline.shape:
        return 100.0, None  # Completely different

    # Widen before subtracting: uint8 arithmetic would wrap around.
    diff = np.abs(current.astype(np.int16) - baseline.astype(np.int16))

    # One mask drives both the percentage and the highlight, so the
    # reported number always matches what the diff image shows.
    changed = diff.max(axis=2) > pixel_threshold
    if not changed.any():
        return 0.0, None

    diff_percentage = float(changed.mean() * 100)

    # Dim the whole image, then paint changed pixels red.
    diff_img = current // 2
    diff_img[changed] = [255, 0, 0]
    return diff_percentage, Image.fromarray(diff_img.astype(np.uint8))
def run_comparison(test_results, threshold=0.5):
    """Compare all screenshots against baselines.

    Args:
        test_results: Iterable of capture records; each must have a "file"
            entry with the path of the current screenshot. The baseline is
            looked up under BASELINE_DIR by the same file name.
        threshold: Maximum diff percentage still accepted as a pass.

    Returns:
        A list of failure records. Each is the input record extended with
        "status" ("new" when no baseline exists, otherwise "changed") and
        "diff_pct" (100.0 for new screenshots).
    """
    DIFF_DIR.mkdir(parents=True, exist_ok=True)
    failures = []

    for result in test_results:
        current = Path(result["file"])
        baseline = BASELINE_DIR / current.name

        if not baseline.exists():
            print(f" NEW: {current.name} (no baseline)")
            failures.append({
                **result,
                "status": "new",
                "diff_pct": 100.0,
            })
            continue

        diff_pct, diff_img = compare_images(current, baseline)

        if diff_pct <= threshold:
            print(f" OK: {current.name} ({diff_pct:.2f}% diff)")
            continue

        print(f" CHANGED: {current.name} ({diff_pct:.1f}% diff)")
        # compare_images returns no image on a size mismatch, so check for
        # None explicitly instead of relying on object truthiness.
        if diff_img is not None:
            diff_img.save(DIFF_DIR / current.name)
        failures.append({
            **result,
            "status": "changed",
            "diff_pct": diff_pct,
        })

    return failures
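Step 4: GitHub Actions Integration
The workflow below assumes a small visual_test_runner.py entry point (not shown here) that wires Steps 1–3 together. Its capture command should call capture_page for every entry in TEST_PAGES. Its compare command should run run_comparison, write the result to visual-tests/report.json in the form {"failures": [...]}, and exit with a non-zero status when any failure is found, so that the steps guarded by failure() actually run.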
name: Visual Regression Tests

on:
  pull_request:
    branches: [main]

# The comment step writes to the pull request, so the job token needs
# write access there; everything else only reads the repository.
permissions:
  contents: read
  pull-requests: write

jobs:
  visual-test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: pip install requests Pillow numpy

      - name: Deploy to staging
        run: |
          # Your staging deployment step
          echo "Deploying PR to staging..."

      - name: Capture screenshots
        run: python visual_test_runner.py capture

      - name: Compare with baselines
        id: compare
        run: python visual_test_runner.py compare

      - name: Upload diff images
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: visual-diffs
          path: visual-tests/diff/

      # Only comment when the comparison itself failed; if an earlier step
      # broke, report.json does not exist and the script would crash.
      - name: Comment on PR
        if: failure() && steps.compare.outcome == 'failure'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(
              fs.readFileSync('visual-tests/report.json', 'utf8')
            );
            let body = '## Visual Regression Report\n\n';
            body += `${report.failures.length} visual change(s) detected:\n\n`;
            for (const f of report.failures) {
              body += `- **${f.name}** @ ${f.viewport}: `;
              body += `${f.diff_pct.toFixed(1)}% changed\n`;
            }
            body += '\nDownload diff images from the artifacts above.';
            // Await the request so API errors fail the step visibly.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body
            });
Step 5: Update Baselines
When changes are intentional:
# After reviewing diffs and confirming they're expected:
# (mkdir -p makes this work on the very first run, before any baseline exists)
mkdir -p visual-tests/baseline
cp visual-tests/current/*.png visual-tests/baseline/
git add visual-tests/baseline/
git commit -m "visual: update baselines for new design"
Handling Flaky Tests
Screenshots can vary between runs due to:
Anti-aliasing differences
Use a higher threshold (1-2% instead of 0.5%) or perceptual hashing:
import imagehash
from PIL import Image
def perceptual_compare(img1_path, img2_path, threshold=5):
    """Return True when two images have nearly identical perceptual hashes.

    Args:
        img1_path: Path of the first image.
        img2_path: Path of the second image.
        threshold: Exclusive upper bound on the hash difference for the
            images to count as matching.

    Returns:
        True if the difference between the two pHash values is below
        ``threshold``, otherwise False.
    """
    # Open both files in a context manager so the handles are released as
    # soon as the hashes have been computed.
    with Image.open(img1_path) as first, Image.open(img2_path) as second:
        h1 = imagehash.phash(first)
        h2 = imagehash.phash(second)
    return abs(h1 - h2) < threshold
Dynamic content
Use the js parameter to normalize dynamic elements:
# Script sent through the "js" request parameter to normalise content that
# changes between runs (dates, avatars, notification badges).
# NOTE: this assignment replaces any script already stored under "js";
# concatenate the strings if an earlier clean-up script is also needed.
# Assumes `params` is the request-parameter dict built for the screenshot
# call — confirm against the calling code.
params["js"] = """
// Replace dates with fixed text
document.querySelectorAll('time, [datetime]')
.forEach(el => el.textContent = '2026-01-01');
// Replace user avatars with placeholder
document.querySelectorAll('img[class*="avatar"]')
.forEach(el => el.src = 'data:image/svg+xml,...');
// Hide notification badges
document.querySelectorAll('[class*="badge"], [class*="notification"]')
.forEach(el => el.style.display = 'none');
"""
Viewport presets for consistency
Use the viewport parameter instead of raw width/height to ensure consistent device dimensions:
# Instead of: width=375, height=812
# Use: viewport=mobile
# This guarantees exact iPhone dimensions every time
# NOTE(review): preset names and their dimensions are defined by the
# screenshot API — confirm the available presets in its documentation.
params["viewport"] = "mobile"
Cost Analysis
For a typical project with 10 pages × 3 viewports = 30 screenshots per PR:
| Component | Cost |
|---|---|
| Screenshot API (free tier) | $0 |
| GitHub Actions minutes | ~2 min per run |
| Storage (baselines in git) | ~3MB per page set |
Total: essentially free for most projects. For higher volume (100+ screenshots per run), use an API key for increased rate limits.
Why a Screenshot API Instead of Puppeteer?
Running Puppeteer in CI means:
- Installing Chromium (adds 1-2 min to CI)
- Managing browser versions
- Debugging headless rendering differences
- Maintaining screenshot infrastructure
A screenshot API means:
- One HTTP call per screenshot
- No browser installation
- Consistent rendering (same Chromium version every time)
- Focus on tests, not infrastructure
Built with the Screenshot API — capture any webpage as an image with a single HTTP call. Supports 20 viewport presets including mobile, tablet, and social media dimensions.