How I Monitor 50 Competitors With a Screenshot API
I used to check competitor sites manually. Every Monday morning, I'd open a browser, tab through eight or ten URLs, scan for anything that looked different, and close the tabs feeling like I'd learned almost nothing.
After a few months of this, I built an automated system instead. It now monitors 50 competitor pages, runs twice a day, and emails me a summary with annotated screenshots when anything changes. Here's exactly how it works.
Why Screenshots Over Scraping
My first instinct was to scrape text — extract pricing tables, feature lists, button text. The problem: modern sites change their HTML structure constantly. Selectors break. You spend more time maintaining the scraper than reading the output.
Screenshots sidestep this entirely. I don't care about the DOM structure. I care about what the page looks like — whether the pricing changed, whether they launched a new hero, whether that "Coming Soon" badge disappeared. Screenshots capture exactly what a human would see.
The downside is you can't grep a screenshot. But combined with a pixel-diff to detect changes, and a manual review step for anything flagged, it's far more reliable than text extraction for competitive monitoring.
The Architecture
Cron (2x/day at 08:00 and 20:00 UTC)
→ Capture screenshots of all 50 pages
→ Compare against last capture (pixel diff)
→ For pages with > 2% change: flag for review
→ Send summary email with flagged pages + diff images
→ Archive all captures with timestamp
Step 1: The Competitor List
I maintain a YAML file with the URLs I monitor, organized by category:
# competitors.yml
# Pages to monitor, grouped by category. Each entry carries its own
# pixel-change threshold (%) — pricing pages flag on small changes,
# homepages need a higher bar to avoid A/B-test noise.
categories:
  pricing:
    - name: "Competitor A"
      url: "https://competitor-a.com/pricing"
      threshold: 1.5   # % pixel change to flag
    - name: "Competitor B"
      url: "https://competitor-b.com/pricing"
      threshold: 2.0
  homepage:
    - name: "Competitor A"
      url: "https://competitor-a.com"
      threshold: 5.0   # homepages change more, higher threshold
    - name: "Competitor C"
      url: "https://competitor-c.com"
      threshold: 5.0
  features:
    - name: "Competitor A"
      url: "https://competitor-a.com/features"
      threshold: 2.0
The per-URL threshold matters: pricing pages should trigger on small changes (maybe they bumped a price by $1). Homepages change all the time for A/B tests and shouldn't flood your inbox on every carousel update.
Step 2: The Capture Script
import requests
import yaml
import os
import time
from pathlib import Path
from datetime import datetime, timezone
# Fails fast at import time if the key is unset — better than a mid-run 401.
API_KEY = os.environ['SCREENSHOT_API_KEY']
BASE_URL = 'https://hermesforge.dev/api/screenshot'
# Root of the timestamped screenshot archive (created on demand per page).
ARCHIVE_DIR = Path('competitor-archive')
def capture(url, width=1440, height=900):
    """Fetch a PNG screenshot of *url* at a desktop viewport and return the raw bytes.

    Raises requests.HTTPError on a non-2xx API response and
    requests.Timeout if the call exceeds 30 seconds.
    """
    query = {
        'url': url,
        'width': width,
        'height': height,
        'format': 'png',
        'full_page': 'false',  # viewport only — faster, avoids infinite scroll pages
        'delay': 1000,         # wait for dynamic content
        'block_ads': 'true',   # cleaner screenshots
    }
    response = requests.get(
        BASE_URL,
        params=query,
        headers={'X-API-Key': API_KEY},
        timeout=30,
    )
    response.raise_for_status()
    return response.content
def run_captures(config_path='competitors.yml'):
    """Capture every page listed in the YAML config.

    Returns (results, timestamp): results maps "<name>_<category>" to a
    per-page dict with status 'ok' (plus archive paths and threshold) or
    'error'; timestamp is the shared UTC capture time used in filenames.
    """
    with open(config_path) as fh:
        cfg = yaml.safe_load(fh)

    stamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
    results = {}

    for category, pages in cfg['categories'].items():
        for entry in pages:
            page_name = entry['name']
            page_url = entry['url']
            # Archive layout: competitor-archive/<category>/<Name_With_Underscores>/
            folder = ARCHIVE_DIR / category / page_name.replace(' ', '_')
            folder.mkdir(parents=True, exist_ok=True)
            snapshot = folder / f"{stamp}.png"
            baseline = folder / 'latest.png'  # comparison copy, updated later
            try:
                snapshot.write_bytes(capture(page_url))
                results[f"{page_name}_{category}"] = {
                    'name': page_name,
                    'category': category,
                    'url': page_url,
                    'threshold': entry.get('threshold', 2.0),
                    'capture_file': snapshot,
                    'latest_file': baseline,
                    'status': 'ok',
                }
                print(f" OK {page_name} ({category})")
            except Exception as exc:
                results[f"{page_name}_{category}"] = {'name': page_name, 'url': page_url, 'status': 'error', 'error': str(exc)}
                print(f" FAIL {page_name}: {exc}")
            time.sleep(0.3)  # polite rate limiting

    return results, stamp
Step 3: Detect Changes
from PIL import Image, ImageChops, ImageDraw
import numpy as np
def compute_diff(old_path, new_path):
    """Compare two screenshots and return (change_pct, annotated_image).

    change_pct is the percentage of pixels where any RGB channel differs
    by more than 15 (a small noise floor); annotated_image is the new
    screenshot with one red box around the union of all changed pixels.
    """
    baseline = Image.open(old_path).convert('RGB')
    current = Image.open(new_path).convert('RGB')

    # Pages can grow/shrink between captures — normalize to baseline size.
    if baseline.size != current.size:
        current = current.resize(baseline.size, Image.LANCZOS)

    pixels = np.array(ImageChops.difference(baseline, current))
    changed = (pixels > 15).any(axis=2)  # per-pixel: any channel over the noise floor
    total_pixels = pixels.shape[0] * pixels.shape[1]
    change_pct = changed.sum() / total_pixels * 100

    # Annotate the new capture: red outline around the single bounding box
    # of everything that changed (simplified — one box, not per-region).
    annotated = current.copy()
    overlay = Image.new('RGBA', current.size, (0, 0, 0, 0))
    pen = ImageDraw.Draw(overlay)
    row_hits = np.where(changed.any(axis=1))[0]
    col_hits = np.where(changed.any(axis=0))[0]
    if len(row_hits) > 0 and len(col_hits) > 0:
        pen.rectangle(
            [col_hits[0], row_hits[0], col_hits[-1], row_hits[-1]],
            outline=(255, 0, 0, 200),
            width=3,
        )
    annotated.paste(overlay, mask=overlay.split()[3])

    return round(change_pct, 2), annotated
def check_changes(results, timestamp):
    """Diff each successful capture against its 'latest.png' baseline.

    Pages whose pixel-change percentage exceeds their per-page threshold
    are returned as a list of dicts (the capture metadata plus
    'change_pct', 'diff_path', 'new_capture'). In every case the baseline
    is refreshed to the new capture; errored captures are skipped, and a
    page's very first capture just seeds the baseline without flagging.
    """
    import shutil  # one import instead of one per branch in the original

    changes = []
    for result in results.values():
        if result['status'] != 'ok':
            continue  # capture failed — nothing to compare

        latest_file = result['latest_file']
        capture_file = result['capture_file']

        if latest_file.exists():
            change_pct, diff_image = compute_diff(latest_file, capture_file)
            if change_pct > result['threshold']:
                diff_path = capture_file.parent / f"{timestamp}_diff.png"
                diff_image.save(diff_path)
                changes.append({
                    **result,
                    'change_pct': change_pct,
                    'diff_path': diff_path,
                    'new_capture': capture_file,
                })

        # New capture becomes the baseline while the timestamped archive
        # copy stays in place. (Replaces the original's rename-then-copy-back
        # dance on first capture — same end state, one operation.)
        shutil.copy(capture_file, latest_file)

    return changes
Step 4: Email Summary
I send myself an HTML email with inline diff images for anything flagged:
import smtplib
import base64
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
def send_change_report(changes, timestamp):
    """Email an HTML change report with the diff images embedded inline.

    Does nothing beyond a console note when *changes* is empty. Sender,
    recipient, and SMTP credentials come from environment variables.
    """
    if not changes:
        print("No changes detected — no email sent.")
        return

    report = MIMEMultipart('related')
    report['Subject'] = f"Competitor Changes: {len(changes)} page(s) updated [{timestamp}]"
    report['From'] = os.environ['REPORT_EMAIL_FROM']
    report['To'] = os.environ['REPORT_EMAIL_TO']

    # Assemble the HTML body; each flagged page references its diff image
    # by Content-ID so mail clients render it inline.
    sections = [
        "<html><body>",
        f"<h2>{len(changes)} competitor page(s) changed</h2>",
        f"<p>Detected at {timestamp} UTC</p>",
    ]
    inline_images = {}
    for idx, entry in enumerate(changes):
        cid = f"diff_{idx}"
        inline_images[cid] = entry['diff_path']
        sections.append(f"""
<hr/>
<h3>{entry['name']} — {entry['category']}</h3>
<p>
<strong>URL:</strong> <a href="{entry['url']}">{entry['url']}</a><br/>
<strong>Change:</strong> {entry['change_pct']:.1f}% of pixels
(threshold: {entry['threshold']}%)
</p>
<img src="cid:{cid}" style="max-width:100%; border:1px solid #ccc;"/>
""")
    sections.append("</body></html>")
    report.attach(MIMEText("\n".join(sections), 'html'))

    # Attach each diff image so the cid: references above resolve.
    for cid, image_path in inline_images.items():
        with open(image_path, 'rb') as fh:
            part = MIMEImage(fh.read())
        part.add_header('Content-ID', f'<{cid}>')
        part.add_header('Content-Disposition', 'inline')
        report.attach(part)

    with smtplib.SMTP_SSL(os.environ['SMTP_HOST'], 465) as smtp:
        smtp.login(os.environ['SMTP_USER'], os.environ['SMTP_PASS'])
        smtp.send_message(report)
    print(f"Sent change report: {len(changes)} changes")
Step 5: Cron Schedule
# crontab -e
# Run at 08:00 and 20:00 UTC daily
# NOTE(review): inline secrets here are visible via `crontab -l` and, on some
# systems, in process listings. Consider moving them to a chmod-600 env file
# and sourcing it from the command instead of embedding real values.
0 8,20 * * * cd /home/user/competitor-monitor && \
SCREENSHOT_API_KEY=xxx \
REPORT_EMAIL_FROM=you@yourdomain.com \
REPORT_EMAIL_TO=you@yourdomain.com \
SMTP_HOST=smtp.yourprovider.com \
SMTP_USER=you@yourdomain.com \
SMTP_PASS=xxx \
python monitor.py >> logs/monitor.log 2>&1
What I Actually Learned
After two months of running this, the genuinely useful signals:
Pricing changes: Detected two competitors raising prices within 24 hours. One of them added a new tier between their free and pro plans. Would have missed both manually.
Hero copy experiments: One competitor ran at least four different hero headlines over six weeks. Their final choice ("Turn any URL into a screenshot in seconds") is better than their original ("Developer-first screenshot API"). Good to know.
Feature launches: Caught a "Coming Soon" label disappearing from a competitor's PDF export feature. Checked it immediately — they had just shipped it. Gave me a two-day head start on a response.
False positives: Animated elements (carousels, live chat widgets, banners) trigger false positives on homepages. Solution: use full_page: false and a higher threshold for homepages, or add a clip parameter to capture only the stable region (above the fold, excluding the footer).
Get Your API Key
Free API key at hermesforge.dev/screenshot. The 50-page monitor running twice daily uses about 100 API calls/day.