How to Build a Visual Uptime Checker with a Screenshot API
Traditional uptime monitors tell you that something broke. A visual uptime checker tells you what it looked like when it broke. These are different pieces of information, and the second one is often more useful.
This tutorial builds a visual uptime checker that:
- Polls your URLs on a schedule
- Captures a screenshot on any failure
- Stores the screenshot with the failure record
- Sends an alert with the visual evidence
Why Screenshots on Failure?
When a server returns 500, you know there's an error. When a screenshot shows a "Database connection failed" message on an otherwise blank page, you know which error. When a screenshot shows your competitor's 404 page because a CDN misconfiguration is serving the wrong origin, you know something very different is wrong.
Screenshots capture what synthetic monitoring misses: rendering failures, JavaScript errors that don't affect HTTP status, content delivery failures, A/B test accidents, region-specific outages, and third-party widget failures. HTTP 200 can mask all of these.
The Basic Structure
import requests
import base64
import json
import smtplib
from datetime import datetime
from pathlib import Path
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage
# Credentials and endpoint for the screenshot service called by
# capture_screenshot(). Replace the placeholder key with your own, and avoid
# committing a real key to source control.
SCREENSHOT_API_KEY = "your-api-key"
SCREENSHOT_API_URL = "https://hermesforge.dev/api/screenshot"
def check_url(url: str, timeout: int = 30) -> dict:
    """Check a URL over HTTP and capture a screenshot when it looks unhealthy.

    Args:
        url: Address to probe with a GET request (redirects are followed).
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A dict with the keys ``url``, ``timestamp`` (offset-free ISO-8601
        string in UTC), ``healthy``, ``status_code``, ``error`` and
        ``screenshot_b64``. A screenshot is requested for HTTP >= 400
        responses and for timeouts; it can still be ``None`` when the capture
        itself fails. Connection errors never get a screenshot.
    """
    # Function-scope import keeps this block independent of the file header.
    from datetime import timezone

    result = {
        "url": url,
        # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
        # time and drop tzinfo so the string keeps its offset-free format,
        # which the alert email and the screenshot filename rely on.
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "healthy": False,
        "status_code": None,
        "error": None,
        "screenshot_b64": None,
    }

    try:
        # First: HTTP check
        response = requests.get(url, timeout=timeout, allow_redirects=True)
        result["status_code"] = response.status_code

        if response.status_code >= 400:
            result["error"] = f"HTTP {response.status_code}"
            result["screenshot_b64"] = capture_screenshot(url)
        else:
            result["healthy"] = True
    except requests.exceptions.Timeout:
        # The site may merely be slow, so a rendered capture is still useful.
        result["error"] = "Connection timed out"
        result["screenshot_b64"] = capture_screenshot(url)
    except requests.exceptions.ConnectionError as e:
        result["error"] = f"Connection failed: {str(e)}"
        # Can't screenshot if unreachable — skip
    except Exception as e:
        # Anything else is recorded as the failure reason instead of
        # propagating, so one bad URL cannot abort a whole monitoring run.
        result["error"] = str(e)

    return result
def capture_screenshot(
    url: str,
    *,
    width: int = 1280,
    height: int = 800,
    timeout: int = 30,
) -> str | None:
    """Capture a PNG screenshot of ``url`` through the screenshot API.

    Args:
        url: Page to render.
        width: Viewport width sent to the API.
        height: Viewport height sent to the API.
        timeout: Seconds to wait for the API response.

    Returns:
        The image bytes encoded as a base64 string, or ``None`` when the API
        call fails or answers with a status other than 200. This function
        never raises: a missing screenshot must not mask the original failure.
    """
    try:
        response = requests.get(
            SCREENSHOT_API_URL,
            params={
                "url": url,
                "format": "png",
                "width": width,
                "height": height,
                "wait": "networkidle",
            },
            headers={"X-API-Key": SCREENSHOT_API_KEY},
            timeout=timeout,
        )
        if response.status_code == 200:
            return base64.b64encode(response.content).decode()
        # Report why there is no image instead of failing silently.
        print(f"Screenshot API returned HTTP {response.status_code} for {url}")
    except Exception as exc:
        # Deliberately broad: the capture is best-effort, but say what went
        # wrong so a missing screenshot can be diagnosed.
        print(f"Screenshot capture failed for {url}: {exc}")
    return None
Running Checks on a Schedule
import time
import schedule
from typing import Callable
# URLs polled on every scheduled run; replace these placeholders with your
# own endpoints.
MONITORED_URLS = [
"https://your-app.com",
"https://your-app.com/api/health",
"https://your-app.com/login",
]
def run_checks(urls: list[str], on_failure: Callable):
    """Probe every URL, hand each unhealthy result to ``on_failure``, return all.

    All URLs are checked first; the failure callback is invoked afterwards,
    once per unhealthy result, in the order the URLs were given.
    """
    outcomes = []
    for target in urls:
        outcomes.append(check_url(target))

    for outcome in outcomes:
        if outcome["healthy"]:
            continue
        on_failure(outcome)

    return outcomes
def alert_on_failure(failure: dict):
    """Log a failed check, store its screenshot if one exists, and email an alert."""
    print(f"FAILURE: {failure['url']} — {failure['error']}")

    # A screenshot is optional evidence; the email goes out either way.
    screenshot = failure["screenshot_b64"]
    if screenshot:
        save_failure_screenshot(failure)

    send_alert_email(failure)
def save_failure_screenshot(failure: dict):
    """Write the failure's screenshot to ``./failure_screenshots`` as a PNG.

    Args:
        failure: Check result with ``url``, ``timestamp`` (ISO-8601 string) and
            ``screenshot_b64`` (base64-encoded image bytes).

    The file is named ``<timestamp>_<host>.png`` with every ``:`` replaced by
    ``-`` so the name is valid on all common filesystems.
    """
    # Function-scope import keeps this block independent of the file header.
    from urllib.parse import urlsplit

    screenshots_dir = Path("./failure_screenshots")
    screenshots_dir.mkdir(exist_ok=True)

    timestamp = failure["timestamp"].replace(":", "-")
    # netloc can carry a port ("host:8443"), and ':' is not allowed in Windows
    # file names. A URL without a scheme has an empty netloc, so fall back to
    # a placeholder instead of raising.
    netloc = urlsplit(failure["url"]).netloc or "unknown-host"
    domain = netloc.replace(":", "-")
    filename = f"{timestamp}_{domain}.png"

    img_bytes = base64.b64decode(failure["screenshot_b64"])
    (screenshots_dir / filename).write_bytes(img_bytes)
    print(f"Screenshot saved: {filename}")
# Run every 5 minutes.
# NOTE: the loop below never returns, so it must be the last thing the script
# executes: every function it ends up calling (send_alert_email included) has
# to be defined before the loop starts. The __main__ guard keeps the module
# importable without starting the monitor.
if __name__ == "__main__":
    schedule.every(5).minutes.do(
        run_checks, urls=MONITORED_URLS, on_failure=alert_on_failure
    )

    while True:
        schedule.run_pending()
        # Wake up often enough that a due job starts within ~10 seconds.
        time.sleep(10)
Alert Email with Screenshot Attachment
def send_alert_email(
    failure: dict,
    *,
    smtp_host: str = "localhost",
    sender: str = "monitor@yourapp.com",
    recipient: str = "oncall@yourapp.com",
):
    """Email an alert for a failed check, attaching the screenshot if present.

    Args:
        failure: Check result with ``url``, ``timestamp`` and ``error``;
            ``status_code`` and ``screenshot_b64`` are optional and may be
            ``None``.
        smtp_host: SMTP server that relays the message.
        sender: Address placed in the ``From`` header.
        recipient: Address placed in the ``To`` header.

    Raises:
        Whatever ``smtplib`` raises when the server cannot be reached or
        rejects the message.
    """
    msg = MIMEMultipart()
    msg["Subject"] = f"[DOWN] {failure['url']}"
    msg["From"] = sender
    msg["To"] = recipient

    # check_url always stores the "status_code" key (None when no response
    # arrived), so a .get() default alone would never produce "N/A".
    status_code = failure.get("status_code")
    status_text = "N/A" if status_code is None else status_code

    body = "\n".join(
        [
            f"URL: {failure['url']}",
            f"Time: {failure['timestamp']} UTC",
            f"Error: {failure['error']}",
            f"HTTP Status: {status_text}",
        ]
    )
    msg.attach(MIMEText(body, "plain"))

    screenshot_b64 = failure.get("screenshot_b64")
    if screenshot_b64:
        img_data = base64.b64decode(screenshot_b64)
        # Screenshots are requested as PNG; naming the subtype avoids content
        # sniffing, which raises TypeError on data it cannot recognise.
        img = MIMEImage(img_data, _subtype="png", name="failure.png")
        img.add_header("Content-Disposition", "attachment", filename="failure.png")
        msg.attach(img)

    with smtplib.SMTP(smtp_host) as smtp:
        smtp.send_message(msg)
Handling False Positives
Single-check failures are noisy. Add a confirmation step:
from collections import defaultdict
# Per-URL count of consecutive failed checks; URLs not seen yet read as 0.
failure_counts = defaultdict(int)
FAILURE_THRESHOLD = 2 # Alert after 2 consecutive failures
def alert_on_failure_with_threshold(failure: dict):
    """Count a failed check and alert once the URL has failed often enough.

    Args:
        failure: Unhealthy check result; must contain ``url`` and may contain
            ``screenshot_b64``.

    The per-URL counter in ``failure_counts`` is incremented on every call.
    An alert email is sent whenever the count is at or above
    ``FAILURE_THRESHOLD``; below it the failure is only logged.
    """
    url = failure["url"]
    failure_counts[url] += 1

    if failure_counts[url] >= FAILURE_THRESHOLD:
        # Real failure — alert
        send_alert_email(failure)
        # Connection errors carry no screenshot; decoding None would raise
        # TypeError and take the monitoring loop down with it.
        if failure.get("screenshot_b64"):
            save_failure_screenshot(failure)
        print(f"ALERT sent for {url} (failed {failure_counts[url]} times)")
    else:
        print(f"Possible failure for {url} ({failure_counts[url]}/{FAILURE_THRESHOLD})")
def reset_failure_count(url: str):
    """Zero the failure counter for ``url``, logging a recovery if it had failures."""
    had_failures = failure_counts[url] > 0
    if had_failures:
        print(f"RECOVERED: {url}")
    failure_counts[url] = 0
def run_checks_with_threshold(urls: list[str]):
    """Check every URL and feed each result into the failure-threshold logic.

    Healthy results reset the URL's failure counter; unhealthy ones go through
    ``alert_on_failure_with_threshold``. Returns the list of all results.
    """
    outcomes = list(map(check_url, urls))

    for outcome in outcomes:
        if not outcome["healthy"]:
            alert_on_failure_with_threshold(outcome)
            continue
        reset_failure_count(outcome["url"])

    return outcomes
What to Monitor Beyond HTTP Status
The HTTP check catches server-level failures. Screenshots catch presentation-layer failures. For thorough coverage, add content assertions:
def check_url_with_content(url: str, must_contain: str | None = None) -> dict:
    """Run ``check_url`` and additionally assert that the page contains a string.

    Args:
        url: Address to check.
        must_contain: Text that has to appear in the response body. When it is
            ``None`` or empty, only the plain HTTP check runs.

    Returns:
        The ``check_url`` result dict. If the HTTP check passed but the text
        is missing, ``healthy`` is set to ``False``, ``error`` describes the
        failed assertion, and a screenshot is requested. If fetching the body
        raises, ``healthy`` is set to ``False`` and ``error`` holds the
        exception text.
    """
    result = check_url(url)

    if result["healthy"] and must_contain:
        # Verify expected content is present. check_url does not return the
        # response body, so the page is fetched a second time here.
        try:
            response = requests.get(url, timeout=15)
            if must_contain not in response.text:
                result["healthy"] = False
                result["error"] = f"Content assertion failed: '{must_contain}' not found"
                result["screenshot_b64"] = capture_screenshot(url)
        except Exception as e:
            result["healthy"] = False
            result["error"] = str(e)

    return result
# Usage: each entry pairs a URL with text its response body must contain.
# The keys match check_url_with_content's parameters, so one entry can be run
# with check_url_with_content(**entry).
checks = [
{"url": "https://your-app.com", "must_contain": "Sign In"},
{"url": "https://your-app.com/api/health", "must_contain": '"status":"ok"'},
]
Rate Limit Considerations
If you're monitoring many URLs at high frequency, you'll hit rate limits on the screenshot API. Structure your monitoring to capture screenshots conservatively:
- HTTP check first — only take a screenshot if the HTTP check fails
- Deduplicate — don't screenshot the same URL more than once per N minutes, even if it keeps failing
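- Use the right tier — size your plan for the worst case, in which every check fails and triggers a screenshot: 200 calls/day covers hourly checks of 8 URLs (192 checks/day); 1000 calls/day covers 5-minute checks of 3 URLs (864 checks/day)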
The screenshot API is for failure evidence, not routine polling. HTTP pings are cheap; screenshots have cost. Use each appropriately.
hermesforge.dev — screenshot API for developers. Visual failure evidence included.