Automating Monitoring Dashboard Screenshots: Grafana, Metabase, and Datadog

2026-05-23 | Tags: [use-case, monitoring, dashboards, grafana, screenshot-api, automation]

Monitoring dashboards contain the most important operational information in a company — but they only show the current state. When an incident occurs, the dashboard state at the moment of impact is often gone by the time anyone starts the postmortem. Screenshot-based archiving fixes this: regular captures create a time-series of operational state that persists beyond the live dashboard.

This guide covers automatic capture of Grafana, Metabase, and Datadog dashboards, with Slack delivery, email digests, and S3 archiving pipelines.

The Core Challenge: Async Rendering

Dashboard charts render asynchronously. Grafana fetches panel data via API calls that complete after the page loads. A naive screenshot captures a loading state — spinners instead of charts. The wait parameter solves this by delaying capture after page load.

import requests
import os

# Screenshot API key, read from the environment at import time; a missing
# HERMES_API_KEY variable raises KeyError immediately.
HERMES_API_KEY = os.environ["HERMES_API_KEY"]


def capture_dashboard(url: str, wait_ms: int = 3000, width: int = 1920) -> bytes:
    """
    Capture a monitoring dashboard, waiting for async chart rendering.

    url: address of the page to capture.
    wait_ms: milliseconds to wait after page load before capture.
        Starting points: Grafana 3000-4000ms, Metabase 2000-3000ms,
        Datadog 4000-5000ms. Increase it for dashboards with many panels.
    width: value sent as the "width" request parameter.

    Returns the response body (a WebP image is requested via format=webp).
    Raises requests.HTTPError when the API answers with an error status.
    """
    # Scale the client timeout with the requested wait so a long wait_ms can
    # never exceed it, while keeping the previous fixed 90s budget as a floor.
    timeout_s = max(90, wait_ms / 1000 + 30)

    resp = requests.get(
        "https://hermesforge.dev/api/screenshot",
        headers={"X-API-Key": HERMES_API_KEY},
        params={
            "url":       url,
            "format":    "webp",
            "width":     width,
            # NOTE(review): requests serializes this as the string "True" —
            # confirm the API accepts that casing.
            "full_page": True,
            "wait":      wait_ms,
        },
        timeout=timeout_s,
    )
    resp.raise_for_status()
    return resp.content

Grafana Dashboard Capture

Grafana's URL structure supports time range parameters. Use them to capture specific windows:

from datetime import datetime, timedelta, timezone
from urllib.parse import urlencode

# Prefix for every Grafana URL built below ("/d/<uid>" and "/render/d/<uid>"
# are appended to it). Read at import time; KeyError if the variable is unset.
GRAFANA_BASE = os.environ["GRAFANA_BASE_URL"]  # e.g. https://grafana.company.com


def grafana_dashboard_url(
    dashboard_uid: str,
    panel_id: int | None = None,
    hours_back: int = 6,
    width: int = 1920,
    height: int = 1080,
) -> str:
    """
    Build a Grafana dashboard URL with time range and kiosk mode.
    kiosk=true removes nav UI from the screenshot.

    dashboard_uid: UID placed in the "/d/<uid>" path segment.
    panel_id: when given, added as "viewPanel" to show a single panel.
    hours_back: length of the time window, ending now (UTC).
    width, height: forwarded as query parameters.
        NOTE(review): confirm the dashboard route honours these.

    Returns the absolute URL as a string.
    """
    now = datetime.now(timezone.utc)
    # Grafana's from/to parameters are epoch milliseconds.
    from_ts = int((now - timedelta(hours=hours_back)).timestamp() * 1000)
    to_ts   = int(now.timestamp() * 1000)

    params = {
        "from":   from_ts,
        "to":     to_ts,
        "kiosk":  "true",  # Removes nav header/footer
        "width":  width,
        "height": height,
    }
    # Compare against None explicitly so a panel id of 0 is not dropped.
    if panel_id is not None:
        params["viewPanel"] = panel_id

    return f"{GRAFANA_BASE}/d/{dashboard_uid}?{urlencode(params)}"


def capture_grafana(dashboard_uid: str, hours_back: int = 6) -> bytes:
    """Capture a whole Grafana dashboard covering the last `hours_back` hours."""
    return capture_dashboard(
        grafana_dashboard_url(dashboard_uid, hours_back=hours_back),
        wait_ms=4000,
        width=1920,
    )


def capture_grafana_panel(dashboard_uid: str, panel_id: int) -> bytes:
    """Capture one panel of a Grafana dashboard at a 1280px width."""
    return capture_dashboard(
        grafana_dashboard_url(dashboard_uid, panel_id=panel_id),
        wait_ms=3000,
        width=1280,
    )

Grafana with Authentication

If your Grafana instance requires login, use service account tokens:

def capture_grafana_authenticated(
    dashboard_uid: str,
    service_account_token: str,
    hours_back: int = 6
) -> bytes:
    """
    Capture a Grafana dashboard that requires authentication.
    Uses Grafana's /render/ API endpoint which accepts token auth.

    dashboard_uid: UID placed in the "/render/d/<uid>" path segment.
    service_account_token: token sent as the "auth_token" query parameter.
    hours_back: length of the time window, ending now (UTC).

    NOTE(review): the token travels inside the URL handed to the screenshot
    service, so it is visible to that service and to any URL logging. Use a
    narrowly scoped, read-only token.
    NOTE(review): confirm this Grafana version accepts "auth_token" as a
    query parameter rather than requiring an Authorization header.
    """
    now = datetime.now(timezone.utc)

    # Build the query with urlencode (as grafana_dashboard_url does) so a
    # token containing reserved characters cannot corrupt the URL.
    query = urlencode({
        "from":       int((now - timedelta(hours=hours_back)).timestamp() * 1000),
        "to":         int(now.timestamp() * 1000),
        "width":      1920,
        "height":     1080,
        "kiosk":      "true",
        "auth_token": service_account_token,
    })
    render_url = f"{GRAFANA_BASE}/render/d/{dashboard_uid}?{query}"
    return capture_dashboard(render_url, wait_ms=4000, width=1920)

Metabase Dashboard Capture

Metabase supports public sharing links that don't require authentication:

# Prefix for the Metabase public links built below ("/public/dashboard/<uuid>"
# and "/public/question/<uuid>"). Read at import time; KeyError if unset.
METABASE_BASE = os.environ["METABASE_BASE_URL"]


def capture_metabase_public(public_uuid: str) -> bytes:
    """Screenshot a publicly shared Metabase dashboard; no login is involved."""
    return capture_dashboard(
        f"{METABASE_BASE}/public/dashboard/{public_uuid}",
        wait_ms=3000,
        width=1440,
    )


def capture_metabase_question(public_uuid: str) -> bytes:
    """Screenshot one publicly shared Metabase question (a single chart)."""
    return capture_dashboard(
        f"{METABASE_BASE}/public/question/{public_uuid}",
        wait_ms=2000,
        width=1280,
    )

Scheduled Capture Pipeline

import schedule
import time
import boto3
from datetime import datetime, timezone
from pathlib import Path

# Shared S3 client and destination bucket for archived screenshots.
# ARCHIVE_BUCKET is read at import time; KeyError if the variable is unset.
s3 = boto3.client("s3")
ARCHIVE_BUCKET = os.environ["ARCHIVE_BUCKET"]

# Dashboards to capture. Per entry:
#   name     - used in the S3 key and in log/Slack messages
#   type     - "grafana" or "metabase"; selects the capture function
#   uid      - Grafana dashboard UID (read for type "grafana")
#   uuid     - Metabase public-sharing UUID (read for type "metabase")
#   schedule - cron-style expression describing the intended cadence
DASHBOARDS = [
    {
        "name": "production-overview",
        "uid":  "abc123",
        "type": "grafana",
        "schedule": "*/15 * * * *",  # Every 15 minutes
    },
    {
        "name": "revenue-metrics",
        "uuid": "def456",
        "type": "metabase",
        "schedule": "0 * * * *",    # Hourly
    },
]


def archive_to_s3(name: str, image_data: bytes) -> str:
    """Store dashboard screenshot in S3 with timestamp key.

    name: dashboard name; becomes part of the key and the "dashboard" metadata.
    image_data: image bytes, uploaded with ContentType image/webp.

    Returns the "s3://<bucket>/<key>" location of the stored object.
    """
    # Read the clock once so the key and the captured_at metadata always
    # describe the same instant.
    captured_at = datetime.now(timezone.utc)
    key = f"dashboards/{name}/{captured_at.strftime('%Y/%m/%d/%H-%M-%S')}.webp"
    s3.put_object(
        Bucket=ARCHIVE_BUCKET,
        Key=key,
        Body=image_data,
        ContentType="image/webp",
        Metadata={"dashboard": name, "captured_at": captured_at.isoformat()},
    )
    return f"s3://{ARCHIVE_BUCKET}/{key}"


def capture_and_archive(dashboard: dict) -> str | None:
    """Capture one configured dashboard and archive the image to S3.

    Returns the S3 location on success. Returns None when the dashboard type
    is neither "grafana" nor "metabase", or when any step raises; in that
    case the error is printed rather than propagated.
    """
    try:
        kind = dashboard["type"]
        if kind not in ("grafana", "metabase"):
            return None

        image = (
            capture_grafana(dashboard["uid"])
            if kind == "grafana"
            else capture_metabase_public(dashboard["uuid"])
        )

        location = archive_to_s3(dashboard["name"], image)
        print(f"Archived {dashboard['name']} → {location}")
        return location

    except Exception as e:
        print(f"Failed to capture {dashboard['name']}: {e}")
        return None


# Schedule captures
def _schedule_capture(dashboard: dict) -> None:
    """Register one dashboard with `schedule`, honouring its "schedule" field.

    Only the two cron shapes used in DASHBOARDS are understood:
      "*/N * * * *"  -> every N minutes (counted from process start)
      "M * * * *"    -> once an hour at minute M
    Anything else, or a missing field, falls back to every 15 minutes.
    """
    fields = dashboard.get("schedule", "").split()
    # Only look at the minute field when the other four fields are wildcards.
    minute = fields[0] if len(fields) == 5 and fields[1:] == ["*"] * 4 else ""

    if minute.startswith("*/") and minute[2:].isdecimal() and int(minute[2:]) > 0:
        job = schedule.every(int(minute[2:])).minutes
    elif minute.isdecimal() and int(minute) < 60:
        job = schedule.every().hour.at(f":{int(minute):02d}")
    else:
        job = schedule.every(15).minutes

    job.do(capture_and_archive, dashboard=dashboard)


for dashboard in DASHBOARDS:
    _schedule_capture(dashboard)

if __name__ == "__main__":
    # Poll twice a minute; schedule only runs jobs when run_pending() is called.
    while True:
        schedule.run_pending()
        time.sleep(30)

Slack Delivery

import json

# Slack settings, read at import time.
# SLACK_WEBHOOK_URL and SLACK_BOT_TOKEN are required (KeyError if unset);
# the bot token is sent as a Bearer token on the file-upload API calls.
# NOTE(review): SLACK_WEBHOOK_URL is not referenced by the code in this
# section — confirm it is needed before making it mandatory.
SLACK_WEBHOOK_URL = os.environ["SLACK_WEBHOOK_URL"]
SLACK_BOT_TOKEN   = os.environ["SLACK_BOT_TOKEN"]
# Default destination channel when the caller does not pass one.
SLACK_CHANNEL     = os.environ.get("SLACK_CHANNEL", "#ops-snapshots")


def send_dashboard_to_slack(
    image_data: bytes,
    dashboard_name: str,
    channel: str = SLACK_CHANNEL,
    message: str | None = None,
) -> bool:
    """Upload dashboard screenshot to Slack channel.

    Follows Slack's external-upload flow: request an upload URL, POST the
    bytes to it, then complete the upload and share it to `channel`.

    image_data: image bytes, uploaded as "<dashboard_name>.webp".
    dashboard_name: used for the filename and the default comment.
    channel: value sent as "channel_id".
        NOTE(review): Slack documents channel_id as a channel ID (C0123...),
        so a "#name" value may be rejected — confirm.
    message: comment posted with the file; a timestamped default is used
        when omitted.

    Returns True when the final step reports ok, False when any step is
    rejected. Network failures propagate as requests exceptions.
    """
    auth_headers = {"Authorization": f"Bearer {SLACK_BOT_TOKEN}"}

    # Step 1: Get upload URL. Sent form-encoded (data=), not as JSON: Slack
    # lists only form encodings as accepted content types for this method.
    upload_resp = requests.post(
        "https://slack.com/api/files.getUploadURLExternal",
        headers=auth_headers,
        data={
            "filename": f"{dashboard_name}.webp",
            "length":   len(image_data),
        },
        timeout=30,
    )
    upload_data = upload_resp.json()
    if not upload_data.get("ok"):
        print(f"Slack upload URL failed: {upload_data}")
        return False

    upload_url = upload_data["upload_url"]
    file_id    = upload_data["file_id"]

    # Step 2: Upload content. Stop here on failure instead of completing an
    # upload whose bytes never arrived.
    content_resp = requests.post(
        upload_url,
        data=image_data,
        headers={"Content-Type": "image/webp"},
        timeout=60,
    )
    if not content_resp.ok:
        print(f"Slack file upload failed: HTTP {content_resp.status_code}")
        return False

    # Step 3: Complete upload and share to channel
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    complete_resp = requests.post(
        "https://slack.com/api/files.completeUploadExternal",
        headers=auth_headers,
        json={
            "files":          [{"id": file_id}],
            "channel_id":     channel,
            "initial_comment": message or f"Dashboard snapshot: *{dashboard_name}* — {ts}",
        },
        timeout=30,
    )
    complete_data = complete_resp.json()
    if not complete_data.get("ok", False):
        print(f"Slack complete upload failed: {complete_data}")
        return False
    return True


def morning_digest(dashboards: list[dict], channel: str = SLACK_CHANNEL):
    """Send a morning digest of all dashboard snapshots to Slack.

    Each dashboard is captured once (Grafana with a 12-hour window), and that
    same image is archived to S3 and posted to `channel`. A failure on one
    dashboard is printed and does not stop the rest. Types other than
    "grafana" and "metabase" are skipped.

    dashboards: entries shaped like those in DASHBOARDS.
    channel: Slack destination passed through to send_dashboard_to_slack.
    """
    for dashboard in dashboards:
        try:
            kind = dashboard["type"]
            if kind == "grafana":
                url = grafana_dashboard_url(dashboard["uid"], hours_back=12)
            elif kind == "metabase":
                url = f"{METABASE_BASE}/public/dashboard/{dashboard['uuid']}"
            else:
                continue

            # One capture serves both the archive and the Slack post; a second
            # capture would double API usage and could show different data.
            image_data = capture_dashboard(url)
            s3_path = archive_to_s3(dashboard["name"], image_data)
            print(f"Archived {dashboard['name']} → {s3_path}")

            delivered = send_dashboard_to_slack(
                image_data,
                dashboard["name"],
                channel=channel,
                message=f":bar_chart: *{dashboard['name']}* — last 12h",
            )
            if not delivered:
                print(f"Digest failed for {dashboard['name']}: Slack rejected the upload")
        except Exception as e:
            print(f"Digest failed for {dashboard['name']}: {e}")

Email Digest

import smtplib
import base64
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage


def send_dashboard_email(
    image_data: bytes,
    dashboard_name: str,
    recipients: list[str],
    smtp_host: str,
    smtp_port: int = 587,
):
    """Email a dashboard screenshot as inline image.

    image_data: image bytes, attached as image/webp and referenced from the
        HTML body through its Content-ID.
    dashboard_name: shown in the subject, the heading and the attachment name.
    recipients: envelope recipients, also joined into the To header.
    smtp_host, smtp_port: SMTP server; the connection is upgraded with
        STARTTLS and then authenticated.

    Reads SMTP_FROM, SMTP_USER and SMTP_PASS from the environment (KeyError
    if any is unset).
    """
    from html import escape  # local import: only this function needs it

    # One timezone-aware timestamp shared by subject and body, so the two
    # cannot disagree.
    captured_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

    msg = MIMEMultipart("related")
    msg["Subject"] = f"Dashboard Snapshot: {dashboard_name} — {captured_at}"
    msg["From"]    = os.environ["SMTP_FROM"]
    msg["To"]      = ", ".join(recipients)

    # Escape the name so characters such as "<" or "&" cannot break the markup.
    html_body = f"""
    <html><body>
      <h2>{escape(dashboard_name)}</h2>
      <p>Captured at {captured_at}</p>
      <img src="cid:dashboard_image" style="max-width:100%;border:1px solid #eee;">
    </body></html>
    """
    msg.attach(MIMEText(html_body, "html"))

    # The subtype is passed explicitly rather than guessed from the bytes.
    img = MIMEImage(image_data, "webp")
    img.add_header("Content-ID", "<dashboard_image>")
    img.add_header("Content-Disposition", "inline", filename=f"{dashboard_name}.webp")
    msg.attach(img)

    with smtplib.SMTP(smtp_host, smtp_port) as smtp:
        smtp.starttls()
        smtp.login(os.environ["SMTP_USER"], os.environ["SMTP_PASS"])
        smtp.sendmail(msg["From"], recipients, msg.as_string())

Incident Archiving

When an incident is detected (via PagerDuty, Alertmanager, or manual trigger), capture the current dashboard state immediately:

@app.route("/webhooks/pagerduty", methods=["POST"])
def pagerduty_webhook():
    """Capture dashboard snapshot when PagerDuty incident opens.

    Expects a payload with an "event" object carrying "event_type" and
    "data" (incident id and service summary). For a trigger event whose
    service appears in SERVICE_DASHBOARD_MAP, the last hour of that dashboard
    is captured, stored under "incidents/<incident_id>/" in S3 and posted to
    the #incidents Slack channel. Every other request is acknowledged with an
    empty 200.

    NOTE(review): this handler does not verify the webhook signature; add
    verification before exposing the endpoint publicly.
    """
    # A missing or non-JSON body is treated as "nothing to do" rather than
    # crashing on None.
    payload = request.get_json(silent=True)
    event = payload.get("event") if isinstance(payload, dict) else None
    if not isinstance(event, dict):
        return "", 200

    # V3 webhooks name this event "incident.triggered"; the older
    # "incident.trigger" spelling is still accepted for existing senders.
    if event.get("event_type") in ("incident.triggered", "incident.trigger"):
        incident_id = event["data"]["id"]
        service     = event["data"]["service"]["summary"]

        # Find relevant dashboard for this service
        dashboard_uid = SERVICE_DASHBOARD_MAP.get(service)
        if dashboard_uid:
            image_data = capture_grafana(dashboard_uid, hours_back=1)

            # Archive to S3 with incident metadata
            captured_at = datetime.now(timezone.utc).strftime('%H-%M-%S')
            key = f"incidents/{incident_id}/{captured_at}.webp"
            s3.put_object(
                Bucket=ARCHIVE_BUCKET,
                Key=key,
                Body=image_data,
                ContentType="image/webp",
                Metadata={"incident_id": incident_id, "service": service},
            )

            # Send to ops channel
            send_dashboard_to_slack(
                image_data,
                dashboard_name=f"{service} (incident {incident_id})",
                channel="#incidents",
                message=f":rotating_light: Incident {incident_id} opened — *{service}* dashboard at trigger time",
            )

    return "", 200


# Map service names to Grafana dashboard UIDs.
# Keys are compared against the service "summary" string from the incoming
# PagerDuty event; services missing from this map produce no snapshot.
SERVICE_DASHBOARD_MAP = {
    "API Gateway":      "api-gateway-overview",
    "Screenshot API":   "screenshot-service-metrics",
    "Database":         "postgres-performance",
}

Wait Times by Platform

| Platform | Recommended wait | Notes |
|---|---|---|
| Grafana | 4000ms | Panel queries complete in 2–3s on a cold cache |
| Metabase | 3000ms | Question rendering varies by complexity |
| Datadog | 5000ms | Dashboards with many widgets |
| Kibana | 4000ms | Visualizations require Elasticsearch round-trips |
| Custom React/Vue dashboards | 2000–3000ms | Depends on data fetch latency |
| Recharts/Chart.js | 1500ms | Pure client-side rendering |

For dashboards with many panels, increase wait until captures are consistently complete. The Screenshot API's timeout is 60s by default; set your Python requests timeout to wait_ms/1000 + 30 for headroom.


Free API key at hermesforge.dev. 50 captures/day, no credit card required.