Generate PDF Reports with Website Screenshots

2026-04-27 | Tags: [screenshot-api, pdf, automation, python, reporting]

Generate PDF Reports with Website Screenshots

Client reports, SEO audits, competitor analyses — they all need screenshots. Manually capturing and pasting screenshots into documents is tedious and error-prone. Let's automate the entire pipeline: capture screenshots via API, embed them in a professional PDF, and deliver it on schedule.

The Stack

Python Implementation with ReportLab

from datetime import datetime
from io import BytesIO
from xml.sax.saxutils import escape

import requests
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image

# Screenshot API endpoint; capture_screenshot() sends its GET requests here.
API_BASE = "https://hermesforge.dev/api/screenshot"

def capture_screenshot(url, viewport="desktop", full_page=False, timeout=30):
    """Capture a website screenshot via the screenshot API.

    Args:
        url: Address of the page to capture.
        viewport: Viewport preset name forwarded to the API
            (e.g. "desktop" or "mobile").
        full_page: Sent to the API as the lowercase string "true" or "false".
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A BytesIO holding the response body (the PNG image bytes).

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    params = {
        "url": url,
        "viewport": viewport,
        "full_page": str(full_page).lower(),
        "format": "png",  # PNG for report quality
    }
    # requests applies no timeout unless one is given; without it a single
    # hung capture would stall the whole report run.
    response = requests.get(API_BASE, params=params, timeout=timeout)
    response.raise_for_status()
    return BytesIO(response.content)

def generate_report(sites, output_path="report.pdf"):
    """Generate a PDF report with desktop and mobile screenshots of several sites.

    Args:
        sites: Iterable of dicts. Each needs a "url"; an optional "label" is
            used as the section heading (the URL is used when it is absent).
        output_path: Path of the PDF file to write.

    A capture that raises is recorded in the PDF as a text note instead of
    aborting the whole report.
    """
    doc = SimpleDocTemplate(output_path, pagesize=A4)
    styles = getSampleStyleSheet()
    story = []

    # Title
    title = f"Website Screenshot Report — {datetime.now().strftime('%B %d, %Y')}"
    story.append(Paragraph(title, styles["Title"]))
    story.append(Spacer(1, 0.3 * inch))

    for site in sites:
        url = site["url"]
        # Paragraph parses its text as markup, so a raw URL used as the
        # heading is escaped; an explicit label is passed through unchanged.
        label = site["label"] if "label" in site else escape(url)

        # Section header
        story.append(Paragraph(label, styles["Heading2"]))
        story.append(Spacer(1, 0.1 * inch))

        # Desktop screenshot
        try:
            img_data = capture_screenshot(url, viewport="desktop")
            img = Image(img_data, width=6.5 * inch, height=3.6 * inch)
            story.append(img)
        except Exception as e:
            # Exception text can contain "<", ">" or "&" (object reprs, query
            # strings). Unescaped, Paragraph may reject it and the failure
            # note itself would abort the report.
            story.append(
                Paragraph(f"Screenshot failed: {escape(str(e))}", styles["Normal"])
            )

        story.append(Spacer(1, 0.1 * inch))

        # Mobile screenshot, shown smaller below the desktop view
        try:
            mobile_data = capture_screenshot(url, viewport="mobile")
            img = Image(mobile_data, width=2 * inch, height=4.3 * inch)
            # Caption is added only once the capture has succeeded.
            story.append(Paragraph("Mobile view:", styles["Italic"]))
            story.append(img)
        except Exception as e:
            story.append(
                Paragraph(
                    f"Mobile screenshot failed: {escape(str(e))}", styles["Normal"]
                )
            )

        story.append(Spacer(1, 0.5 * inch))

    doc.build(story)
    print(f"Report saved to {output_path}")

# Example usage: one dict per site. "url" is required; "label" becomes the
# section heading in the generated PDF.
sites = [
    dict(url="https://example.com", label="Example.com — Homepage"),
    dict(url="https://httpbin.org", label="HTTPBin — API Testing"),
]

generate_report(sites, output_path="weekly-report.pdf")

Adding Comparison Screenshots

For competitor monitoring or visual regression, capture the same pages over time and show them side by side:

from reportlab.platypus import Table, TableStyle
from reportlab.lib import colors

def comparison_row(url, label, viewports=("desktop",)):
    """Capture screenshots of one URL at several viewports for comparison.

    Args:
        url: Address of the page to capture.
        label: Not used by this function; kept so existing callers keep working.
        viewports: Viewport preset names; one capture is made per entry.

    Returns:
        A list with one flowable per viewport, in order: an Image when the
        capture succeeded, otherwise a Paragraph reading "Failed".
    """
    # Built once up front rather than on every failure.
    failure_style = getSampleStyleSheet()["Normal"]
    images = []
    for vp in viewports:
        try:
            img_data = capture_screenshot(url, viewport=vp)
            images.append(Image(img_data, width=3 * inch, height=1.7 * inch))
        except Exception:
            # Deliberately not a bare `except:` so that Ctrl-C and SystemExit
            # still stop the run instead of being turned into a "Failed" cell.
            images.append(Paragraph("Failed", failure_style))
    return images

def _comparison_cell(url, viewport, width, height, fallback_style):
    """Return an Image of `url` at `viewport`, or a "Failed" Paragraph on error."""
    try:
        shot = capture_screenshot(url, viewport)
        return Image(shot, width=width, height=height)
    except Exception:
        return Paragraph("Failed", fallback_style)


def generate_comparison_report(competitors, output="comparison.pdf"):
    """Build a one-table PDF comparing desktop and mobile views of each site.

    Args:
        competitors: Iterable of dicts, each with a "url" and a "label".
        output: Path of the PDF file to write.

    A capture that fails puts a "Failed" note in that table cell instead of
    aborting the whole report.
    """
    doc = SimpleDocTemplate(output, pagesize=A4)
    styles = getSampleStyleSheet()
    story = []

    story.append(Paragraph("Competitor Comparison Report", styles["Title"]))
    story.append(Spacer(1, 0.3 * inch))

    # Header row
    header = ["Site", "Desktop", "Mobile"]
    data = [header]

    for comp in competitors:
        row = [
            Paragraph(comp["label"], styles["Normal"]),
            _comparison_cell(
                comp["url"], "desktop", 2.5 * inch, 1.4 * inch, styles["Normal"]
            ),
            _comparison_cell(
                comp["url"], "mobile", 1.2 * inch, 2.6 * inch, styles["Normal"]
            ),
        ]
        data.append(row)

    table = Table(data, colWidths=[1.5 * inch, 2.8 * inch, 1.5 * inch])
    table.setStyle(TableStyle([
        ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
        ("BACKGROUND", (0, 0), (-1, 0), colors.lightblue),
        ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
        # Padding is set per side: these four are the padding commands
        # ReportLab documents for TableStyle; "PADDING" is not among them.
        ("LEFTPADDING", (0, 0), (-1, -1), 6),
        ("RIGHTPADDING", (0, 0), (-1, -1), 6),
        ("TOPPADDING", (0, 0), (-1, -1), 6),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
    ]))
    story.append(table)
    doc.build(story)

Social Media Preview Audit

Check how your pages look when shared on LinkedIn, Twitter, and Facebook:

def social_preview_audit(url, output="social-audit.pdf"):
    """Generate a report showing how a URL appears on each social platform.

    Args:
        url: Address of the page to audit.
        output: Path of the PDF file to write.

    One screenshot is requested per social viewport preset. A capture that
    raises is recorded as a text note under that platform's heading.
    """
    doc = SimpleDocTemplate(output, pagesize=A4)
    styles = getSampleStyleSheet()
    story = []

    story.append(Paragraph("Social Media Preview Audit", styles["Title"]))
    # Paragraph parses its text as markup, so the URL is escaped: query
    # strings routinely contain "&".
    story.append(Paragraph(f"URL: {escape(url)}", styles["Normal"]))
    story.append(Spacer(1, 0.3 * inch))

    # Heading shown in the report -> viewport preset sent to the API.
    social_viewports = {
        "Open Graph (Facebook)": "og",
        "Twitter Card": "twitter",
        "LinkedIn Post": "linkedin",
    }

    for platform, viewport in social_viewports.items():
        story.append(Paragraph(platform, styles["Heading2"]))
        try:
            img_data = capture_screenshot(url, viewport=viewport)
            img = Image(img_data, width=5 * inch, height=2.6 * inch)
            story.append(img)
        except Exception as e:
            # Exception text can contain "<", ">" or "&". Unescaped, Paragraph
            # may reject it and the failure note would abort the report.
            story.append(Paragraph(f"Failed: {escape(str(e))}", styles["Normal"]))
        story.append(Spacer(1, 0.3 * inch))

    doc.build(story)

Node.js Alternative with PDFKit

const https = require('https');
const PDFDocument = require('pdfkit');
const fs = require('fs');

/**
 * Capture a PNG screenshot of a page through the screenshot API.
 *
 * @param {string} url - Address of the page to capture.
 * @param {string} [viewport='desktop'] - Viewport preset forwarded to the API.
 * @returns {Promise<Buffer>} Resolves with the response body (the PNG bytes).
 *   Rejects on a connection error or when the API answers with a non-2xx status.
 */
async function captureScreenshot(url, viewport = 'desktop') {
  const apiUrl = new URL('https://hermesforge.dev/api/screenshot');
  apiUrl.searchParams.set('url', url);
  apiUrl.searchParams.set('viewport', viewport);
  apiUrl.searchParams.set('format', 'png');

  return new Promise((resolve, reject) => {
    const req = https.get(apiUrl, (res) => {
      const { statusCode } = res;
      if (statusCode < 200 || statusCode >= 300) {
        // An error body is not an image; fail here instead of handing it to
        // the PDF writer. resume() discards the body so the socket is freed.
        res.resume();
        reject(new Error(`Screenshot request failed with status ${statusCode}`));
        return;
      }

      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => resolve(Buffer.concat(chunks)));
      res.on('error', reject);
    });

    // Connection-level failures (DNS, refused, reset) are emitted on the
    // request object, not on the response.
    req.on('error', reject);
  });
}

/**
 * Build a PDF with one screenshot page per site.
 *
 * @param {Array<{url: string, label?: string}>} sites - Pages to capture;
 *   `label` is used as the page heading, falling back to the URL.
 * @param {string} [outputPath='report.pdf'] - Where the PDF is written.
 * @returns {Promise<void>} Resolves once the file has been fully written.
 *
 * A capture that fails is noted on that site's page instead of aborting
 * the whole report.
 */
async function generateReport(sites, outputPath = 'report.pdf') {
  const doc = new PDFDocument({ size: 'A4', margin: 50 });
  const out = fs.createWriteStream(outputPath);
  doc.pipe(out);

  doc.fontSize(24).text('Website Screenshot Report', { align: 'center' });
  doc.fontSize(12).text(new Date().toLocaleDateString(), { align: 'center' });
  doc.moveDown(2);

  for (const [index, site] of sites.entries()) {
    doc.fontSize(16).text(site.label || site.url);
    doc.moveDown(0.5);

    try {
      const screenshot = await captureScreenshot(site.url);
      doc.image(screenshot, { width: 500, height: 280 });
    } catch (err) {
      doc.fontSize(12).text(`Screenshot failed: ${err.message}`);
    }
    doc.moveDown(1);

    // Page break between sites (none after the last one)
    if (index < sites.length - 1) {
      doc.addPage();
    }
  }

  // doc.end() only finishes the PDF stream; the file is complete once the
  // write stream emits 'finish', so wait for that before reporting success.
  await new Promise((resolve, reject) => {
    out.on('finish', resolve);
    out.on('error', reject);
    doc.end();
  });
  console.log(`Report saved to ${outputPath}`);
}

// Usage
generateReport([
  { url: 'https://example.com', label: 'Example.com' },
  { url: 'https://httpbin.org', label: 'HTTPBin' },
]).catch((err) => {
  // Report the failure and exit non-zero so a scheduler can detect it,
  // rather than leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});

Automating with Cron

Run the report weekly and email it:

# Generate weekly competitor report every Monday at 9am, then mail the PDF.
# Kept on a single line: a crontab entry cannot be continued onto the next
# line with a trailing backslash the way a shell command can.
0 9 * * 1 cd /opt/reports && python3 generate_report.py && mail -s "Weekly Screenshot Report" team@company.com -A weekly-report.pdf < /dev/null

Or with GitHub Actions:

# Runs the report script on a weekly schedule and stores the resulting PDF
# as a workflow artifact.
name: Weekly Screenshot Report
on:
  schedule:
    # Minute 0, hour 9, every Monday (day-of-week 1).
    # NOTE(review): scheduled workflows are evaluated in UTC as far as I know —
    # confirm 09:00 UTC is the intended time.
    - cron: '0 9 * * 1'

jobs:
  report:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      # Third-party packages installed for the report script.
      - run: pip install requests reportlab
      # Presumably writes weekly-report.pdf into the working directory, since
      # that is the path uploaded below — verify against the script.
      - run: python generate_report.py
      - uses: actions/upload-artifact@v4
        with:
          name: weekly-report
          path: weekly-report.pdf

Tips for Production Use

  1. Use PNG format for reports — WebP is smaller but PDF renderers handle PNG more reliably
  2. Set appropriate viewports — desktop (1280x720) for general reports, og/twitter/linkedin for social audits
  3. Add error handling — sites go down; your report should note failures, not crash
  4. Cache screenshots — if generating multiple report variants, capture once and reuse
  5. Batch endpoint — for 5+ URLs, use /api/screenshot/batch to capture in parallel (requires API key)

Why an API Instead of Local Puppeteer?

Running Puppeteer locally for PDF reports means:

  - Installing Chrome/Chromium on your report server
  - Managing browser memory leaks in long-running processes
  - Handling browser crashes during batch captures
  - Keeping the browser updated for rendering accuracy

An HTTP API call replaces all of that with one line. The screenshot is someone else's problem; your code just builds the PDF.