Generating PDF Reports from Web Pages with a Screenshot API

2026-04-27 | Tags: [screenshot-api, pdf, python, reporting, automation, weasyprint, pillow, story]

PDF generation from web pages has a long, frustrating history.

The standard approaches all have the same problem: they try to re-render the web page in a PDF layout engine, and modern web pages don't cooperate. CSS Grid doesn't always translate. Flexbox behaves differently. JavaScript-rendered content doesn't show up at all. The result is PDFs that look nothing like the page you were trying to capture.

Screenshots sidestep the whole problem. A screenshot is what the page actually looks like. Convert the screenshot to PDF and you have a pixel-perfect record.

Here are four approaches for different use cases, plus an optional cover page.

Approach 1: Single-Page PDF (Simplest)

For dashboards, report pages, and anything that fits in one or two screens.

import requests
import io
import os
from pathlib import Path
from PIL import Image
import img2pdf

API_KEY = os.environ['SCREENSHOT_API_KEY']
SCREENSHOT_URL = 'https://hermesforge.dev/api/screenshot'

def url_to_pdf(url, output_path, width=1280, height=900, delay=1000, full_page=True):
    """Screenshot ``url`` through the API and store the image as a PDF.

    The PNG returned by the screenshot endpoint is wrapped into a PDF by
    img2pdf and written to ``output_path``, which is then returned. A
    non-success HTTP status from the API is raised by ``raise_for_status``.
    """
    query = {
        'url': url,
        'width': width,
        'height': height,
        'format': 'png',
        # The boolean is sent as the lowercase text 'true' / 'false'.
        'full_page': str(full_page).lower(),
        'delay': delay,
    }
    response = requests.get(
        SCREENSHOT_URL,
        params=query,
        headers={'X-API-Key': API_KEY},
        timeout=60,
    )
    response.raise_for_status()

    # img2pdf embeds the PNG bytes as-is; the result goes straight to disk.
    document = img2pdf.convert(response.content)
    Path(output_path).write_bytes(document)
    return output_path

# Usage
url_to_pdf(
    url='https://yoursite.com/reports/monthly',
    output_path='monthly-report.pdf',
    delay=2000,  # wait for charts to render
)

img2pdf is the right tool here because it creates PDFs by embedding the image directly, without re-encoding it. The result is lossless and the file size is predictable (roughly the same as the PNG).

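Page size: by default img2pdf derives the PDF page size from the image's pixel dimensions and its DPI metadata, falling back to 96 DPI when the image carries none. A 1280×900 screenshot with no DPI metadata therefore becomes a 960×675pt page (1280 ÷ 96 × 72 = 960). If you need A4 or Letter, specify the target dimensions: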

import img2pdf

# A4 is 210mm x 297mm. PDF page sizes are expressed in points (1/72 inch),
# so convert the physical size directly; no pixel DPI is involved here.
a4_width = img2pdf.mm_to_pt(210)
a4_height = img2pdf.mm_to_pt(297)

# get_layout_fun takes the page size as a (width, height) tuple in points.
# png_bytes is the PNG content to embed, e.g. resp.content from the API call.
pdf_bytes = img2pdf.convert(png_bytes, layout_fun=img2pdf.get_layout_fun(
    (a4_width, a4_height)
))

Approach 2: Multi-Page PDF from Multiple URLs

For reports that span multiple pages, or collections of screenshots that should be bound into a single document.

from PIL import Image
import img2pdf
import io
import time

def urls_to_pdf(urls, output_path, delay=1000, **capture_kwargs):
    """Screenshot every URL in ``urls`` and bind the images into one PDF.

    Optional ``width``, ``height`` and ``full_page`` keyword arguments
    override the capture defaults (1280, 900 and True). Each capture becomes
    one PDF page, in the order given. Returns ``output_path`` once the PDF
    has been written there.
    """
    # Everything except the target URL is the same for each request.
    shared_params = {
        'width': capture_kwargs.get('width', 1280),
        'height': capture_kwargs.get('height', 900),
        'format': 'png',
        'full_page': str(capture_kwargs.get('full_page', True)).lower(),
        'delay': delay,
    }

    captures = []
    for position, target in enumerate(urls, start=1):
        print(f"  Capturing [{position}/{len(urls)}]: {target}")
        response = requests.get(
            SCREENSHOT_URL,
            params={'url': target, **shared_params},
            headers={'X-API-Key': API_KEY},
            timeout=60,
        )
        response.raise_for_status()
        captures.append(response.content)
        time.sleep(0.5)  # short pause so requests are not fired back-to-back

    # One PDF page per captured PNG
    document = img2pdf.convert(captures)
    Path(output_path).write_bytes(document)
    print(f"  Written: {output_path} ({len(captures)} pages)")
    return output_path

# Monthly report: overview + three regional breakdowns + appendix
urls_to_pdf(
    urls=[
        'https://yoursite.com/reports/overview',
        'https://yoursite.com/reports/region/north',
        'https://yoursite.com/reports/region/south',
        'https://yoursite.com/reports/region/west',
        'https://yoursite.com/reports/appendix',
    ],
    output_path='monthly-regional-report.pdf',
    delay=2000,
)

Approach 3: Long-Page PDF with Pagination

For pages that are very long (documentation, full audit reports, changelog dumps), you may want to split the screenshot into A4-sized segments rather than having one enormous page in the PDF.

import math

def long_url_to_paginated_pdf(url, output_path,
                               page_width=1280, page_height=1123,  # height ~A4 at 96dpi
                               delay=2000, overlap=50):
    """
    Capture a full-page screenshot and slice it into page-height segments.

    Each slice becomes one page of the PDF written to output_path.

    url: page to capture.
    output_path: where the PDF is written; also the return value.
    page_width: capture width in pixels sent to the screenshot API.
    page_height: maximum height in pixels of each slice.
    delay: value of the API's 'delay' parameter.
    overlap: pixels repeated at the top of each following page, so content cut
        at the bottom of one page is shown again on the next.

    Raises ValueError if page_height is not positive or overlap is not in
    the range 0 <= overlap < page_height. HTTP errors from the API are raised
    by raise_for_status().
    """
    if page_height <= 0:
        raise ValueError(f"page_height must be positive, got {page_height}")
    if not 0 <= overlap < page_height:
        # overlap >= page_height would never advance the cut position (endless
        # loop); a negative overlap would silently skip rows between pages.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < page_height, got {overlap}"
        )

    # Capture full page
    resp = requests.get(
        SCREENSHOT_URL,
        params={
            'url': url,
            'width': page_width,
            'format': 'png',
            'full_page': 'true',
            'delay': delay,
        },
        headers={'X-API-Key': API_KEY},
        timeout=120,
    )
    resp.raise_for_status()

    full_image = Image.open(io.BytesIO(resp.content))
    full_width, full_height = full_image.size

    print(f"  Full page: {full_width}x{full_height}px")

    # Slice into pages. Cuts are at fixed heights; each new page starts
    # `overlap` pixels above the previous page's bottom edge.
    pages = []
    step = page_height - overlap
    y = 0

    while y < full_height:
        bottom = min(y + page_height, full_height)

        # Re-encode the slice as a standalone PNG for img2pdf
        buf = io.BytesIO()
        full_image.crop((0, y, full_width, bottom)).save(buf, format='PNG')
        pages.append(buf.getvalue())

        print(f"  Page {len(pages)}: y={y} to y={bottom}")

        if bottom >= full_height:
            # The whole image is covered. Advancing again could land inside
            # the overlap region and emit a trailing page made only of rows
            # that are already on this page.
            break
        y += step

    # Combine into PDF
    pdf_bytes = img2pdf.convert(pages)
    Path(output_path).write_bytes(pdf_bytes)
    print(f"  Written: {output_path} ({len(pages)} pages)")
    return output_path

# Generate paginated PDF from a long documentation page
long_url_to_paginated_pdf(
    url='https://yoursite.com/docs/api-reference',
    output_path='api-reference.pdf',
    delay=3000,
)

Approach 4: Scheduled PDF Reports

For recurring reports (weekly analytics, daily summaries, monthly invoices), combine the capture with a scheduler:

import schedule
import time
from datetime import datetime
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
import os

def generate_and_send_weekly_report():
    """Build the weekly analytics PDF, email it as an attachment, then delete it.

    Sender, recipient and SMTP credentials are read from the REPORT_FROM,
    REPORT_TO, SMTP_USER and SMTP_PASSWORD environment variables. The local
    PDF is removed only after the message has been handed to the SMTP server.
    """
    generated_at = datetime.now()
    filename = f"weekly-report-{generated_at.strftime('%Y-%m-%d')}.pdf"

    print(f"Generating {filename}...")

    analytics_pages = [
        'https://yoursite.com/analytics/week',
        'https://yoursite.com/analytics/channels',
        'https://yoursite.com/analytics/revenue',
    ]
    urls_to_pdf(analytics_pages, filename, delay=3000)

    # Message envelope and plain-text body
    sender = os.environ['REPORT_FROM']
    recipient = os.environ['REPORT_TO']
    msg = MIMEMultipart()
    msg['Subject'] = f"Weekly Report — {generated_at.strftime('%B %d, %Y')}"
    msg['From'] = sender
    msg['To'] = recipient
    msg.attach(MIMEText('Weekly report attached.', 'plain'))

    # PDF attachment, base64-encoded for transport
    with open(filename, 'rb') as pdf_file:
        pdf_data = pdf_file.read()
    attachment = MIMEBase('application', 'octet-stream')
    attachment.set_payload(pdf_data)
    encoders.encode_base64(attachment)
    attachment.add_header('Content-Disposition', f'attachment; filename="{filename}"')
    msg.attach(attachment)

    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
        smtp.login(os.environ['SMTP_USER'], os.environ['SMTP_PASSWORD'])
        smtp.sendmail(sender, [recipient], msg.as_string())

    print(f"Sent: {filename}")
    os.remove(filename)  # the report now lives in the mailbox; drop the local copy

# Schedule: every Monday at 08:00
schedule.every().monday.at("08:00").do(generate_and_send_weekly_report)

# This loop never exits, so the script must stay running as a long-lived
# process. Due jobs are checked once every 60 seconds.
while True:
    schedule.run_pending()
    time.sleep(60)

Adding a Cover Page

For formal reports, you may want a programmatic cover page before the screenshot pages:

from PIL import Image, ImageDraw, ImageFont
import io

def create_cover_page(title, subtitle, date_str, width=1280, height=1123):
    """Render a white cover page with three centered text rows; return PNG bytes.

    The title, subtitle and date are each centered horizontally at fixed
    vertical positions, with a thin rule drawn near the bottom edge. DejaVu
    Sans is used when the font files can be loaded; otherwise all three rows
    fall back to Pillow's default font.
    """
    canvas = Image.new('RGB', (width, height), color='white')
    pen = ImageDraw.Draw(canvas)

    bold_path = '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'
    regular_path = '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'
    try:
        fonts = (
            ImageFont.truetype(bold_path, 64),
            ImageFont.truetype(regular_path, 36),
            ImageFont.truetype(regular_path, 28),
        )
    except OSError:
        # Font files not loadable: use the built-in font for every row.
        fonts = (ImageFont.load_default(),) * 3

    # (text, font, top y-coordinate, fill colour) for each row, top to bottom
    rows = (
        (title, fonts[0], height // 3, '#1a1a1a'),
        (subtitle, fonts[1], height // 3 + 100, '#555555'),
        (date_str, fonts[2], height // 2 + 50, '#888888'),
    )
    for text, font, top, colour in rows:
        # Measure the rendered text width to center it horizontally.
        left_edge, _, right_edge, _ = pen.textbbox((0, 0), text, font=font)
        centered_x = (width - (right_edge - left_edge)) // 2
        pen.text((centered_x, top), text, fill=colour, font=font)

    # Horizontal rule 100px in from the left, right and bottom edges
    rule_y = height - 100
    pen.line([(100, rule_y), (width - 100, rule_y)], fill='#cccccc', width=2)

    encoded = io.BytesIO()
    canvas.save(encoded, format='PNG')
    return encoded.getvalue()

def urls_to_pdf_with_cover(title, subtitle, urls, output_path, **kwargs):
    """Write a PDF whose first page is a generated cover, followed by one
    full-page screenshot per URL.

    The cover shows ``title``, ``subtitle`` and today's date. Only the
    ``delay`` keyword (default 1000) is read from ``kwargs``. Returns
    ``output_path``.
    """
    today_label = datetime.now().strftime('%B %d, %Y')
    pages = [create_cover_page(title, subtitle, today_label)]

    capture_delay = kwargs.get('delay', 1000)
    for target in urls:
        response = requests.get(
            SCREENSHOT_URL,
            params={
                'url': target,
                'width': 1280,
                'format': 'png',
                'full_page': 'true',
                'delay': capture_delay,
            },
            headers={'X-API-Key': API_KEY},
            timeout=60,
        )
        response.raise_for_status()
        pages.append(response.content)
        time.sleep(0.5)  # short pause between API calls

    document = img2pdf.convert(pages)
    Path(output_path).write_bytes(document)
    return output_path

When to Use Each Approach

Use case Approach
Single dashboard/report page Approach 1 (single-page PDF)
Multi-section report Approach 2 (multiple URLs)
Long documentation page Approach 3 (paginated)
Recurring automated reports Approach 4 (scheduled)
Formal client deliverable Approach 2 + cover page

Dependencies

pip install img2pdf Pillow requests schedule

img2pdf handles the PNG-to-PDF conversion without any quality loss. Pillow (PIL) handles image manipulation for the paginated and cover page approaches. schedule is optional — use a cron job instead if you prefer.

Get Your API Key

Free API key at hermesforge.dev/screenshot. A weekly report with five pages costs 5 API calls — well within any free tier.