Watermarking and Securing PDFs

Visual watermarks handle branding and confidentiality labelling. Cryptographic controls handle access enforcement. Both belong at the end of any pipeline built on Automating PDF Extraction & Generation — after structural work such as Merging and Splitting PDF Documents is complete, and before the output reaches a recipient. Applying encryption mid-pipeline breaks later merge and parsing steps, and a file that is already encrypted has to be decrypted before it can be watermarked. Get the order right and both techniques compose cleanly.

This guide covers: generating ReportLab overlay templates, stamping pages with merge_page(), AES-256 encryption with writer.encrypt(), owner vs user passwords, permission flag bitmasks, and batch processing patterns.

Prerequisites

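# pip install pypdf reportlab
# The "crypto" extra pulls in the cryptography package, which pypdf needs for AES encryption.
pip install "pypdf[crypto]>=3.17" "reportlab>=4.2"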

You need at least one source PDF for testing. A minimal one-page file is sufficient for all examples here. Store it at ./input/source.pdf or adjust the path constants in the snippets.

Diagnostic Step: Inspect the PDF Before Applying Security

Before applying watermarks or encryption, verify the file's current state: is it already encrypted, what page size does it use, and does it contain form fields that watermarking might break?

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader

# Path of the PDF to inspect; adjust to point at your own test file.
SOURCE = Path("./input/source.pdf")

try:
    reader = PdfReader(SOURCE)
    # Report the encryption state first: it is the one fact that is always
    # readable, and it decides whether the page tree can be touched at all.
    print(f"Encrypted   : {reader.is_encrypted}")
    # decrypt() returns 0 when the password is rejected. Trying the empty
    # password tells us whether pages are readable without a real password.
    if reader.is_encrypted and reader.decrypt("") == 0:
        print("Locked      : password required; decrypt before inspecting pages")
    else:
        page = reader.pages[0]
        # MediaBox gives the page dimensions in points (1 pt = 1/72 inch)
        media_box = page.mediabox
        width_pt = float(media_box.width)
        height_pt = float(media_box.height)
        # Index access (not .get) so the catalog reference is resolved to the
        # actual dictionary before the membership test.
        has_form = "/AcroForm" in reader.trailer["/Root"]
        print(f"Pages       : {len(reader.pages)}")
        print(f"Page width  : {width_pt:.1f} pt  ({width_pt / 72:.2f} in)")
        print(f"Page height : {height_pt:.1f} pt  ({height_pt / 72:.2f} in)")
        print(f"Has AcroForm: {has_form}")
except FileNotFoundError:
    print(f"File not found: {SOURCE}")

If is_encrypted is True, decrypt before watermarking — see Remove a Password from a PDF with Python. If the page is letter-sized (612 × 792 pt), the snippets below work without modification. For A4 (595 × 842 pt), swap the pagesize constant in the ReportLab call.

Security Layers: How Watermarking and Encryption Compose

Step 1: Generate a Watermark Template with ReportLab

ReportLab produces a single-page PDF with transparent text or graphics. This file is reused across every page in a batch — generate it once, open it once.

# pip install reportlab
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4

WATERMARK_PATH = Path("./tmp/watermark_template.pdf")


def create_text_watermark(
    text: str = "CONFIDENTIAL",
    pagesize: tuple = letter,   # pass A4 for European documents
    alpha: float = 0.25,        # fill opacity: 0.1 = very faint, 0.5 = noticeable
    font_size: int = 52,
) -> Path:
    """Write a one-page PDF holding ``text`` drawn diagonally across the page.

    The file is always written to the module-level WATERMARK_PATH; its parent
    directory is created when missing.

    Args:
        text: String to draw.
        pagesize: ``(width, height)`` of the template page, in points.
        alpha: Fill opacity applied to the text.
        font_size: Size of the Helvetica-Bold font, in points.

    Returns:
        WATERMARK_PATH, the location of the generated template.
    """
    target = WATERMARK_PATH
    target.parent.mkdir(parents=True, exist_ok=True)

    page_w, page_h = pagesize
    sheet = canvas.Canvas(str(target), pagesize=pagesize)

    sheet.saveState()
    # Shift the origin to the middle of the page and tilt the axes 45 degrees,
    # so a string centred on x=0 ends up centred on the page diagonal.
    sheet.translate(page_w / 2, page_h / 2)
    sheet.rotate(45)
    sheet.setFillAlpha(alpha)
    sheet.setFont("Helvetica-Bold", font_size)
    sheet.setFillColorRGB(0.3, 0.3, 0.3)
    sheet.drawCentredString(0, 0, text)
    sheet.restoreState()

    sheet.save()
    return target

Keep alpha between 0.1 and 0.4. Above 0.4, the overlay obscures body text on documents with light backgrounds.

Step 2: Stamp Pages with merge_page()

PageObject.merge_page() composites the watermark PDF page onto each content page using PDF transparency semantics. By default the watermark layer is stamped over the content; call page.merge_page(watermark, over=False) to push it under the content instead (useful for background logos).

# pip install pypdf reportlab
from pathlib import Path
from pypdf import PdfReader, PdfWriter

INPUT_PDF  = Path("./input/source.pdf")
OUTPUT_PDF = Path("./output/watermarked.pdf")
WATERMARK  = Path("./tmp/watermark_template.pdf")


def stamp_watermark(
    source: Path,
    watermark: Path,
    output: Path,
    under: bool = False,     # True = watermark behind content (background mode)
) -> None:
    """Merge the first page of ``watermark`` onto every page of ``source``.

    Args:
        source: PDF whose pages receive the watermark.
        watermark: Template PDF; only its first page is used.
        output: Destination file. Its parent directory is created if missing.
        under: When True the watermark is placed beneath the page content,
            otherwise it is stamped on top.

    Errors are reported on stdout rather than raised, so the function always
    returns None.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    try:
        # Load the template once; the same page object is reused for every page.
        wm_reader = PdfReader(watermark)
        wm_page   = wm_reader.pages[0]

        reader = PdfReader(source)
        writer = PdfWriter()

        for page in reader.pages:
            # Always merge INTO the content page so its own page box is kept.
            # `over` selects whether the watermark lands above or below the
            # existing content.
            page.merge_page(wm_page, over=not under)
            writer.add_page(page)

        with open(output, "wb") as fh:
            writer.write(fh)
        print(f"Watermarked: {output}")
    except FileNotFoundError as exc:
        print(f"Missing file: {exc}")
    except Exception as exc:
        # Deliberate best-effort: report and continue rather than abort a batch.
        print(f"Watermark failed: {exc}")


if __name__ == "__main__":
    stamp_watermark(INPUT_PDF, WATERMARK, OUTPUT_PDF)

Step 3: Encrypt with AES-256 and Set Permission Flags

PdfWriter.encrypt() accepts a permissions_flag bitmask. pypdf exposes the individual bits as the UserAccessPermissions enum in pypdf.constants (a raw integer bitmask also works). The owner password bypasses all restrictions; the user password enforces them.

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader, PdfWriter
# pypdf publishes the permission bits as the UserAccessPermissions IntFlag
# in pypdf.constants.
from pypdf.constants import UserAccessPermissions

INPUT_PDF   = Path("./output/watermarked.pdf")
SECURED_PDF = Path("./output/secured.pdf")

# Bitmask: allow printing and annotations, deny content copy and modification
PERMISSIONS = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.ADD_OR_MODIFY
)


def encrypt_pdf(
    source: Path,
    output: Path,
    user_password: str,
    owner_password: str,
    permissions: int = PERMISSIONS,
    algorithm: str = "AES-256",
) -> None:
    """Copy every page of ``source`` into a password-protected ``output``.

    Args:
        source: PDF to protect.
        output: Destination file; its parent directory is created if missing.
        user_password: Password required to open and view the document.
        owner_password: Password that grants full rights and overrides the
            permission flags.
        permissions: Bitmask passed to the writer as ``permissions_flag``.
        algorithm: Encryption algorithm name handed to the writer.

    Failures are printed, not raised, so the function always returns None.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    try:
        src_doc = PdfReader(source)
        protected = PdfWriter()
        for src_page in src_doc.pages:
            protected.add_page(src_page)

        crypt_options = {
            "user_password": user_password,
            "owner_password": owner_password,
            "permissions_flag": permissions,
            "algorithm": algorithm,      # "AES-256" → PDF 2.0 compliant
        }
        protected.encrypt(**crypt_options)

        with open(output, "wb") as sink:
            protected.write(sink)
        print(f"Encrypted ({algorithm}): {output}")
    except FileNotFoundError as missing:
        print(f"Source not found: {missing}")
    except Exception as failure:
        print(f"Encryption failed: {failure}")


if __name__ == "__main__":
    import os

    # Passwords come from the environment (never hardcode them). Stop with a
    # readable message when one is absent instead of a bare KeyError traceback.
    missing = [
        name for name in ("PDF_USER_PW", "PDF_OWNER_PW") if name not in os.environ
    ]
    if missing:
        raise SystemExit(f"Set environment variable(s): {', '.join(missing)}")

    encrypt_pdf(
        INPUT_PDF,
        SECURED_PDF,
        user_password=os.environ["PDF_USER_PW"],
        owner_password=os.environ["PDF_OWNER_PW"],
    )

Never hardcode passwords in source files. Pull them from environment variables or a secrets manager (AWS Secrets Manager, HashiCorp Vault, or even a local .env excluded from version control).

Owner vs User Password

Password type	Who holds it	What it unlocks
User password	End recipient	Open and view (within permission flags)
Owner password	Document author / admin	All operations; overrides every permission flag
No user password	—	File opens without password; flags still apply to non-owner opens

Setting user_password="" (empty string) leaves the document openable by anyone while still enforcing permission flags and requiring the owner password for editing. This is a common pattern for read-only distribution.

Permission Flags Reference

# pypdf publishes the permission bits as the UserAccessPermissions IntFlag
# in pypdf.constants.
from pypdf.constants import UserAccessPermissions

# Common flag combinations
READ_ONLY  = UserAccessPermissions(0)    # deny everything
PRINT_ONLY = UserAccessPermissions.PRINT
ANNOTATE   = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.ADD_OR_MODIFY
)
FULL_EDIT  = (
    UserAccessPermissions.PRINT                        # print
    | UserAccessPermissions.MODIFY                     # modify contents
    | UserAccessPermissions.EXTRACT                    # copy content
    | UserAccessPermissions.ADD_OR_MODIFY              # annotations
    | UserAccessPermissions.FILL_FORM_FIELDS           # fill existing form fields
    | UserAccessPermissions.EXTRACT_TEXT_AND_GRAPHICS  # extraction for accessibility
    | UserAccessPermissions.ASSEMBLE_DOC               # insert / rotate / delete pages
    | UserAccessPermissions.PRINT_TO_REPRESENTATION    # high-quality print
)

Edge Cases and Variants

Variant 1: Watermark + Encrypt in One Pass (in-memory)

Avoid writing an intermediate file by streaming through io.BytesIO. This matters when generating PDF reports dynamically and piping output directly to a secured response:

# pip install pypdf reportlab
import io, os
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter


def watermark_then_encrypt(
    source_path: Path,
    output_path: Path,
    wm_text: str,
    user_pw: str,
    owner_pw: str,
    pagesize: tuple = letter,
    alpha: float = 0.25,
    font_size: int = 52,
) -> None:
    """Watermark and AES-256 encrypt a PDF without an intermediate file.

    Args:
        source_path: PDF to process.
        output_path: Destination file; its parent directory is created if
            missing.
        wm_text: Text drawn diagonally across every page.
        user_pw: Password required to open the result.
        owner_pw: Password granting full rights on the result.
        pagesize: ``(width, height)`` of the watermark template in points.
            Match it to the source pages so the text stays centred (e.g. pass
            A4 for A4 documents).
        alpha: Fill opacity of the watermark text.
        font_size: Size of the Helvetica-Bold watermark font, in points.

    No ``permissions_flag`` is passed to ``encrypt``, so the library's default
    permission set applies.
    """
    # 1. Build the watermark template in a BytesIO buffer
    buf = io.BytesIO()
    width, height = pagesize
    c = canvas.Canvas(buf, pagesize=pagesize)
    c.saveState()
    # Origin to page centre, then rotate, so the centred string runs diagonally
    c.translate(width / 2, height / 2)
    c.rotate(45)
    c.setFillAlpha(alpha)
    c.setFont("Helvetica-Bold", font_size)
    c.setFillColorRGB(0.3, 0.3, 0.3)
    c.drawCentredString(0, 0, wm_text)
    c.restoreState()
    c.save()
    buf.seek(0)  # rewind so PdfReader reads from the start of the buffer

    # 2. Merge the watermark onto each page
    wm_page = PdfReader(buf).pages[0]
    reader  = PdfReader(source_path)
    writer  = PdfWriter()
    for page in reader.pages:
        page.merge_page(wm_page)
        writer.add_page(page)

    # 3. Encrypt the in-memory result; nothing unencrypted is written to disk
    writer.encrypt(
        user_password=user_pw,
        owner_password=owner_pw,
        algorithm="AES-256",
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb") as fh:
        writer.write(fh)
    print(f"Done: {output_path}")

Variant 2: RC4 Legacy Mode for Older Readers

Some embedded systems (older MFP scanners, kiosk PDF viewers) reject AES-256. Fall back to RC4-128 only when interoperability requires it — RC4 is cryptographically weak and should not be used for compliance:

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader, PdfWriter

def encrypt_rc4_legacy(source: Path, output: Path, user_pw: str, owner_pw: str) -> None:
    """RC4-128 for legacy reader compatibility only — not for compliance use.

    Args:
        source: PDF to protect.
        output: Destination file; its parent directory is created if missing.
        user_pw: Password required to open the result.
        owner_pw: Password granting full rights on the result.
    """
    # Create the destination directory up front so the final write cannot fail
    # on a missing folder after all the work is done.
    output.parent.mkdir(parents=True, exist_ok=True)
    reader = PdfReader(source)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    # "RC4-128" is accepted by pypdf but generates a PDF 1.4-compatible dict
    writer.encrypt(user_password=user_pw, owner_password=owner_pw, algorithm="RC4-128")
    with open(output, "wb") as fh:
        writer.write(fh)

Variant 3: Image Watermark (logo stamp)

For brand logos, draw a scaled image instead of text in the ReportLab canvas:

# pip install reportlab Pillow
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader

LOGO = Path("./assets/logo.png")
WM   = Path("./tmp/logo_watermark.pdf")


def create_image_watermark(logo: Path = LOGO, output: Path = WM, alpha: float = 0.2) -> None:
    """Write a one-page, letter-sized PDF with ``logo`` centred on the page.

    Args:
        logo: Image file to draw.
        output: Destination PDF; its parent directory is created if missing.
        alpha: Fill opacity set on the canvas before the image is drawn.
    """
    # The canvas only opens the file on save(), so a missing folder would
    # otherwise surface late as a FileNotFoundError.
    output.parent.mkdir(parents=True, exist_ok=True)
    width, height = letter
    c = canvas.Canvas(str(output), pagesize=letter)
    c.saveState()
    c.setFillAlpha(alpha)
    # Centre the logo; adjust width/height as needed
    logo_w, logo_h = 200, 80
    c.drawImage(
        ImageReader(str(logo)),
        (width - logo_w) / 2,
        (height - logo_h) / 2,
        width=logo_w,
        height=logo_h,
        mask="auto",        # honour PNG transparency
    )
    c.restoreState()
    c.save()

Validation

After applying watermark and encryption, verify both programmatically before delivery:

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader

def validate_secured_pdf(path: Path, user_pw: str, expected_pages: int) -> bool:
    """Check that ``path`` is encrypted, opens with ``user_pw`` and has the right length.

    Args:
        path: PDF to verify.
        user_pw: Password to try against the file.
        expected_pages: Page count the decrypted document must have.

    Returns:
        True when every check passes. False when a check fails or any
        exception occurs; the reason is printed in both cases.
    """
    try:
        pdf = PdfReader(path)

        if not pdf.is_encrypted:
            failure = f"FAIL: {path.name} is not encrypted"
        elif pdf.decrypt(user_pw) == 0:
            # decrypt() yields 0 when the password is rejected
            failure = f"FAIL: wrong password for {path.name}"
        else:
            page_total = len(pdf.pages)
            if page_total == expected_pages:
                print(f"PASS: {path.name} — encrypted, {page_total} pages")
                return True
            failure = f"FAIL: expected {expected_pages} pages, got {page_total}"

        print(failure)
        return False
    except Exception as exc:
        print(f"ERROR: {exc}")
        return False

reader.decrypt() returns 0 on failure, 1 for user-password success, and 2 for owner-password success.

Performance and Scale Notes

Generate the watermark template once per batch, not once per file. A ReportLab canvas render takes ~5–10 ms; multiplied across thousands of documents it adds up.
Open the watermark reader once per batch and reuse the page object — keeping a single PdfReader open is cheap.
Use multiprocessing for large batches. pypdf is pure Python and CPU-bound, so threads stay serialized by the GIL; concurrent.futures.ProcessPoolExecutor with max_workers=os.cpu_count() spreads files across cores and scales close to linearly until disk I/O becomes the bottleneck.
Memory ceiling. Each PdfReader/PdfWriter pair holds the full page tree in memory. For files over ~200 MB, stream with pypdf's clone_reader_document_root or process in chunks.

Troubleshooting

Error	Root cause	Fix
`PdfReadError: Stream has not been decrypted`	Trying to read pages from an encrypted file without calling `decrypt()`	Call `reader.decrypt(password)` immediately after opening; check the return value
`NotImplementedError: Encryption algorithm not supported`	Using `PyPDF2` (unmaintained) or a version of pypdf older than 3.0	`pip install "pypdf>=3.17"` and remove `PyPDF2` from `requirements.txt`
Watermark text missing from output	Template PDF is empty because `c.save()` was never called, or the fill alpha is so low that the text is invisible	Verify the drawing calls sit between `c.saveState()` and `c.restoreState()` and that `c.save()` runs afterwards; check the `setFillAlpha` value
Permission flags ignored by Adobe Acrobat	User and owner passwords set to the same value	Always set owner and user passwords to different values; identical passwords disable flag enforcement in some readers
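`FileNotDecryptedError` when reading pages	`reader.decrypt()` was never called, or it returned 0 because the password was wrong	Check the return value of `reader.decrypt()` before touching pages; catch `pypdf.errors.FileNotDecryptedError` and surface a clear message rather than swallowing the exception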

Complete Working Script

# pip install pypdf reportlab
"""
secure_pdfs.py — watermark and encrypt all PDFs in a directory.

Usage:
    PDF_USER_PW=viewer123 PDF_OWNER_PW=admin456 python secure_pdfs.py \
        --input ./raw --output ./secured --text "CONFIDENTIAL"
"""
import argparse
import io
import os
from pathlib import Path

from pypdf import PdfReader, PdfWriter
# pypdf publishes the permission bits as the UserAccessPermissions IntFlag
# in pypdf.constants.
from pypdf.constants import UserAccessPermissions
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Rights granted to non-owner opens: printing and annotations only.
PERMISSIONS = UserAccessPermissions.PRINT | UserAccessPermissions.ADD_OR_MODIFY


def build_watermark_buffer(
    text: str,
    alpha: float = 0.25,
    pagesize: tuple = letter,
    font_size: int = 52,
) -> io.BytesIO:
    """Render a one-page diagonal text watermark into an in-memory PDF.

    Args:
        text: String drawn across the page.
        alpha: Fill opacity of the text.
        pagesize: ``(width, height)`` of the template page in points. Match it
            to the documents being stamped so the text stays centred.
        font_size: Size of the Helvetica-Bold font, in points.

    Returns:
        A BytesIO positioned at offset 0 and containing the finished PDF.
    """
    buf = io.BytesIO()
    width, height = pagesize
    c = canvas.Canvas(buf, pagesize=pagesize)
    c.saveState()
    # Origin to page centre, then rotate, so the centred string runs diagonally
    c.translate(width / 2, height / 2)
    c.rotate(45)
    c.setFillAlpha(alpha)
    c.setFont("Helvetica-Bold", font_size)
    c.setFillColorRGB(0.3, 0.3, 0.3)
    c.drawCentredString(0, 0, text)
    c.restoreState()
    c.save()
    buf.seek(0)  # rewind so the caller can read the PDF from the start
    return buf


def process_file(
    source: Path,
    output: Path,
    wm_page,          # watermark page object, loaded once by the caller
    user_pw: str,
    owner_pw: str,
) -> bool:
    """Watermark every page of ``source``, encrypt the result and write ``output``.

    Args:
        source: PDF to process.
        output: Destination file.
        wm_page: Page object merged onto each source page.
        user_pw: Password required to open the result.
        owner_pw: Password granting full rights on the result.

    Returns:
        True on success. False when anything raises; the file is then reported
        as skipped on stdout instead of aborting the batch.
    """
    try:
        original = PdfReader(source)
        secured = PdfWriter()
        for leaf in original.pages:
            leaf.merge_page(wm_page)
            secured.add_page(leaf)

        crypt_options = {
            "user_password": user_pw,
            "owner_password": owner_pw,
            "permissions_flag": PERMISSIONS,
            "algorithm": "AES-256",
        }
        secured.encrypt(**crypt_options)

        with open(output, "wb") as sink:
            secured.write(sink)
        print(f"  secured: {output.name}")
        return True
    except Exception as problem:
        print(f"  SKIP {source.name}: {problem}")
        return False


def main() -> None:
    """Watermark and encrypt every ``*.pdf`` in the input directory.

    Passwords are read from the PDF_USER_PW (optional, defaults to empty) and
    PDF_OWNER_PW (required) environment variables. Each result is written to
    the output directory as ``secure_<original name>``.

    Raises:
        SystemExit: when PDF_OWNER_PW is unset, when both passwords are
            identical, when the input directory does not exist, or (with
            status 1) when at least one file could not be processed.
    """
    parser = argparse.ArgumentParser(description="Watermark and encrypt PDFs")
    parser.add_argument("--input",  type=Path, default=Path("./input"),  help="Source directory")
    parser.add_argument("--output", type=Path, default=Path("./output"), help="Output directory")
    parser.add_argument("--text",   default="CONFIDENTIAL",               help="Watermark text")
    args = parser.parse_args()

    user_pw  = os.environ.get("PDF_USER_PW", "")
    owner_pw = os.environ.get("PDF_OWNER_PW")
    if not owner_pw:
        raise SystemExit("Set PDF_OWNER_PW environment variable")
    # Identical passwords let every recipient act as owner, which defeats the
    # permission flags in some readers.
    if user_pw == owner_pw:
        raise SystemExit("PDF_USER_PW and PDF_OWNER_PW must be different")
    # glob() on a missing directory yields nothing, which would otherwise look
    # like a successful run over zero files.
    if not args.input.is_dir():
        raise SystemExit(f"Input directory not found: {args.input}")

    args.output.mkdir(parents=True, exist_ok=True)

    # Build watermark once for the entire batch
    wm_buf  = build_watermark_buffer(args.text)
    wm_page = PdfReader(wm_buf).pages[0]

    pdfs = sorted(args.input.glob("*.pdf"))
    print(f"Processing {len(pdfs)} file(s) from {args.input}")
    ok = sum(
        process_file(p, args.output / f"secure_{p.name}", wm_page, user_pw, owner_pw)
        for p in pdfs
    )
    print(f"Done: {ok}/{len(pdfs)} succeeded")
    # Let shells and schedulers detect partial failure.
    if ok != len(pdfs):
        raise SystemExit(1)


if __name__ == "__main__":
    main()

Add Password Protection to PDF Files — detailed AES-256 encryption workflow with validation
Remove a Password from a PDF with Python — decrypt an authorized PDF and save an unencrypted copy
Merging and Splitting PDF Documents — complete structural edits before applying security
Generating PDF Reports Dynamically — pipe generated output directly into the watermark+encrypt step

Part of Automating PDF Extraction & Generation.

Explore next

Add Password Protection to PDF Files Remove a Password from a PDF with Python