likha-pdf
-simple markdown export with pandoc + lualatex.
+simple markdown to pdf export.
From d151ec91488f10134babae4d4a879d823b221b62 Mon Sep 17 00:00:00 2001
From: kj_sh604
Date: Wed, 11 Mar 2026 22:32:11 -0400
Subject: refactor: re-write in a familiar language to make it easier to
maintain
---
Dockerfile | 24 +-
README.md | 19 +-
requirements.txt | 5 +
src/app.nim | 525 -------------------------------------------
src/app.py | 568 +++++++++++++++++++++++++++++++++++++++++++++++
src/latex/template.tex | 88 --------
src/templates/index.html | 2 +-
7 files changed, 598 insertions(+), 633 deletions(-)
create mode 100644 requirements.txt
delete mode 100644 src/app.nim
create mode 100644 src/app.py
delete mode 100644 src/latex/template.tex
diff --git a/Dockerfile b/Dockerfile
index fef5037..e87147d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,23 +1,27 @@
-FROM ubuntu:24.04
+FROM python:3.12-slim
ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get install -y \
- nim \
- build-essential \
- pandoc \
- texlive-full \
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ libcairo2 \
+ libpango-1.0-0 \
+ libpangocairo-1.0-0 \
+ libgdk-pixbuf-2.0-0 \
+ libffi-dev \
+ shared-mime-info \
+ fonts-noto \
fonts-noto-color-emoji \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
-COPY src/ .
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
-RUN nim c -d:release --opt:size -o:likha-pdf app.nim
+COPY src/ .
RUN mkdir -p generated uploads
-EXPOSE 5000
+EXPOSE 5001
-CMD ["./likha-pdf"]
+CMD ["python3", "app.py"]
diff --git a/README.md b/README.md
index de613e8..7d7a087 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,21 @@
# likha-pdf
-a simple and crappy web app that converts markdown to pdf using pandoc and lualatex.
+a simple web app that converts markdown to pdf.
## features
- markdown to pdf export
-- crappy image upload (but it works)
-- emoji-capable latex template
+- image upload with markdown snippet insertion
+- paper size, margin, font, line spacing, and page number options
+- syntax-highlighted code blocks
+- always produces a pdf (reportlab fallback if weasyprint fails)
## requirements
-- nim 1.6+
-- pandoc
-- lualatex
+- python 3.10+
+- system packages: `libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf-2.0-0 shared-mime-info`
## image usage
@@ -28,9 +29,9 @@ a simple and crappy web app that converts markdown to pdf using pandoc and luala
### local
```bash
+pip install -r requirements.txt
cd src/
-nim c -d:release -o:likha-pdf app.nim
-./likha-pdf
+python3 app.py
```
### docker
@@ -40,4 +41,4 @@ docker build -t likha-pdf .
docker run -p 5001:5001 likha-pdf
```
-open `http://localhost:5000`
+open `http://localhost:5001`
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e081602
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+flask==3.1.*
+markdown==3.7.*
+pygments==2.19.*
+weasyprint==63.*
+reportlab==4.3.*
diff --git a/src/app.nim b/src/app.nim
deleted file mode 100644
index 17f95b7..0000000
--- a/src/app.nim
+++ /dev/null
@@ -1,525 +0,0 @@
-import
- std/[
- asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri,
- random,
- ]
-
-# tiny backend in nimlang, may be stupid, but this was fun
-
-const
- AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"]
- ValidPaperSizes = [
- "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper",
- "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper",
- "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper",
- "ledgerpaper", "tabloid", "statement", "flsa",
- ]
- ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"]
- ValidLineSpacings = ["1", "1.5", "2"]
- CustomPaperDimensions = [
- ("tabloid", "11in", "17in"),
- ("statement", "5.5in", "8.5in"),
- ("flsa", "8.5in", "13in"),
- ]
-
-const AppName = "likha-pdf"
-
-proc lookupCustomPaper(name: string): tuple[width: string, height: string] =
- for (paperName, w, h) in CustomPaperDimensions:
- if paperName == name:
- return (width: w, height: h)
- (width: "", height: "")
-
-proc baseDir(): string {.inline.} =
- getAppDir()
-
-proc generatedDir(): string {.inline.} =
- baseDir() / "generated"
-
-proc uploadsDir(): string {.inline.} =
- baseDir() / "uploads"
-
-proc latexTemplatePath(): string {.inline.} =
- baseDir() / "latex" / "template.tex"
-
-proc templatesDir(): string {.inline.} =
- baseDir() / "templates"
-
-proc partialsDir(): string {.inline.} =
- templatesDir() / "partials"
-
-proc staticDir(): string {.inline.} =
- baseDir() / "static"
-
-type MultipartPart = object
- name: string
- filename: string
- contentType: string
- content: string
-
-# helpers
-proc htmlEscape(value: string): string =
- result = value
- result = result.replace("&", "&amp;")
- result = result.replace("<", "&lt;")
- result = result.replace(">", "&gt;")
- result = result.replace("\"", "&quot;")
- result = result.replace("'", "&#39;")
-
-proc randomHex(length: int): string =
- const hexChars = "0123456789abcdef"
- result = newStringOfCap(length)
- for _ in 0 ..< length:
- result.add(hexChars[rand(15)])
-
-proc renderTemplate(
- filePath: string, replacements: openArray[(string, string)]
-): string =
- result = readFile(filePath)
- for (token, replacement) in replacements:
- result = result.replace(token, replacement)
-
-proc decodeFormComponent(value: string): string =
- decodeUrl(value.replace("+", " "))
-
-proc parseUrlEncoded(body: string): Table[string, string] =
- result = initTable[string, string]()
- if body.len == 0:
- return
-
- for pair in body.split("&"):
- if pair.len == 0:
- continue
- let separator = pair.find('=')
- if separator < 0:
- result[decodeFormComponent(pair)] = ""
- else:
- let key = decodeFormComponent(pair[0 ..< separator])
- let value = decodeFormComponent(pair[separator + 1 .. ^1])
- result[key] = value
-
-# "options" are optional, defaults are forever.
-proc pickOption(value: string, fallback: string, options: openArray[string]): string =
- for option in options:
- if option == value:
- return value
- fallback
-
-proc sanitizeFilename(filename: string): string =
- result = newStringOfCap(filename.len)
- for ch in filename:
- if (ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or
- (ch >= '0' and ch <= '9') or (ch in {'-', '_', '.'}):
- result.add(ch)
- elif ch == ' ':
- result.add('_')
-
-proc baseFilename(value: string): string =
- var normalized = value.replace("\\", "/")
- let index = normalized.rfind('/')
- if index >= 0 and index < normalized.high:
- normalized = normalized[index + 1 .. ^1]
- elif index == normalized.high:
- normalized = ""
- normalized
-
-proc isAllowedImage(filename: string): bool =
- let dot = filename.rfind('.')
- if dot < 1 or dot == filename.high:
- return false
- let extension = filename[dot + 1 .. ^1].toLowerAscii()
- for allowed in AllowedImageExtensions:
- if extension == allowed:
- return true
- false
-
-proc tailText(value: string, maxLen: int = 1200): string =
- if value.len <= maxLen:
- return value
- value[value.len - maxLen .. ^1]
-
-proc extractBoundary(contentType: string): string =
- for part in contentType.split(';'):
- let token = part.strip()
- if token.toLowerAscii().startsWith("boundary="):
- return token[9 .. ^1].strip(chars = {'\"', '\''})
- ""
-
-proc stripTrailingCrlf(value: string): string =
- result = value
- if result.len >= 2 and result.endsWith("\r\n"):
- result.setLen(result.len - 2)
-
-# hand-rolled multipart parsing, yes i am aware that this is "eh"
-proc parseMultipart(body: string, boundary: string): seq[MultipartPart] =
- let delimiter = "--" & boundary
- for rawChunk in body.split(delimiter):
- var chunk = rawChunk
- if chunk.len == 0:
- continue
- if chunk == "--" or chunk == "--\r\n":
- continue
- if chunk.startsWith("\r\n"):
- chunk = chunk[2 .. ^1]
-
- chunk = stripTrailingCrlf(chunk)
-
- if chunk.len == 2 and chunk == "--":
- continue
-
- let splitIndex = chunk.find("\r\n\r\n")
- if splitIndex < 0:
- continue
-
- let headerBlock = chunk[0 ..< splitIndex]
- var content = chunk[splitIndex + 4 .. ^1]
- content = stripTrailingCrlf(content)
-
- var name = ""
- var filename = ""
- var contentType = "application/octet-stream"
-
- for line in headerBlock.split("\r\n"):
- let separator = line.find(':')
- if separator <= 0:
- continue
- let headerName = line[0 ..< separator].strip().toLowerAscii()
- let headerValue = line[separator + 1 .. ^1].strip()
-
- if headerName == "content-disposition":
- for part in headerValue.split(';'):
- let token = part.strip()
- if token.startsWith("name="):
- name = token[5 .. ^1].strip(chars = {'\"', '\''})
- elif token.startsWith("filename="):
- filename = token[9 .. ^1].strip(chars = {'\"', '\''})
- elif headerName == "content-type":
- contentType = headerValue
-
- if name.len > 0:
- result.add(
- MultipartPart(
- name: name, filename: filename, contentType: contentType, content: content
- )
- )
-
-proc isSafeRelativePath(pathPart: string): bool =
- pathPart.len > 0 and not pathPart.contains("..") and not pathPart.contains('\\') and
- not pathPart.startsWith("/")
-
-proc fileContentType(filePath: string): string =
- let lowered = filePath.toLowerAscii()
- if lowered.endsWith(".js"):
- return "application/javascript; charset=utf-8"
- if lowered.endsWith(".css"):
- return "text/css; charset=utf-8"
- if lowered.endsWith(".html"):
- return "text/html; charset=utf-8"
- if lowered.endsWith(".png"):
- return "image/png"
- if lowered.endsWith(".jpg") or lowered.endsWith(".jpeg"):
- return "image/jpeg"
- if lowered.endsWith(".gif"):
- return "image/gif"
- if lowered.endsWith(".webp"):
- return "image/webp"
- if lowered.endsWith(".svg"):
- return "image/svg+xml"
- if lowered.endsWith(".pdf"):
- return "application/pdf"
- "application/octet-stream"
-
-# response wrappers
-proc respondHtml(req: Request, code: HttpCode, content: string) {.async.} =
- let headers = newHttpHeaders({"Content-Type": "text/html; charset=utf-8"})
- await req.respond(code, content, headers)
-
-proc respondText(req: Request, code: HttpCode, content: string) {.async.} =
- let headers = newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"})
- await req.respond(code, content, headers)
-
-proc respondFile(
- req: Request,
- filePath: string,
- asAttachment: bool = false,
- attachmentName: string = "",
-) {.async.} =
- if not fileExists(filePath):
- await respondText(req, Http404, "Not found")
- return
-
- var headers = newHttpHeaders()
- headers["Content-Type"] = fileContentType(filePath)
- if asAttachment and attachmentName.len > 0:
- headers["Content-Disposition"] = "attachment; filename=\"" & attachmentName & "\""
-
- await req.respond(Http200, readFile(filePath), headers)
-
-# pandoc does the heavy lifting
-proc runPandoc(
- sourceMarkdown: string,
- outputPath: string,
- paperSize: string,
- margin: string,
- mainFont: string,
- lineSpacing: string,
- showPageNumbers: bool,
-): tuple[ok: bool, output: string, missingPandoc: bool] =
- let tempDir = getTempDir() / (AppName & "-" & randomHex(10))
- createDir(tempDir)
- let tempMarkdownPath = tempDir / "source.md"
- let tempRawPath = tempDir / "raw.md"
-
- try:
- # write raw markdown first
- writeFile(tempRawPath, sourceMarkdown)
-
- # preprocess markdown: convert to ascii with transliteration and normalize quotes
- let iconvCmd =
- "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) &
- " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath)
- let (_, iconvExitCode) = execCmdEx(iconvCmd)
-
- if iconvExitCode != 0:
- # if preprocessing fails, fall back to original content
- writeFile(tempMarkdownPath, sourceMarkdown)
-
- var args = @[
- tempMarkdownPath,
- "--from",
- "markdown+emoji+hard_line_breaks",
- "--pdf-engine=lualatex",
- "--template",
- latexTemplatePath(),
- "-V",
- "margin=" & margin,
- "-V",
- "mainfont=" & mainFont,
- "-V",
- "linespacing=" & lineSpacing,
- "--resource-path",
- baseDir() & ":" & uploadsDir() & ":" & tempDir,
- "-o",
- outputPath,
- ]
-
- let dims = lookupCustomPaper(paperSize)
- if dims.width.len > 0:
- args.add("-V")
- args.add("paperwidth=" & dims.width)
- args.add("-V")
- args.add("paperheight=" & dims.height)
- else:
- args.add("-V")
- args.add("papersize=" & paperSize)
-
- if not showPageNumbers:
- args.add("-V")
- args.add("hidepages=true")
-
- var process: Process
- try:
- process =
- startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut})
- except OSError:
- return (
- ok: false,
- output: "Pandoc is not installed or not in PATH.",
- missingPandoc: true,
- )
-
- let output = process.outputStream.readAll()
- let exitCode = process.waitForExit()
- process.close()
-
- if exitCode == 0:
- return (ok: true, output: "", missingPandoc: false)
- return (ok: false, output: output, missingPandoc: false)
- finally:
- try:
- if fileExists(tempRawPath):
- removeFile(tempRawPath)
- if fileExists(tempMarkdownPath):
- removeFile(tempMarkdownPath)
- if dirExists(tempDir):
- removeDir(tempDir)
- except OSError:
- discard
-
-# app endpoint: strict inputs, loud errors.
-proc handleConvert(req: Request) {.async.} =
- let formData = parseUrlEncoded(req.body)
- let markdown = formData.getOrDefault("markdown", "").strip()
-
- if markdown.len == 0:
- let html = renderTemplate(
- partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")]
- )
- await respondHtml(req, Http400, html)
- return
-
- let paperSize =
- pickOption(formData.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes)
- let margin = pickOption(formData.getOrDefault("margin", ""), "1in", ValidMargins)
-
- var mainFontFamily = formData.getOrDefault("main_font", "serif")
- if mainFontFamily != "serif" and mainFontFamily != "sans":
- mainFontFamily = "serif"
-
- let mainFont = if mainFontFamily == "sans": "TeX Gyre Heros" else: "TeX Gyre Pagella"
- let lineSpacing =
- pickOption(formData.getOrDefault("line_spacing", ""), "1", ValidLineSpacings)
- let showPageNumbers = formData.getOrDefault("page_numbers", "") == "on"
- let epoch = int(getTime().toUnix())
- let outputName = AppName & "_" & $epoch & "_" & randomHex(32) & ".pdf"
- let outputPath = generatedDir() / outputName
-
- let conversion = runPandoc(
- markdown, outputPath, paperSize, margin, mainFont, lineSpacing, showPageNumbers
- )
-
- if not conversion.ok:
- let message =
- if conversion.missingPandoc:
- conversion.output
- else:
- let stderr = conversion.output.strip()
- if stderr.len > 0:
- tailText(stderr)
- else:
- "PDF conversion failed."
-
- let html = renderTemplate(
- partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))]
- )
- let code = if conversion.missingPandoc: Http500 else: Http400
- await respondHtml(req, code, html)
- return
-
- let html = renderTemplate(
- partialsDir() / "result.html",
- [
- ("{{ filename }}", htmlEscape(outputName)),
- ("{{ download_url }}", "/download/" & encodeUrl(outputName)),
- ],
- )
- await respondHtml(req, Http200, html)
-
-# upload endpoint. accepts image, returns markdown snippet
-proc handleUploadImage(req: Request) {.async.} =
- let contentType = req.headers.getOrDefault("Content-Type")
- let boundary = extractBoundary(contentType)
-
- if boundary.len == 0:
- let html = renderTemplate(
- partialsDir() / "upload_error.html",
- [("{{ message }}", "image file is required.")],
- )
- await respondHtml(req, Http400, html)
- return
-
- let parts = parseMultipart(req.body, boundary)
- var imagePart: MultipartPart
- var foundImage = false
- for part in parts:
- if part.name == "image":
- imagePart = part
- foundImage = true
- break
-
- if not foundImage or imagePart.filename.strip().len == 0:
- let html = renderTemplate(
- partialsDir() / "upload_error.html",
- [("{{ message }}", "image file is required.")],
- )
- await respondHtml(req, Http400, html)
- return
-
- let originalName = sanitizeFilename(baseFilename(imagePart.filename))
- if originalName.len == 0 or not isAllowedImage(originalName):
- let html = renderTemplate(
- partialsDir() / "upload_error.html",
- [("{{ message }}", "unsupported image type.")],
- )
- await respondHtml(req, Http400, html)
- return
-
- let extensionStart = originalName.rfind('.')
- let extension = originalName[extensionStart + 1 .. ^1].toLowerAscii()
-
- let epoch = int(getTime().toUnix())
- let storedName = "img_" & $epoch & "_" & randomHex(32) & "." & extension
- let imagePath = uploadsDir() / storedName
-
- writeFile(imagePath, imagePart.content)
-
- let markdownSnippet = ""
- let html = renderTemplate(
- partialsDir() / "upload_result.html",
- [
- ("{{ filename }}", htmlEscape(storedName)),
- ("{{ markdown_snippet }}", htmlEscape(markdownSnippet)),
- ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)),
- ],
- )
- await respondHtml(req, Http200, html)
-
-# router table
-proc route(req: Request) {.async.} =
- let path = req.url.path
-
- if req.reqMethod == HttpGet and path == "/":
- await respondFile(req, templatesDir() / "index.html")
- return
-
- if req.reqMethod == HttpGet and path.startsWith("/static/"):
- let relativePath = decodeUrl(path[8 .. ^1])
- if not isSafeRelativePath(relativePath):
- await respondText(req, Http400, "Invalid path")
- return
- await respondFile(req, staticDir() / relativePath)
- return
-
- if req.reqMethod == HttpGet and path.startsWith("/uploads/"):
- let relativePath = decodeUrl(path[9 .. ^1])
- if not isSafeRelativePath(relativePath):
- await respondText(req, Http400, "Invalid path")
- return
- await respondFile(req, uploadsDir() / relativePath)
- return
-
- if req.reqMethod == HttpGet and path.startsWith("/download/"):
- let relativePath = decodeUrl(path[10 .. ^1])
- if not isSafeRelativePath(relativePath):
- await respondText(req, Http400, "Invalid path")
- return
- await respondFile(
- req,
- generatedDir() / relativePath,
- asAttachment = true,
- attachmentName = relativePath,
- )
- return
-
- if req.reqMethod == HttpPost and path == "/convert":
- await handleConvert(req)
- return
-
- if req.reqMethod == HttpPost and path == "/upload-image":
- await handleUploadImage(req)
- return
-
- await respondText(req, Http404, "Not found")
-
-# server boot, then we let htmx do htmx things.
-when isMainModule:
- randomize()
-
- if not dirExists(generatedDir()):
- createDir(generatedDir())
- if not dirExists(uploadsDir()):
- createDir(uploadsDir())
-
- let server = newAsyncHttpServer()
- echo "listening on http://localhost:5001"
- waitFor server.serve(Port(5001), route)
diff --git a/src/app.py b/src/app.py
new file mode 100644
index 0000000..88deef4
--- /dev/null
+++ b/src/app.py
@@ -0,0 +1,568 @@
+#!/usr/bin/env python3
+
+# likha-pdf — markdown to pdf, no latex required
+# converts markdown to html, then html to pdf via weasyprint
+# falls back to reportlab if weasyprint chokes — a pdf is always produced
+
+import os
+import re
+import secrets
+import time
+
+from flask import (
+ Flask,
+ request,
+ send_from_directory,
+ render_template_string,
+ abort,
+)
+from markupsafe import escape
+from markdown import markdown
+from pygments.formatters import HtmlFormatter
+from weasyprint import HTML
+
+APP_NAME = "likha-pdf"
+PORT = 5001
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+GENERATED_DIR = os.path.join(BASE_DIR, "generated")
+UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
+TEMPLATES_DIR = os.path.join(BASE_DIR, "templates")
+PARTIALS_DIR = os.path.join(TEMPLATES_DIR, "partials")
+STATIC_DIR = os.path.join(BASE_DIR, "static")
+
+ALLOWED_IMAGE_EXTS = {"png", "jpg", "jpeg", "gif", "webp", "svg"}
+
+VALID_PAPER_SIZES = {
+ "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper",
+ "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper",
+ "c4paper", "c5paper", "c6paper",
+ "letterpaper", "legalpaper", "executivepaper",
+ "ledgerpaper", "tabloid", "statement", "flsa",
+}
+
+VALID_MARGINS = {"0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"}
+
+VALID_LINE_SPACINGS = {"1", "1.5", "2"}
+
+# css page dimensions for each paper size
+PAPER_CSS = {
+ "a0paper": "841mm 1189mm",
+ "a1paper": "594mm 841mm",
+ "a2paper": "420mm 594mm",
+ "a3paper": "297mm 420mm",
+ "a4paper": "210mm 297mm",
+ "a5paper": "148mm 210mm",
+ "a6paper": "105mm 148mm",
+ "b0paper": "1000mm 1414mm",
+ "b1paper": "707mm 1000mm",
+ "b2paper": "500mm 707mm",
+ "b3paper": "353mm 500mm",
+ "b4paper": "250mm 353mm",
+ "b5paper": "176mm 250mm",
+ "b6paper": "125mm 176mm",
+ "c4paper": "229mm 324mm",
+ "c5paper": "162mm 229mm",
+ "c6paper": "114mm 162mm",
+ "letterpaper": "8.5in 11in",
+ "legalpaper": "8.5in 14in",
+ "executivepaper": "7in 10in",
+ "ledgerpaper": "17in 11in",
+ "tabloid": "11in 17in",
+ "statement": "5.5in 8.5in",
+ "flsa": "8.5in 13in",
+}
+
+MARKDOWN_EXTENSIONS = [
+ "tables",
+ "fenced_code",
+ "codehilite",
+ "nl2br",
+ "sane_lists",
+ "smarty",
+ "toc",
+ "attr_list",
+ "md_in_html",
+]
+
+MARKDOWN_EXT_CONFIG = {
+ "codehilite": {
+ "css_class": "highlight",
+ "guess_lang": True,
+ "noclasses": True,
+ },
+}
+
+app = Flask(
+ __name__,
+ template_folder=TEMPLATES_DIR,
+ static_folder=STATIC_DIR,
+ static_url_path="/static",
+)
+app.config["MAX_CONTENT_LENGTH"] = 64 * 1024 * 1024 # 64 MB
+
+
+# helpers
+def random_hex(length=32):
+ return secrets.token_hex(length // 2)
+
+
+def pick_option(value, fallback, valid):
+ return value if value in valid else fallback
+
+
+def sanitize_filename(name):
+ """keep only safe characters in a filename"""
+ name = os.path.basename(name.replace("\\", "/"))
+ out = []
+ for ch in name:
+ if ch.isalnum() or ch in "-_.":
+ out.append(ch)
+ elif ch == " ":
+ out.append("_")
+ return "".join(out)
+
+
+def is_allowed_image(filename):
+ dot = filename.rfind(".")
+ if dot < 1 or dot == len(filename) - 1:
+ return False
+ ext = filename[dot + 1:].lower()
+ return ext in ALLOWED_IMAGE_EXTS
+
+
+def is_safe_relative_path(path_part):
+ return (
+ bool(path_part)
+ and ".." not in path_part
+ and "\\" not in path_part
+ and not path_part.startswith("/")
+ )
+
+
+def read_partial(name, replacements=None):
+ """read a partial html template and apply replacements"""
+ path = os.path.join(PARTIALS_DIR, name)
+ with open(path, "r", encoding="utf-8") as f:
+ content = f.read()
+ if replacements:
+ for token, value in replacements.items():
+ content = content.replace(token, value)
+ return content
+
+
+def tail_text(value, max_len=1200):
+ if len(value) <= max_len:
+ return value
+ return value[-max_len:]
+
+
+# pdf stylesheet generator
+def build_pdf_css(paper_size, margin, font_family, line_spacing, show_page_numbers):
+ """build the css for weasyprint pdf rendering"""
+ page_dims = PAPER_CSS.get(paper_size, "8.5in 11in")
+
+ if font_family == "sans":
+ font_stack = '"Helvetica Neue", Helvetica, Arial, "Noto Sans", sans-serif'
+ else:
+ font_stack = '"Georgia", "Noto Serif", "Times New Roman", serif'
+
+ page_number_css = ""
+ if show_page_numbers:
+ page_number_css = """
+ @bottom-center {
+ content: counter(page);
+ font-size: 9pt;
+ color: #666;
+ }"""
+
+ return f"""
+@page {{
+ size: {page_dims};
+ margin: {margin};{page_number_css}
+}}
+
+body {{
+ font-family: {font_stack};
+ font-size: 11pt;
+ line-height: {line_spacing};
+ color: #000;
+ word-wrap: break-word;
+ overflow-wrap: break-word;
+}}
+
+h1, h2, h3, h4, h5, h6 {{
+ margin-top: 1em;
+ margin-bottom: 0.4em;
+ page-break-after: avoid;
+}}
+
+h1 {{ font-size: 20pt; }}
+h2 {{ font-size: 16pt; }}
+h3 {{ font-size: 13pt; }}
+h4 {{ font-size: 11pt; }}
+
+p {{
+ margin: 0 0 0.6em 0;
+}}
+
+pre {{
+ background: #f5f5f5;
+ border: 1px solid #ddd;
+ border-radius: 3px;
+ padding: 0.6em;
+ font-size: 9pt;
+ white-space: pre-wrap;
+ word-wrap: break-word;
+ overflow-wrap: break-word;
+ page-break-inside: avoid;
+}}
+
+code {{
+ font-family: "Courier New", Courier, "Liberation Mono", monospace;
+ font-size: 9pt;
+}}
+
+p > code, li > code {{
+ background: #f0f0f0;
+ padding: 0.1em 0.3em;
+ border-radius: 2px;
+}}
+
+blockquote {{
+ border-left: 3px solid #ccc;
+ margin: 0.6em 0;
+ padding: 0.3em 0.8em;
+ color: #555;
+}}
+
+table {{
+ border-collapse: collapse;
+ width: 100%;
+ margin: 0.6em 0;
+ page-break-inside: avoid;
+}}
+
+th, td {{
+ border: 1px solid #ccc;
+ padding: 0.4em 0.6em;
+ text-align: left;
+}}
+
+th {{
+ background: #f5f5f5;
+ font-weight: bold;
+}}
+
+img {{
+ max-width: 100%;
+ height: auto;
+}}
+
+a {{
+ color: #0066cc;
+ text-decoration: underline;
+}}
+
+hr {{
+ border: none;
+ border-top: 1px solid #ccc;
+ margin: 1em 0;
+}}
+
+ul, ol {{
+ margin: 0.4em 0;
+ padding-left: 1.5em;
+}}
+
+li {{
+ margin-bottom: 0.2em;
+}}
+"""
+
+
+# pdf conversion
+def markdown_to_html(source):
+ """convert markdown text to an html fragment"""
+ return markdown(
+ source,
+ extensions=MARKDOWN_EXTENSIONS,
+ extension_configs=MARKDOWN_EXT_CONFIG,
+ )
+
+
+def build_full_html(body_html, css):
+ """wrap the converted html body in a full document with styles"""
+ return f"""
+
+
simple markdown export with pandoc + lualatex.
+simple markdown to pdf export.