From d151ec91488f10134babae4d4a879d823b221b62 Mon Sep 17 00:00:00 2001 From: kj_sh604 Date: Wed, 11 Mar 2026 22:32:11 -0400 Subject: refactor: re-write in a familiar language to make it easier to maintain --- Dockerfile | 24 +- README.md | 19 +- requirements.txt | 5 + src/app.nim | 525 ------------------------------------------- src/app.py | 568 +++++++++++++++++++++++++++++++++++++++++++++++ src/latex/template.tex | 88 -------- src/templates/index.html | 2 +- 7 files changed, 598 insertions(+), 633 deletions(-) create mode 100644 requirements.txt delete mode 100644 src/app.nim create mode 100644 src/app.py delete mode 100644 src/latex/template.tex diff --git a/Dockerfile b/Dockerfile index fef5037..e87147d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,23 +1,27 @@ -FROM ubuntu:24.04 +FROM python:3.12-slim ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - nim \ - build-essential \ - pandoc \ - texlive-full \ +RUN apt-get update && apt-get install -y --no-install-recommends \ + libcairo2 \ + libpango-1.0-0 \ + libpangocairo-1.0-0 \ + libgdk-pixbuf-2.0-0 \ + libffi-dev \ + shared-mime-info \ + fonts-noto \ fonts-noto-color-emoji \ && rm -rf /var/lib/apt/lists/* WORKDIR /app -COPY src/ . +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt -RUN nim c -d:release --opt:size -o:likha-pdf app.nim +COPY src/ . RUN mkdir -p generated uploads -EXPOSE 5000 +EXPOSE 5001 -CMD ["./likha-pdf"] +CMD ["python3", "app.py"] diff --git a/README.md b/README.md index de613e8..7d7a087 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,21 @@ # likha-pdf -a simple and crappy web app that converts markdown to pdf using pandoc and lualatex. +a simple web app that converts markdown to pdf. 
likha-pdf screenshot ## features - markdown to pdf export -- crappy image upload (but it works) -- emoji-capable latex template +- image upload with markdown snippet insertion +- paper size, margin, font, line spacing, and page number options +- syntax-highlighted code blocks +- always produces a pdf (reportlab fallback if weasyprint fails) ## requirements -- nim 1.6+ -- pandoc -- lualatex +- python 3.10+ +- system packages: `libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 shared-mime-info` ## image usage @@ -28,9 +29,9 @@ a simple and crappy web app that converts markdown to pdf using pandoc and luala ### local ```bash +pip install -r requirements.txt cd src/ -nim c -d:release -o:likha-pdf app.nim -./likha-pdf +python3 app.py ``` ### docker @@ -40,4 +41,4 @@ docker build -t likha-pdf . docker run -p 5001:5001 likha-pdf ``` -open `http://localhost:5000` +open `http://localhost:5001` diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e081602 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +flask==3.1.* +markdown==3.7.* +pygments==2.19.* +weasyprint==63.* +reportlab==4.3.* diff --git a/src/app.nim b/src/app.nim deleted file mode 100644 index 17f95b7..0000000 --- a/src/app.nim +++ /dev/null @@ -1,525 +0,0 @@ -import - std/[ - asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri, - random, - ] - -# tiny backend in nimlang, may be stupid, but this was fun - -const - AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"] - ValidPaperSizes = [ - "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper", - "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper", - "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper", - "ledgerpaper", "tabloid", "statement", "flsa", - ] - ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"] - ValidLineSpacings = ["1", "1.5", "2"] - CustomPaperDimensions 
= [ - ("tabloid", "11in", "17in"), - ("statement", "5.5in", "8.5in"), - ("flsa", "8.5in", "13in"), - ] - -const AppName = "likha-pdf" - -proc lookupCustomPaper(name: string): tuple[width: string, height: string] = - for (paperName, w, h) in CustomPaperDimensions: - if paperName == name: - return (width: w, height: h) - (width: "", height: "") - -proc baseDir(): string {.inline.} = - getAppDir() - -proc generatedDir(): string {.inline.} = - baseDir() / "generated" - -proc uploadsDir(): string {.inline.} = - baseDir() / "uploads" - -proc latexTemplatePath(): string {.inline.} = - baseDir() / "latex" / "template.tex" - -proc templatesDir(): string {.inline.} = - baseDir() / "templates" - -proc partialsDir(): string {.inline.} = - templatesDir() / "partials" - -proc staticDir(): string {.inline.} = - baseDir() / "static" - -type MultipartPart = object - name: string - filename: string - contentType: string - content: string - -# helpers -proc htmlEscape(value: string): string = - result = value - result = result.replace("&", "&") - result = result.replace("<", "<") - result = result.replace(">", ">") - result = result.replace("\"", """) - result = result.replace("'", "'") - -proc randomHex(length: int): string = - const hexChars = "0123456789abcdef" - result = newStringOfCap(length) - for _ in 0 ..< length: - result.add(hexChars[rand(15)]) - -proc renderTemplate( - filePath: string, replacements: openArray[(string, string)] -): string = - result = readFile(filePath) - for (token, replacement) in replacements: - result = result.replace(token, replacement) - -proc decodeFormComponent(value: string): string = - decodeUrl(value.replace("+", " ")) - -proc parseUrlEncoded(body: string): Table[string, string] = - result = initTable[string, string]() - if body.len == 0: - return - - for pair in body.split("&"): - if pair.len == 0: - continue - let separator = pair.find('=') - if separator < 0: - result[decodeFormComponent(pair)] = "" - else: - let key = 
decodeFormComponent(pair[0 ..< separator]) - let value = decodeFormComponent(pair[separator + 1 .. ^1]) - result[key] = value - -# "options" are optional, defaults are forever. -proc pickOption(value: string, fallback: string, options: openArray[string]): string = - for option in options: - if option == value: - return value - fallback - -proc sanitizeFilename(filename: string): string = - result = newStringOfCap(filename.len) - for ch in filename: - if (ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or - (ch >= '0' and ch <= '9') or (ch in {'-', '_', '.'}): - result.add(ch) - elif ch == ' ': - result.add('_') - -proc baseFilename(value: string): string = - var normalized = value.replace("\\", "/") - let index = normalized.rfind('/') - if index >= 0 and index < normalized.high: - normalized = normalized[index + 1 .. ^1] - elif index == normalized.high: - normalized = "" - normalized - -proc isAllowedImage(filename: string): bool = - let dot = filename.rfind('.') - if dot < 1 or dot == filename.high: - return false - let extension = filename[dot + 1 .. ^1].toLowerAscii() - for allowed in AllowedImageExtensions: - if extension == allowed: - return true - false - -proc tailText(value: string, maxLen: int = 1200): string = - if value.len <= maxLen: - return value - value[value.len - maxLen .. ^1] - -proc extractBoundary(contentType: string): string = - for part in contentType.split(';'): - let token = part.strip() - if token.toLowerAscii().startsWith("boundary="): - return token[9 .. 
^1].strip(chars = {'\"', '\''}) - "" - -proc stripTrailingCrlf(value: string): string = - result = value - if result.len >= 2 and result.endsWith("\r\n"): - result.setLen(result.len - 2) - -# hand-rolled multipart parsing, yes i am aware that this is "eh" -proc parseMultipart(body: string, boundary: string): seq[MultipartPart] = - let delimiter = "--" & boundary - for rawChunk in body.split(delimiter): - var chunk = rawChunk - if chunk.len == 0: - continue - if chunk == "--" or chunk == "--\r\n": - continue - if chunk.startsWith("\r\n"): - chunk = chunk[2 .. ^1] - - chunk = stripTrailingCrlf(chunk) - - if chunk.len == 2 and chunk == "--": - continue - - let splitIndex = chunk.find("\r\n\r\n") - if splitIndex < 0: - continue - - let headerBlock = chunk[0 ..< splitIndex] - var content = chunk[splitIndex + 4 .. ^1] - content = stripTrailingCrlf(content) - - var name = "" - var filename = "" - var contentType = "application/octet-stream" - - for line in headerBlock.split("\r\n"): - let separator = line.find(':') - if separator <= 0: - continue - let headerName = line[0 ..< separator].strip().toLowerAscii() - let headerValue = line[separator + 1 .. ^1].strip() - - if headerName == "content-disposition": - for part in headerValue.split(';'): - let token = part.strip() - if token.startsWith("name="): - name = token[5 .. ^1].strip(chars = {'\"', '\''}) - elif token.startsWith("filename="): - filename = token[9 .. 
^1].strip(chars = {'\"', '\''}) - elif headerName == "content-type": - contentType = headerValue - - if name.len > 0: - result.add( - MultipartPart( - name: name, filename: filename, contentType: contentType, content: content - ) - ) - -proc isSafeRelativePath(pathPart: string): bool = - pathPart.len > 0 and not pathPart.contains("..") and not pathPart.contains('\\') and - not pathPart.startsWith("/") - -proc fileContentType(filePath: string): string = - let lowered = filePath.toLowerAscii() - if lowered.endsWith(".js"): - return "application/javascript; charset=utf-8" - if lowered.endsWith(".css"): - return "text/css; charset=utf-8" - if lowered.endsWith(".html"): - return "text/html; charset=utf-8" - if lowered.endsWith(".png"): - return "image/png" - if lowered.endsWith(".jpg") or lowered.endsWith(".jpeg"): - return "image/jpeg" - if lowered.endsWith(".gif"): - return "image/gif" - if lowered.endsWith(".webp"): - return "image/webp" - if lowered.endsWith(".svg"): - return "image/svg+xml" - if lowered.endsWith(".pdf"): - return "application/pdf" - "application/octet-stream" - -# response wrappers -proc respondHtml(req: Request, code: HttpCode, content: string) {.async.} = - let headers = newHttpHeaders({"Content-Type": "text/html; charset=utf-8"}) - await req.respond(code, content, headers) - -proc respondText(req: Request, code: HttpCode, content: string) {.async.} = - let headers = newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"}) - await req.respond(code, content, headers) - -proc respondFile( - req: Request, - filePath: string, - asAttachment: bool = false, - attachmentName: string = "", -) {.async.} = - if not fileExists(filePath): - await respondText(req, Http404, "Not found") - return - - var headers = newHttpHeaders() - headers["Content-Type"] = fileContentType(filePath) - if asAttachment and attachmentName.len > 0: - headers["Content-Disposition"] = "attachment; filename=\"" & attachmentName & "\"" - - await req.respond(Http200, 
readFile(filePath), headers) - -# pandoc does the heavy lifting -proc runPandoc( - sourceMarkdown: string, - outputPath: string, - paperSize: string, - margin: string, - mainFont: string, - lineSpacing: string, - showPageNumbers: bool, -): tuple[ok: bool, output: string, missingPandoc: bool] = - let tempDir = getTempDir() / (AppName & "-" & randomHex(10)) - createDir(tempDir) - let tempMarkdownPath = tempDir / "source.md" - let tempRawPath = tempDir / "raw.md" - - try: - # write raw markdown first - writeFile(tempRawPath, sourceMarkdown) - - # preprocess markdown: convert to ascii with transliteration and normalize quotes - let iconvCmd = - "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) & - " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath) - let (_, iconvExitCode) = execCmdEx(iconvCmd) - - if iconvExitCode != 0: - # if preprocessing fails, fall back to original content - writeFile(tempMarkdownPath, sourceMarkdown) - - var args = @[ - tempMarkdownPath, - "--from", - "markdown+emoji+hard_line_breaks", - "--pdf-engine=lualatex", - "--template", - latexTemplatePath(), - "-V", - "margin=" & margin, - "-V", - "mainfont=" & mainFont, - "-V", - "linespacing=" & lineSpacing, - "--resource-path", - baseDir() & ":" & uploadsDir() & ":" & tempDir, - "-o", - outputPath, - ] - - let dims = lookupCustomPaper(paperSize) - if dims.width.len > 0: - args.add("-V") - args.add("paperwidth=" & dims.width) - args.add("-V") - args.add("paperheight=" & dims.height) - else: - args.add("-V") - args.add("papersize=" & paperSize) - - if not showPageNumbers: - args.add("-V") - args.add("hidepages=true") - - var process: Process - try: - process = - startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut}) - except OSError: - return ( - ok: false, - output: "Pandoc is not installed or not in PATH.", - missingPandoc: true, - ) - - let output = process.outputStream.readAll() - let exitCode = process.waitForExit() - process.close() - - if exitCode == 
0: - return (ok: true, output: "", missingPandoc: false) - return (ok: false, output: output, missingPandoc: false) - finally: - try: - if fileExists(tempRawPath): - removeFile(tempRawPath) - if fileExists(tempMarkdownPath): - removeFile(tempMarkdownPath) - if dirExists(tempDir): - removeDir(tempDir) - except OSError: - discard - -# app endpoint: strict inputs, loud errors. -proc handleConvert(req: Request) {.async.} = - let formData = parseUrlEncoded(req.body) - let markdown = formData.getOrDefault("markdown", "").strip() - - if markdown.len == 0: - let html = renderTemplate( - partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")] - ) - await respondHtml(req, Http400, html) - return - - let paperSize = - pickOption(formData.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes) - let margin = pickOption(formData.getOrDefault("margin", ""), "1in", ValidMargins) - - var mainFontFamily = formData.getOrDefault("main_font", "serif") - if mainFontFamily != "serif" and mainFontFamily != "sans": - mainFontFamily = "serif" - - let mainFont = if mainFontFamily == "sans": "TeX Gyre Heros" else: "TeX Gyre Pagella" - let lineSpacing = - pickOption(formData.getOrDefault("line_spacing", ""), "1", ValidLineSpacings) - let showPageNumbers = formData.getOrDefault("page_numbers", "") == "on" - let epoch = int(getTime().toUnix()) - let outputName = AppName & "_" & $epoch & "_" & randomHex(32) & ".pdf" - let outputPath = generatedDir() / outputName - - let conversion = runPandoc( - markdown, outputPath, paperSize, margin, mainFont, lineSpacing, showPageNumbers - ) - - if not conversion.ok: - let message = - if conversion.missingPandoc: - conversion.output - else: - let stderr = conversion.output.strip() - if stderr.len > 0: - tailText(stderr) - else: - "PDF conversion failed." 
- - let html = renderTemplate( - partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))] - ) - let code = if conversion.missingPandoc: Http500 else: Http400 - await respondHtml(req, code, html) - return - - let html = renderTemplate( - partialsDir() / "result.html", - [ - ("{{ filename }}", htmlEscape(outputName)), - ("{{ download_url }}", "/download/" & encodeUrl(outputName)), - ], - ) - await respondHtml(req, Http200, html) - -# upload endpoint. accepts image, returns markdown snippet -proc handleUploadImage(req: Request) {.async.} = - let contentType = req.headers.getOrDefault("Content-Type") - let boundary = extractBoundary(contentType) - - if boundary.len == 0: - let html = renderTemplate( - partialsDir() / "upload_error.html", - [("{{ message }}", "image file is required.")], - ) - await respondHtml(req, Http400, html) - return - - let parts = parseMultipart(req.body, boundary) - var imagePart: MultipartPart - var foundImage = false - for part in parts: - if part.name == "image": - imagePart = part - foundImage = true - break - - if not foundImage or imagePart.filename.strip().len == 0: - let html = renderTemplate( - partialsDir() / "upload_error.html", - [("{{ message }}", "image file is required.")], - ) - await respondHtml(req, Http400, html) - return - - let originalName = sanitizeFilename(baseFilename(imagePart.filename)) - if originalName.len == 0 or not isAllowedImage(originalName): - let html = renderTemplate( - partialsDir() / "upload_error.html", - [("{{ message }}", "unsupported image type.")], - ) - await respondHtml(req, Http400, html) - return - - let extensionStart = originalName.rfind('.') - let extension = originalName[extensionStart + 1 .. ^1].toLowerAscii() - - let epoch = int(getTime().toUnix()) - let storedName = "img_" & $epoch & "_" & randomHex(32) & "." 
& extension - let imagePath = uploadsDir() / storedName - - writeFile(imagePath, imagePart.content) - - let markdownSnippet = "![](uploads/" & storedName & ")" - let html = renderTemplate( - partialsDir() / "upload_result.html", - [ - ("{{ filename }}", htmlEscape(storedName)), - ("{{ markdown_snippet }}", htmlEscape(markdownSnippet)), - ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)), - ], - ) - await respondHtml(req, Http200, html) - -# router table -proc route(req: Request) {.async.} = - let path = req.url.path - - if req.reqMethod == HttpGet and path == "/": - await respondFile(req, templatesDir() / "index.html") - return - - if req.reqMethod == HttpGet and path.startsWith("/static/"): - let relativePath = decodeUrl(path[8 .. ^1]) - if not isSafeRelativePath(relativePath): - await respondText(req, Http400, "Invalid path") - return - await respondFile(req, staticDir() / relativePath) - return - - if req.reqMethod == HttpGet and path.startsWith("/uploads/"): - let relativePath = decodeUrl(path[9 .. ^1]) - if not isSafeRelativePath(relativePath): - await respondText(req, Http400, "Invalid path") - return - await respondFile(req, uploadsDir() / relativePath) - return - - if req.reqMethod == HttpGet and path.startsWith("/download/"): - let relativePath = decodeUrl(path[10 .. ^1]) - if not isSafeRelativePath(relativePath): - await respondText(req, Http400, "Invalid path") - return - await respondFile( - req, - generatedDir() / relativePath, - asAttachment = true, - attachmentName = relativePath, - ) - return - - if req.reqMethod == HttpPost and path == "/convert": - await handleConvert(req) - return - - if req.reqMethod == HttpPost and path == "/upload-image": - await handleUploadImage(req) - return - - await respondText(req, Http404, "Not found") - -# server boot, then we let htmx do htmx things. 
-when isMainModule: - randomize() - - if not dirExists(generatedDir()): - createDir(generatedDir()) - if not dirExists(uploadsDir()): - createDir(uploadsDir()) - - let server = newAsyncHttpServer() - echo "listening on http://localhost:5001" - waitFor server.serve(Port(5001), route) diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..88deef4 --- /dev/null +++ b/src/app.py @@ -0,0 +1,568 @@ +#!/usr/bin/env python3 + +# likha-pdf — markdown to pdf, no latex required +# converts markdown to html, then html to pdf via weasyprint +# falls back to reportlab if weasyprint chokes — a pdf is always produced + +import os +import re +import secrets +import time + +from flask import ( + Flask, + request, + send_from_directory, + render_template_string, + abort, +) +from markupsafe import escape +from markdown import markdown +from pygments.formatters import HtmlFormatter +from weasyprint import HTML + +APP_NAME = "likha-pdf" +PORT = 5001 + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +GENERATED_DIR = os.path.join(BASE_DIR, "generated") +UPLOADS_DIR = os.path.join(BASE_DIR, "uploads") +TEMPLATES_DIR = os.path.join(BASE_DIR, "templates") +PARTIALS_DIR = os.path.join(TEMPLATES_DIR, "partials") +STATIC_DIR = os.path.join(BASE_DIR, "static") + +ALLOWED_IMAGE_EXTS = {"png", "jpg", "jpeg", "gif", "webp", "svg"} + +VALID_PAPER_SIZES = { + "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper", + "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper", + "c4paper", "c5paper", "c6paper", + "letterpaper", "legalpaper", "executivepaper", + "ledgerpaper", "tabloid", "statement", "flsa", +} + +VALID_MARGINS = {"0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"} + +VALID_LINE_SPACINGS = {"1", "1.5", "2"} + +# css page dimensions for each paper size +PAPER_CSS = { + "a0paper": "841mm 1189mm", + "a1paper": "594mm 841mm", + "a2paper": "420mm 594mm", + "a3paper": "297mm 420mm", + "a4paper": "210mm 297mm", + 
"a5paper": "148mm 210mm", + "a6paper": "105mm 148mm", + "b0paper": "1000mm 1414mm", + "b1paper": "707mm 1000mm", + "b2paper": "500mm 707mm", + "b3paper": "353mm 500mm", + "b4paper": "250mm 353mm", + "b5paper": "176mm 250mm", + "b6paper": "125mm 176mm", + "c4paper": "229mm 324mm", + "c5paper": "162mm 229mm", + "c6paper": "114mm 162mm", + "letterpaper": "8.5in 11in", + "legalpaper": "8.5in 14in", + "executivepaper": "7in 10in", + "ledgerpaper": "17in 11in", + "tabloid": "11in 17in", + "statement": "5.5in 8.5in", + "flsa": "8.5in 13in", +} + +MARKDOWN_EXTENSIONS = [ + "tables", + "fenced_code", + "codehilite", + "nl2br", + "sane_lists", + "smarty", + "toc", + "attr_list", + "md_in_html", +] + +MARKDOWN_EXT_CONFIG = { + "codehilite": { + "css_class": "highlight", + "guess_lang": True, + "noclasses": True, + }, +} + +app = Flask( + __name__, + template_folder=TEMPLATES_DIR, + static_folder=STATIC_DIR, + static_url_path="/static", +) +app.config["MAX_CONTENT_LENGTH"] = 64 * 1024 * 1024 # 64 MB + + +# helpers +def random_hex(length=32): + return secrets.token_hex(length // 2) + + +def pick_option(value, fallback, valid): + return value if value in valid else fallback + + +def sanitize_filename(name): + """keep only safe characters in a filename""" + name = os.path.basename(name.replace("\\", "/")) + out = [] + for ch in name: + if ch.isalnum() or ch in "-_.": + out.append(ch) + elif ch == " ": + out.append("_") + return "".join(out) + + +def is_allowed_image(filename): + dot = filename.rfind(".") + if dot < 1 or dot == len(filename) - 1: + return False + ext = filename[dot + 1:].lower() + return ext in ALLOWED_IMAGE_EXTS + + +def is_safe_relative_path(path_part): + return ( + bool(path_part) + and ".." 
not in path_part + and "\\" not in path_part + and not path_part.startswith("/") + ) + + +def read_partial(name, replacements=None): + """read a partial html template and apply replacements""" + path = os.path.join(PARTIALS_DIR, name) + with open(path, "r", encoding="utf-8") as f: + content = f.read() + if replacements: + for token, value in replacements.items(): + content = content.replace(token, value) + return content + + +def tail_text(value, max_len=1200): + if len(value) <= max_len: + return value + return value[-max_len:] + + +# pdf stylesheet generator +def build_pdf_css(paper_size, margin, font_family, line_spacing, show_page_numbers): + """build the css for weasyprint pdf rendering""" + page_dims = PAPER_CSS.get(paper_size, "8.5in 11in") + + if font_family == "sans": + font_stack = '"Helvetica Neue", Helvetica, Arial, "Noto Sans", sans-serif' + else: + font_stack = '"Georgia", "Noto Serif", "Times New Roman", serif' + + page_number_css = "" + if show_page_numbers: + page_number_css = """ + @bottom-center { + content: counter(page); + font-size: 9pt; + color: #666; + }""" + + return f""" +@page {{ + size: {page_dims}; + margin: {margin};{page_number_css} +}} + +body {{ + font-family: {font_stack}; + font-size: 11pt; + line-height: {line_spacing}; + color: #000; + word-wrap: break-word; + overflow-wrap: break-word; +}} + +h1, h2, h3, h4, h5, h6 {{ + margin-top: 1em; + margin-bottom: 0.4em; + page-break-after: avoid; +}} + +h1 {{ font-size: 20pt; }} +h2 {{ font-size: 16pt; }} +h3 {{ font-size: 13pt; }} +h4 {{ font-size: 11pt; }} + +p {{ + margin: 0 0 0.6em 0; +}} + +pre {{ + background: #f5f5f5; + border: 1px solid #ddd; + border-radius: 3px; + padding: 0.6em; + font-size: 9pt; + white-space: pre-wrap; + word-wrap: break-word; + overflow-wrap: break-word; + page-break-inside: avoid; +}} + +code {{ + font-family: "Courier New", Courier, "Liberation Mono", monospace; + font-size: 9pt; +}} + +p > code, li > code {{ + background: #f0f0f0; + padding: 0.1em 0.3em; 
+ border-radius: 2px; +}} + +blockquote {{ + border-left: 3px solid #ccc; + margin: 0.6em 0; + padding: 0.3em 0.8em; + color: #555; +}} + +table {{ + border-collapse: collapse; + width: 100%; + margin: 0.6em 0; + page-break-inside: avoid; +}} + +th, td {{ + border: 1px solid #ccc; + padding: 0.4em 0.6em; + text-align: left; +}} + +th {{ + background: #f5f5f5; + font-weight: bold; +}} + +img {{ + max-width: 100%; + height: auto; +}} + +a {{ + color: #0066cc; + text-decoration: underline; +}} + +hr {{ + border: none; + border-top: 1px solid #ccc; + margin: 1em 0; +}} + +ul, ol {{ + margin: 0.4em 0; + padding-left: 1.5em; +}} + +li {{ + margin-bottom: 0.2em; +}} +""" + + +# pdf conversion +def markdown_to_html(source): + """convert markdown text to an html fragment""" + return markdown( + source, + extensions=MARKDOWN_EXTENSIONS, + extension_configs=MARKDOWN_EXT_CONFIG, + ) + + +def build_full_html(body_html, css): + """wrap the converted html body in a full document with styles""" + return f""" + + + + + + +{body_html} + +""" + + +def convert_with_weasyprint(full_html, output_path): + """render html to pdf via weasyprint. returns (ok, error_msg).""" + try: + doc = HTML( + string=full_html, + base_url=BASE_DIR, + ) + doc.write_pdf(output_path) + return True, "" + except Exception as exc: + return False, str(exc) + + +def convert_with_reportlab(source_markdown, output_path, paper_size, margin, + font_family, line_spacing): + """fallback: produce a basic text pdf with reportlab. 
+ not pretty, but guarantees a file is always created.""" + from reportlab.lib.pagesizes import ( + A0, A1, A2, A3, A4, A5, A6, + B0, B1, B2, B3, B4, B5, B6, + LETTER, LEGAL, LEDGER, TABLOID, + ) + from reportlab.lib.units import inch, mm + from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Preformatted + from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle + from reportlab.lib.enums import TA_LEFT + + size_map = { + "a0paper": A0, "a1paper": A1, "a2paper": A2, "a3paper": A3, + "a4paper": A4, "a5paper": A5, "a6paper": A6, + "b0paper": B0, "b1paper": B1, "b2paper": B2, "b3paper": B3, + "b4paper": B4, "b5paper": B5, "b6paper": B6, + "letterpaper": LETTER, "legalpaper": LEGAL, + "executivepaper": (7 * inch, 10 * inch), + "ledgerpaper": LEDGER, "tabloid": TABLOID, + "statement": (5.5 * inch, 8.5 * inch), + "flsa": (8.5 * inch, 13 * inch), + "c4paper": (229 * mm, 324 * mm), + "c5paper": (162 * mm, 229 * mm), + "c6paper": (114 * mm, 162 * mm), + } + + margin_map = { + "0.25in": 0.25 * inch, "0.5in": 0.5 * inch, "0.75in": 0.75 * inch, + "1in": 1.0 * inch, "1.25in": 1.25 * inch, "1.5in": 1.5 * inch, + "1.75in": 1.75 * inch, + } + + pagesize = size_map.get(paper_size, LETTER) + m = margin_map.get(margin, 1.0 * inch) + + doc = SimpleDocTemplate( + output_path, + pagesize=pagesize, + leftMargin=m, rightMargin=m, + topMargin=m, bottomMargin=m, + ) + + styles = getSampleStyleSheet() + font_name = "Helvetica" if font_family == "sans" else "Times-Roman" + spacing_val = float(line_spacing) if line_spacing else 1.0 + + body_style = ParagraphStyle( + "BodyCustom", + parent=styles["Normal"], + fontName=font_name, + fontSize=11, + leading=11 * spacing_val * 1.2, + alignment=TA_LEFT, + ) + + code_style = ParagraphStyle( + "CodeCustom", + parent=styles["Code"], + fontName="Courier", + fontSize=9, + leading=11, + leftIndent=12, + ) + + story = [] + in_code_block = False + code_lines = [] + + for line in source_markdown.splitlines(): + if 
line.startswith("```"): + if in_code_block: + # close code block + code_text = "\n".join(code_lines) + story.append(Preformatted(code_text, code_style)) + story.append(Spacer(1, 6)) + code_lines = [] + in_code_block = False + else: + in_code_block = True + continue + + if in_code_block: + code_lines.append(line) + continue + + stripped = line.strip() + + if not stripped: + story.append(Spacer(1, 6)) + continue + + # heading detection + if stripped.startswith("#"): + level = len(stripped) - len(stripped.lstrip("#")) + level = min(level, 6) + text = stripped.lstrip("#").strip() + heading_style = ParagraphStyle( + f"H{level}", + parent=styles["Heading1"], + fontName=font_name, + fontSize=max(20 - (level * 2), 11), + ) + story.append(Paragraph(text, heading_style)) + story.append(Spacer(1, 4)) + continue + + story.append(Paragraph(line, body_style)) + + # flush any unclosed code block + if code_lines: + code_text = "\n".join(code_lines) + story.append(Preformatted(code_text, code_style)) + + doc.build(story) + + +def generate_pdf(source_markdown, output_path, paper_size, margin, + font_family, line_spacing, show_page_numbers): + """convert markdown to pdf. 
always produces a file.""" + body_html = markdown_to_html(source_markdown) + css = build_pdf_css(paper_size, margin, font_family, line_spacing, show_page_numbers) + full_html = build_full_html(body_html, css) + + ok, err = convert_with_weasyprint(full_html, output_path) + if ok: + return True, "" + + # weasyprint failed — fall back to reportlab + try: + convert_with_reportlab( + source_markdown, output_path, + paper_size, margin, font_family, line_spacing, + ) + return True, f"(used fallback renderer) {err}" + except Exception as fallback_err: + return False, f"weasyprint: {err} | reportlab: {fallback_err}" + + +# routes +@app.route("/") +def index(): + index_path = os.path.join(TEMPLATES_DIR, "index.html") + with open(index_path, "r", encoding="utf-8") as f: + return f.read() + + +@app.route("/convert", methods=["POST"]) +def convert(): + md = request.form.get("markdown", "").strip() + if not md: + return read_partial("error.html", { + "{{ message }}": "Markdown content is required.", + }), 400 + + paper_size = pick_option( + request.form.get("paper_size", ""), "letterpaper", VALID_PAPER_SIZES, + ) + margin = pick_option( + request.form.get("margin", ""), "1in", VALID_MARGINS, + ) + + font_family = request.form.get("main_font", "serif") + if font_family not in ("serif", "sans"): + font_family = "serif" + + line_spacing = pick_option( + request.form.get("line_spacing", ""), "1", VALID_LINE_SPACINGS, + ) + show_page_numbers = request.form.get("page_numbers") == "on" + + epoch = int(time.time()) + output_name = f"{APP_NAME}_{epoch}_{random_hex()}.pdf" + output_path = os.path.join(GENERATED_DIR, output_name) + + ok, err = generate_pdf( + md, output_path, + paper_size, margin, font_family, line_spacing, show_page_numbers, + ) + + if not ok: + return read_partial("error.html", { + "{{ message }}": str(escape(tail_text(err))), + }), 500 + + return read_partial("result.html", { + "{{ filename }}": str(escape(output_name)), + "{{ download_url }}": 
f"/download/{output_name}", + }) + + +@app.route("/upload-image", methods=["POST"]) +def upload_image(): + uploaded = request.files.get("image") + if not uploaded or not uploaded.filename or not uploaded.filename.strip(): + return read_partial("upload_error.html", { + "{{ message }}": "image file is required.", + }), 400 + + original = sanitize_filename(uploaded.filename) + if not original or not is_allowed_image(original): + return read_partial("upload_error.html", { + "{{ message }}": "unsupported image type.", + }), 400 + + ext = original.rsplit(".", 1)[-1].lower() + epoch = int(time.time()) + stored_name = f"img_{epoch}_{random_hex()}.{ext}" + image_path = os.path.join(UPLOADS_DIR, stored_name) + uploaded.save(image_path) + + snippet = f"![](uploads/{stored_name})" + return read_partial("upload_result.html", { + "{{ filename }}": str(escape(stored_name)), + "{{ markdown_snippet }}": str(escape(snippet)), + "{{ preview_url }}": f"/uploads/{stored_name}", + }) + + +@app.route("/uploads/") +def serve_upload(filename): + if not is_safe_relative_path(filename): + abort(400) + return send_from_directory(UPLOADS_DIR, filename) + + +@app.route("/download/") +def download(filename): + if not is_safe_relative_path(filename): + abort(400) + return send_from_directory( + GENERATED_DIR, filename, + as_attachment=True, + download_name=filename, + ) + + +# main +if __name__ == "__main__": + os.makedirs(GENERATED_DIR, exist_ok=True) + os.makedirs(UPLOADS_DIR, exist_ok=True) + + print(f" {APP_NAME} listening on http://localhost:{PORT}") + app.run(host="0.0.0.0", port=PORT, debug=False) diff --git a/src/latex/template.tex b/src/latex/template.tex deleted file mode 100644 index 8f9aacc..0000000 --- a/src/latex/template.tex +++ /dev/null @@ -1,88 +0,0 @@ -\documentclass[11pt]{article} - -\usepackage{fontspec} -\newfontfamily{\emojifont}{Noto Color Emoji}[Renderer=HarfBuzz] -\directlua{ - luaotfload.add_fallback("emojifallback", { - "Noto Color Emoji:mode=harf;" - }) -} 
-\setmainfont{$mainfont$}[RawFeature={fallback=emojifallback}] -\setmonofont{Latin Modern Mono} - -$if(paperwidth)$ -\usepackage[paperwidth=$paperwidth$,paperheight=$paperheight$,margin=$margin$]{geometry} -$else$ -\usepackage[paper=$papersize$,margin=$margin$]{geometry} -$endif$ -\usepackage{microtype} -\usepackage{parskip} -\usepackage{setspace} -\setstretch{$linespacing$} -\usepackage{xcolor} -\usepackage{graphicx} -\usepackage{float} -\usepackage{booktabs} -\usepackage{longtable} -\usepackage{array} -\usepackage{calc} -\usepackage{etoolbox} -\usepackage{fancyvrb} -\usepackage{fvextra} -\DefineVerbatimEnvironment{Highlighting}{Verbatim}{breaklines,commandchars=\\\{\}} -\usepackage{hyperref} -\hypersetup{ - colorlinks=true, - linkcolor=black, - urlcolor=blue, - citecolor=black, - pdfauthor={}, - pdftitle={likha-pdf} -} -\urlstyle{same} - -\setlength{\emergencystretch}{3em} -\setcounter{secnumdepth}{0} -\setkeys{Gin}{width=\linewidth,keepaspectratio} -\makeatletter -\newsavebox\pandoc@box -\newcommand*\pandocbounded[1]{% - \sbox\pandoc@box{#1}% - \Gscale@div\@tempa{\textheight}{\dimexpr\ht\pandoc@box+\dp\pandoc@box\relax}% - \Gscale@div\@tempb{\linewidth}{\wd\pandoc@box}% - \ifdim\@tempb\p@<\@tempa\p@ - \scalebox{\@tempb}{\usebox\pandoc@box}% - \else - \scalebox{\@tempa}{\usebox\pandoc@box}% - \fi -} -\makeatother -\let\origfigure\figure -\let\endorigfigure\endfigure -\renewenvironment{figure}[1][] { - \expandafter\origfigure\expandafter[H] -} { - \endorigfigure -} -\providecommand{\tightlist}{ - \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt} -} - -$if(highlighting-macros)$ -$highlighting-macros$ -$endif$ - -$if(hidepages)$ -\pagestyle{empty} -$endif$ - -\begin{document} - -$if(title)$ -{\huge\bfseries $title$\par} -\vspace{1em} -$endif$ - -$body$ - -\end{document} \ No newline at end of file diff --git a/src/templates/index.html b/src/templates/index.html index 30c4b93..a9501e9 100644 --- a/src/templates/index.html +++ b/src/templates/index.html @@ -22,7 
+22,7 @@

likha-pdf

-

simple markdown export with pandoc + lualatex.

+

simple markdown to pdf export.