likha-pdf

simple markdown export with pandoc + lualatex.

simple markdown to pdf export.

-- cgit v1.2.3 From fe9d6171de63ce839b01eade15ddcb6bb14e06f8 Mon Sep 17 00:00:00 2001 From: kj_sh604 Date: Wed, 11 Mar 2026 22:47:32 -0400 Subject: refactor: prod-level changes --- Dockerfile | 18 +- README.md | 24 ++ requirements.txt | 1 + src/__legacy_src/app.nim | 525 ++++++++++++++++++++++++++++++++++++ src/__legacy_src/backend_compat.nim | 525 ++++++++++++++++++++++++++++++++++++ src/__legacy_src/server.nim | 525 ++++++++++++++++++++++++++++++++++++ src/app.py | 280 ++++++++++--------- 7 files changed, 1774 insertions(+), 124 deletions(-) create mode 100644 src/__legacy_src/app.nim create mode 100644 src/__legacy_src/backend_compat.nim create mode 100644 src/__legacy_src/server.nim diff --git a/Dockerfile b/Dockerfile index e87147d..8ff9c84 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,18 @@ FROM python:3.12-slim -ENV DEBIAN_FRONTEND=noninteractive +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PORT=5001 \ + HOST=0.0.0.0 \ + LOG_LEVEL=INFO \ + TRUST_PROXY=1 RUN apt-get update && apt-get install -y --no-install-recommends \ libcairo2 \ libpango-1.0-0 \ libpangocairo-1.0-0 \ libgdk-pixbuf-2.0-0 \ - libffi-dev \ shared-mime-info \ fonts-noto \ fonts-noto-color-emoji \ @@ -15,13 +20,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app +RUN addgroup --system app && adduser --system --ingroup app app + COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir --disable-pip-version-check -r requirements.txt COPY src/ . 
RUN mkdir -p generated uploads +RUN chown -R app:app /app +USER app + EXPOSE 5001 -CMD ["python3", "app.py"] +CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--workers", "2", "--threads", "4", "--timeout", "180", "--graceful-timeout", "30", "--access-logfile", "-", "--error-logfile", "-", "app:app"] diff --git a/README.md b/README.md index 7d7a087..0b52be4 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ a simple web app that converts markdown to pdf. - python 3.10+ - system packages: `libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 shared-mime-info` +- gunicorn (installed from `requirements.txt`) ## image usage @@ -29,11 +30,34 @@ a simple web app that converts markdown to pdf. ### local ```bash +python -m venv .venv +source .venv/bin/activate pip install -r requirements.txt cd src/ python3 app.py ``` +### production (vps + nginx) + +```bash +cd src/ +../.venv/bin/gunicorn \ + --bind 127.0.0.1:5001 \ + --workers 2 \ + --threads 4 \ + --timeout 180 \ + --graceful-timeout 30 \ + --access-logfile - \ + --error-logfile - \ + app:app +``` + +nginx should reverse proxy to `127.0.0.1:5001` and pass: + +- `X-Forwarded-For` +- `X-Forwarded-Proto` +- `X-Forwarded-Host` + ### docker ```bash diff --git a/requirements.txt b/requirements.txt index e081602..e50d931 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ markdown==3.7.* pygments==2.19.* weasyprint==63.* reportlab==4.3.* +gunicorn==23.0.* \ No newline at end of file diff --git a/src/__legacy_src/app.nim b/src/__legacy_src/app.nim new file mode 100644 index 0000000..8b64793 --- /dev/null +++ b/src/__legacy_src/app.nim @@ -0,0 +1,525 @@ +import + std/[ + asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri, + random, + ] + +# tiny backend in nimlang, may be stupid, but this was fun + +const + AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"] + ValidPaperSizes = [ + "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", 
"a6paper", + "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper", + "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper", + "ledgerpaper", "tabloid", "statement", "flsa", + ] + ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"] + ValidLineSpacings = ["1", "1.5", "2"] + CustomPaperDimensions = [ + ("tabloid", "11in", "17in"), + ("statement", "5.5in", "8.5in"), + ("flsa", "8.5in", "13in"), + ] + +const AppName = "likha-pdf" + +proc lookupCustomPaper(name: string): tuple[width: string, height: string] = + for (paperName, w, h) in CustomPaperDimensions: + if paperName == name: + return (width: w, height: h) + (width: "", height: "") + +proc baseDir(): string {.inline.} = + getAppDir() + +proc generatedDir(): string {.inline.} = + baseDir() / "generated" + +proc uploadsDir(): string {.inline.} = + baseDir() / "uploads" + +proc latexTemplatePath(): string {.inline.} = + baseDir() / "latex" / "template.tex" + +proc templatesDir(): string {.inline.} = + baseDir() / "templates" + +proc partialsDir(): string {.inline.} = + templatesDir() / "partials" + +proc staticDir(): string {.inline.} = + baseDir() / "static" + +type MultipartPart = object + name: string + filename: string + contentType: string + content: string + +# helpers +proc htmlEscape(value: string): string = + result = value + result = result.replace("&", "&") + result = result.replace("<", "<") + result = result.replace(">", ">") + result = result.replace("\"", """) + result = result.replace("'", "'") + +proc randomHex(length: int): string = + const hexChars = "0123456789abcdef" + result = newStringOfCap(length) + for _ in 0 ..< length: + result.add(hexChars[rand(15)]) + +proc renderTemplate( + filePath: string, replacements: openArray[(string, string)] +): string = + result = readFile(filePath) + for (token, replacement) in replacements: + result = result.replace(token, replacement) + +proc decodeFormComponent(value: string): string 
= + decodeUrl(value.replace("+", " ")) + +proc parseUrlEncoded(body: string): Table[string, string] = + result = initTable[string, string]() + if body.len == 0: + return + + for pair in body.split("&"): + if pair.len == 0: + continue + let separator = pair.find('=') + if separator < 0: + result[decodeFormComponent(pair)] = "" + else: + let key = decodeFormComponent(pair[0 ..< separator]) + let value = decodeFormComponent(pair[separator + 1 .. ^1]) + result[key] = value + +# "options" are optional, defaults are forever. +proc pickOption(value: string, fallback: string, options: openArray[string]): string = + for option in options: + if option == value: + return value + fallback + +proc sanitizeFilename(filename: string): string = + result = newStringOfCap(filename.len) + for ch in filename: + if (ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or + (ch >= '0' and ch <= '9') or (ch in {'-', '_', '.'}): + result.add(ch) + elif ch == ' ': + result.add('_') + +proc baseFilename(value: string): string = + var normalized = value.replace("\\", "/") + let index = normalized.rfind('/') + if index >= 0 and index < normalized.high: + normalized = normalized[index + 1 .. ^1] + elif index == normalized.high: + normalized = "" + normalized + +proc isAllowedImage(filename: string): bool = + let dot = filename.rfind('.') + if dot < 1 or dot == filename.high: + return false + let extension = filename[dot + 1 .. ^1].toLowerAscii() + for allowed in AllowedImageExtensions: + if extension == allowed: + return true + false + +proc tailText(value: string, maxLen: int = 1200): string = + if value.len <= maxLen: + return value + value[value.len - maxLen .. ^1] + +proc extractBoundary(contentType: string): string = + for part in contentType.split(';'): + let token = part.strip() + if token.toLowerAscii().startsWith("boundary="): + return token[9 .. 
^1].strip(chars = {'\"', '\''}) + "" + +proc stripTrailingCrlf(value: string): string = + result = value + if result.len >= 2 and result.endsWith("\r\n"): + result.setLen(result.len - 2) + +# hand-rolled multipart parsing, yes i am aware that this is "eh" +proc parseMultipart(body: string, boundary: string): seq[MultipartPart] = + let delimiter = "--" & boundary + for rawChunk in body.split(delimiter): + var chunk = rawChunk + if chunk.len == 0: + continue + if chunk == "--" or chunk == "--\r\n": + continue + if chunk.startsWith("\r\n"): + chunk = chunk[2 .. ^1] + + chunk = stripTrailingCrlf(chunk) + + if chunk.len == 2 and chunk == "--": + continue + + let splitIndex = chunk.find("\r\n\r\n") + if splitIndex < 0: + continue + + let headerBlock = chunk[0 ..< splitIndex] + var content = chunk[splitIndex + 4 .. ^1] + content = stripTrailingCrlf(content) + + var name = "" + var filename = "" + var contentType = "application/octet-stream" + + for line in headerBlock.split("\r\n"): + let separator = line.find(':') + if separator <= 0: + continue + let headerName = line[0 ..< separator].strip().toLowerAscii() + let headerValue = line[separator + 1 .. ^1].strip() + + if headerName == "content-disposition": + for part in headerValue.split(';'): + let token = part.strip() + if token.startsWith("name="): + name = token[5 .. ^1].strip(chars = {'\"', '\''}) + elif token.startsWith("filename="): + filename = token[9 .. 
^1].strip(chars = {'\"', '\''}) + elif headerName == "content-type": + contentType = headerValue + + if name.len > 0: + result.add( + MultipartPart( + name: name, filename: filename, contentType: contentType, content: content + ) + ) + +proc isSafeRelativePath(pathPart: string): bool = + pathPart.len > 0 and not pathPart.contains("..") and not pathPart.contains('\\') and + not pathPart.startsWith("/") + +proc fileContentType(filePath: string): string = + let lowered = filePath.toLowerAscii() + if lowered.endsWith(".js"): + return "application/javascript; charset=utf-8" + if lowered.endsWith(".css"): + return "text/css; charset=utf-8" + if lowered.endsWith(".html"): + return "text/html; charset=utf-8" + if lowered.endsWith(".png"): + return "image/png" + if lowered.endsWith(".jpg") or lowered.endsWith(".jpeg"): + return "image/jpeg" + if lowered.endsWith(".gif"): + return "image/gif" + if lowered.endsWith(".webp"): + return "image/webp" + if lowered.endsWith(".svg"): + return "image/svg+xml" + if lowered.endsWith(".pdf"): + return "application/pdf" + "application/octet-stream" + +# response wrappers +proc respondHtml(req: Request, code: HttpCode, content: string) {.async.} = + let headers = newHttpHeaders({"Content-Type": "text/html; charset=utf-8"}) + await req.respond(code, content, headers) + +proc respondText(req: Request, code: HttpCode, content: string) {.async.} = + let headers = newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"}) + await req.respond(code, content, headers) + +proc respondFile( + req: Request, + filePath: string, + asAttachment: bool = false, + attachmentName: string = "", +) {.async.} = + if not fileExists(filePath): + await respondText(req, Http404, "Not found") + return + + var headers = newHttpHeaders() + headers["Content-Type"] = fileContentType(filePath) + if asAttachment and attachmentName.len > 0: + headers["Content-Disposition"] = "attachment; filename=\"" & attachmentName & "\"" + + await req.respond(Http200, 
readFile(filePath), headers) + +# pandoc does the heavy lifting +proc runPandoc( + sourceMarkdown: string, + outputPath: string, + paperSize: string, + margin: string, + mainFont: string, + lineSpacing: string, + showPageNumbers: bool, +): tuple[ok: bool, output: string, missingPandoc: bool] = + let tempDir = getTempDir() / (AppName & "-" & randomHex(10)) + createDir(tempDir) + let tempMarkdownPath = tempDir / "source.md" + let tempRawPath = tempDir / "raw.md" + + try: + # write raw markdown first + writeFile(tempRawPath, sourceMarkdown) + + # preprocess markdown: convert to ascii with transliteration and normalize quotes + let iconvCmd = + "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) & + " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath) + let (_, iconvExitCode) = execCmdEx(iconvCmd) + + if iconvExitCode != 0: + # if preprocessing fails, fall back to original content + writeFile(tempMarkdownPath, sourceMarkdown) + + var args = @[ + tempMarkdownPath, + "--from", + "markdown+emoji+hard_line_breaks", + "--pdf-engine=lualatex", + "--template", + latexTemplatePath(), + "-V", + "margin=" & margin, + "-V", + "mainfont=" & mainFont, + "-V", + "linespacing=" & lineSpacing, + "--resource-path", + baseDir() & ":" & uploadsDir() & ":" & tempDir, + "-o", + outputPath, + ] + + let dims = lookupCustomPaper(paperSize) + if dims.width.len > 0: + args.add("-V") + args.add("paperwidth=" & dims.width) + args.add("-V") + args.add("paperheight=" & dims.height) + else: + args.add("-V") + args.add("papersize=" & paperSize) + + if not showPageNumbers: + args.add("-V") + args.add("hidepages=true") + + var process: Process + try: + process = + startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut}) + except OSError: + return ( + ok: false, + output: "Pandoc is not installed or not in PATH.", + missingPandoc: true, + ) + + let output = process.outputStream.readAll() + let exitCode = process.waitForExit() + process.close() + + if exitCode == 
0: + return (ok: true, output: "", missingPandoc: false) + return (ok: false, output: output, missingPandoc: false) + finally: + try: + if fileExists(tempRawPath): + removeFile(tempRawPath) + if fileExists(tempMarkdownPath): + removeFile(tempMarkdownPath) + if dirExists(tempDir): + removeDir(tempDir) + except OSError: + discard + +# app endpoint: strict inputs, loud errors. +proc handleConvert(req: Request) {.async.} = + let formData = parseUrlEncoded(req.body) + let markdown = formData.getOrDefault("markdown", "").strip() + + if markdown.len == 0: + let html = renderTemplate( + partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")] + ) + await respondHtml(req, Http400, html) + return + + let paperSize = + pickOption(formData.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes) + let margin = pickOption(formData.getOrDefault("margin", ""), "1in", ValidMargins) + + var mainFontFamily = formData.getOrDefault("main_font", "serif") + if mainFontFamily != "serif" and mainFontFamily != "sans": + mainFontFamily = "serif" + + let mainFont = if mainFontFamily == "sans": "TeX Gyre Heros" else: "TeX Gyre Pagella" + let lineSpacing = + pickOption(formData.getOrDefault("line_spacing", ""), "1", ValidLineSpacings) + let showPageNumbers = formData.getOrDefault("page_numbers", "") == "on" + let epoch = int(getTime().toUnix()) + let outputName = AppName & "_" & $epoch & "_" & randomHex(32) & ".pdf" + let outputPath = generatedDir() / outputName + + let conversion = runPandoc( + markdown, outputPath, paperSize, margin, mainFont, lineSpacing, showPageNumbers + ) + + if not conversion.ok: + let message = + if conversion.missingPandoc: + conversion.output + else: + let stderr = conversion.output.strip() + if stderr.len > 0: + tailText(stderr) + else: + "PDF conversion failed." 
+ + let html = renderTemplate( + partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))] + ) + let code = if conversion.missingPandoc: Http500 else: Http400 + await respondHtml(req, code, html) + return + + let html = renderTemplate( + partialsDir() / "result.html", + [ + ("{{ filename }}", htmlEscape(outputName)), + ("{{ download_url }}", "/download/" & encodeUrl(outputName)), + ], + ) + await respondHtml(req, Http200, html) + +# upload endpoint. accepts image, returns markdown snippet +proc handleUploadImage(req: Request) {.async.} = + let contentType = req.headers.getOrDefault("Content-Type") + let boundary = extractBoundary(contentType) + + if boundary.len == 0: + let html = renderTemplate( + partialsDir() / "upload_error.html", + [("{{ message }}", "image file is required.")], + ) + await respondHtml(req, Http400, html) + return + + let parts = parseMultipart(req.body, boundary) + var imagePart: MultipartPart + var foundImage = false + for part in parts: + if part.name == "image": + imagePart = part + foundImage = true + break + + if not foundImage or imagePart.filename.strip().len == 0: + let html = renderTemplate( + partialsDir() / "upload_error.html", + [("{{ message }}", "image file is required.")], + ) + await respondHtml(req, Http400, html) + return + + let originalName = sanitizeFilename(baseFilename(imagePart.filename)) + if originalName.len == 0 or not isAllowedImage(originalName): + let html = renderTemplate( + partialsDir() / "upload_error.html", + [("{{ message }}", "unsupported image type.")], + ) + await respondHtml(req, Http400, html) + return + + let extensionStart = originalName.rfind('.') + let extension = originalName[extensionStart + 1 .. ^1].toLowerAscii() + + let epoch = int(getTime().toUnix()) + let storedName = "img_" & $epoch & "_" & randomHex(32) & "." 
& extension + let imagePath = uploadsDir() / storedName + + writeFile(imagePath, imagePart.content) + + let markdownSnippet = "![](uploads/" & storedName & ")" + let html = renderTemplate( + partialsDir() / "upload_result.html", + [ + ("{{ filename }}", htmlEscape(storedName)), + ("{{ markdown_snippet }}", htmlEscape(markdownSnippet)), + ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)), + ], + ) + await respondHtml(req, Http200, html) + +# router table +proc route(req: Request) {.async.} = + let path = req.url.path + + if req.reqMethod == HttpGet and path == "/": + await respondFile(req, templatesDir() / "index.html") + return + + if req.reqMethod == HttpGet and path.startsWith("/static/"): + let relativePath = decodeUrl(path[8 .. ^1]) + if not isSafeRelativePath(relativePath): + await respondText(req, Http400, "Invalid path") + return + await respondFile(req, staticDir() / relativePath) + return + + if req.reqMethod == HttpGet and path.startsWith("/uploads/"): + let relativePath = decodeUrl(path[9 .. ^1]) + if not isSafeRelativePath(relativePath): + await respondText(req, Http400, "Invalid path") + return + await respondFile(req, uploadsDir() / relativePath) + return + + if req.reqMethod == HttpGet and path.startsWith("/download/"): + let relativePath = decodeUrl(path[10 .. ^1]) + if not isSafeRelativePath(relativePath): + await respondText(req, Http400, "Invalid path") + return + await respondFile( + req, + generatedDir() / relativePath, + asAttachment = true, + attachmentName = relativePath, + ) + return + + if req.reqMethod == HttpPost and path == "/convert": + await handleConvert(req) + return + + if req.reqMethod == HttpPost and path == "/upload-image": + await handleUploadImage(req) + return + + await respondText(req, Http404, "Not found") + +# server boot, then we let htmx do htmx things. 
+when isMainModule: + randomize() + + if not dirExists(generatedDir()): + createDir(generatedDir()) + if not dirExists(uploadsDir()): + createDir(uploadsDir()) + + let server = newAsyncHttpServer() + echo "listening on http://localhost:5001" + waitFor server.serve(Port(5001), route) \ No newline at end of file diff --git a/src/__legacy_src/backend_compat.nim b/src/__legacy_src/backend_compat.nim new file mode 100644 index 0000000..8b64793 --- /dev/null +++ b/src/__legacy_src/backend_compat.nim @@ -0,0 +1,525 @@ +import + std/[ + asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri, + random, + ] + +# tiny backend in nimlang, may be stupid, but this was fun + +const + AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"] + ValidPaperSizes = [ + "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper", + "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper", + "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper", + "ledgerpaper", "tabloid", "statement", "flsa", + ] + ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"] + ValidLineSpacings = ["1", "1.5", "2"] + CustomPaperDimensions = [ + ("tabloid", "11in", "17in"), + ("statement", "5.5in", "8.5in"), + ("flsa", "8.5in", "13in"), + ] + +const AppName = "likha-pdf" + +proc lookupCustomPaper(name: string): tuple[width: string, height: string] = + for (paperName, w, h) in CustomPaperDimensions: + if paperName == name: + return (width: w, height: h) + (width: "", height: "") + +proc baseDir(): string {.inline.} = + getAppDir() + +proc generatedDir(): string {.inline.} = + baseDir() / "generated" + +proc uploadsDir(): string {.inline.} = + baseDir() / "uploads" + +proc latexTemplatePath(): string {.inline.} = + baseDir() / "latex" / "template.tex" + +proc templatesDir(): string {.inline.} = + baseDir() / "templates" + +proc partialsDir(): string {.inline.} = + templatesDir() / 
"partials" + +proc staticDir(): string {.inline.} = + baseDir() / "static" + +type MultipartPart = object + name: string + filename: string + contentType: string + content: string + +# helpers +proc htmlEscape(value: string): string = + result = value + result = result.replace("&", "&") + result = result.replace("<", "<") + result = result.replace(">", ">") + result = result.replace("\"", """) + result = result.replace("'", "'") + +proc randomHex(length: int): string = + const hexChars = "0123456789abcdef" + result = newStringOfCap(length) + for _ in 0 ..< length: + result.add(hexChars[rand(15)]) + +proc renderTemplate( + filePath: string, replacements: openArray[(string, string)] +): string = + result = readFile(filePath) + for (token, replacement) in replacements: + result = result.replace(token, replacement) + +proc decodeFormComponent(value: string): string = + decodeUrl(value.replace("+", " ")) + +proc parseUrlEncoded(body: string): Table[string, string] = + result = initTable[string, string]() + if body.len == 0: + return + + for pair in body.split("&"): + if pair.len == 0: + continue + let separator = pair.find('=') + if separator < 0: + result[decodeFormComponent(pair)] = "" + else: + let key = decodeFormComponent(pair[0 ..< separator]) + let value = decodeFormComponent(pair[separator + 1 .. ^1]) + result[key] = value + +# "options" are optional, defaults are forever. 
+proc pickOption(value: string, fallback: string, options: openArray[string]): string = + for option in options: + if option == value: + return value + fallback + +proc sanitizeFilename(filename: string): string = + result = newStringOfCap(filename.len) + for ch in filename: + if (ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or + (ch >= '0' and ch <= '9') or (ch in {'-', '_', '.'}): + result.add(ch) + elif ch == ' ': + result.add('_') + +proc baseFilename(value: string): string = + var normalized = value.replace("\\", "/") + let index = normalized.rfind('/') + if index >= 0 and index < normalized.high: + normalized = normalized[index + 1 .. ^1] + elif index == normalized.high: + normalized = "" + normalized + +proc isAllowedImage(filename: string): bool = + let dot = filename.rfind('.') + if dot < 1 or dot == filename.high: + return false + let extension = filename[dot + 1 .. ^1].toLowerAscii() + for allowed in AllowedImageExtensions: + if extension == allowed: + return true + false + +proc tailText(value: string, maxLen: int = 1200): string = + if value.len <= maxLen: + return value + value[value.len - maxLen .. ^1] + +proc extractBoundary(contentType: string): string = + for part in contentType.split(';'): + let token = part.strip() + if token.toLowerAscii().startsWith("boundary="): + return token[9 .. ^1].strip(chars = {'\"', '\''}) + "" + +proc stripTrailingCrlf(value: string): string = + result = value + if result.len >= 2 and result.endsWith("\r\n"): + result.setLen(result.len - 2) + +# hand-rolled multipart parsing, yes i am aware that this is "eh" +proc parseMultipart(body: string, boundary: string): seq[MultipartPart] = + let delimiter = "--" & boundary + for rawChunk in body.split(delimiter): + var chunk = rawChunk + if chunk.len == 0: + continue + if chunk == "--" or chunk == "--\r\n": + continue + if chunk.startsWith("\r\n"): + chunk = chunk[2 .. 
^1] + + chunk = stripTrailingCrlf(chunk) + + if chunk.len == 2 and chunk == "--": + continue + + let splitIndex = chunk.find("\r\n\r\n") + if splitIndex < 0: + continue + + let headerBlock = chunk[0 ..< splitIndex] + var content = chunk[splitIndex + 4 .. ^1] + content = stripTrailingCrlf(content) + + var name = "" + var filename = "" + var contentType = "application/octet-stream" + + for line in headerBlock.split("\r\n"): + let separator = line.find(':') + if separator <= 0: + continue + let headerName = line[0 ..< separator].strip().toLowerAscii() + let headerValue = line[separator + 1 .. ^1].strip() + + if headerName == "content-disposition": + for part in headerValue.split(';'): + let token = part.strip() + if token.startsWith("name="): + name = token[5 .. ^1].strip(chars = {'\"', '\''}) + elif token.startsWith("filename="): + filename = token[9 .. ^1].strip(chars = {'\"', '\''}) + elif headerName == "content-type": + contentType = headerValue + + if name.len > 0: + result.add( + MultipartPart( + name: name, filename: filename, contentType: contentType, content: content + ) + ) + +proc isSafeRelativePath(pathPart: string): bool = + pathPart.len > 0 and not pathPart.contains("..") and not pathPart.contains('\\') and + not pathPart.startsWith("/") + +proc fileContentType(filePath: string): string = + let lowered = filePath.toLowerAscii() + if lowered.endsWith(".js"): + return "application/javascript; charset=utf-8" + if lowered.endsWith(".css"): + return "text/css; charset=utf-8" + if lowered.endsWith(".html"): + return "text/html; charset=utf-8" + if lowered.endsWith(".png"): + return "image/png" + if lowered.endsWith(".jpg") or lowered.endsWith(".jpeg"): + return "image/jpeg" + if lowered.endsWith(".gif"): + return "image/gif" + if lowered.endsWith(".webp"): + return "image/webp" + if lowered.endsWith(".svg"): + return "image/svg+xml" + if lowered.endsWith(".pdf"): + return "application/pdf" + "application/octet-stream" + +# response wrappers +proc 
respondHtml(req: Request, code: HttpCode, content: string) {.async.} = + let headers = newHttpHeaders({"Content-Type": "text/html; charset=utf-8"}) + await req.respond(code, content, headers) + +proc respondText(req: Request, code: HttpCode, content: string) {.async.} = + let headers = newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"}) + await req.respond(code, content, headers) + +proc respondFile( + req: Request, + filePath: string, + asAttachment: bool = false, + attachmentName: string = "", +) {.async.} = + if not fileExists(filePath): + await respondText(req, Http404, "Not found") + return + + var headers = newHttpHeaders() + headers["Content-Type"] = fileContentType(filePath) + if asAttachment and attachmentName.len > 0: + headers["Content-Disposition"] = "attachment; filename=\"" & attachmentName & "\"" + + await req.respond(Http200, readFile(filePath), headers) + +# pandoc does the heavy lifting +proc runPandoc( + sourceMarkdown: string, + outputPath: string, + paperSize: string, + margin: string, + mainFont: string, + lineSpacing: string, + showPageNumbers: bool, +): tuple[ok: bool, output: string, missingPandoc: bool] = + let tempDir = getTempDir() / (AppName & "-" & randomHex(10)) + createDir(tempDir) + let tempMarkdownPath = tempDir / "source.md" + let tempRawPath = tempDir / "raw.md" + + try: + # write raw markdown first + writeFile(tempRawPath, sourceMarkdown) + + # preprocess markdown: convert to ascii with transliteration and normalize quotes + let iconvCmd = + "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) & + " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath) + let (_, iconvExitCode) = execCmdEx(iconvCmd) + + if iconvExitCode != 0: + # if preprocessing fails, fall back to original content + writeFile(tempMarkdownPath, sourceMarkdown) + + var args = @[ + tempMarkdownPath, + "--from", + "markdown+emoji+hard_line_breaks", + "--pdf-engine=lualatex", + "--template", + latexTemplatePath(), + "-V", + "margin=" & 
margin, + "-V", + "mainfont=" & mainFont, + "-V", + "linespacing=" & lineSpacing, + "--resource-path", + baseDir() & ":" & uploadsDir() & ":" & tempDir, + "-o", + outputPath, + ] + + let dims = lookupCustomPaper(paperSize) + if dims.width.len > 0: + args.add("-V") + args.add("paperwidth=" & dims.width) + args.add("-V") + args.add("paperheight=" & dims.height) + else: + args.add("-V") + args.add("papersize=" & paperSize) + + if not showPageNumbers: + args.add("-V") + args.add("hidepages=true") + + var process: Process + try: + process = + startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut}) + except OSError: + return ( + ok: false, + output: "Pandoc is not installed or not in PATH.", + missingPandoc: true, + ) + + let output = process.outputStream.readAll() + let exitCode = process.waitForExit() + process.close() + + if exitCode == 0: + return (ok: true, output: "", missingPandoc: false) + return (ok: false, output: output, missingPandoc: false) + finally: + try: + if fileExists(tempRawPath): + removeFile(tempRawPath) + if fileExists(tempMarkdownPath): + removeFile(tempMarkdownPath) + if dirExists(tempDir): + removeDir(tempDir) + except OSError: + discard + +# app endpoint: strict inputs, loud errors. 
proc handleConvert(req: Request) {.async.} =
  ## POST /convert: turns the submitted markdown into a PDF via `runPandoc`.
  ##
  ## The request body is read as an urlencoded form. `paper_size`, `margin`
  ## and `line_spacing` are checked against their allow-lists with
  ## `pickOption`; anything else silently becomes the default. Responds with
  ## the `result.html` partial on success and the `error.html` partial
  ## otherwise (400 for empty input or a failed conversion, 500 when pandoc
  ## could not be started).
  let form = parseUrlEncoded(req.body)
  let markdown = form.getOrDefault("markdown", "").strip()

  if markdown.len == 0:
    let page = renderTemplate(
      partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")]
    )
    await respondHtml(req, Http400, page)
    return

  let paperSize =
    pickOption(form.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes)
  let margin = pickOption(form.getOrDefault("margin", ""), "1in", ValidMargins)
  let lineSpacing =
    pickOption(form.getOrDefault("line_spacing", ""), "1", ValidLineSpacings)

  # only "sans" selects the sans face; every other value (including a missing
  # field) ends up on the serif default
  let mainFont =
    if form.getOrDefault("main_font", "serif") == "sans": "TeX Gyre Heros"
    else: "TeX Gyre Pagella"

  let showPageNumbers = form.getOrDefault("page_numbers", "") == "on"

  # epoch + random suffix keeps generated names unique per request
  let epoch = int(getTime().toUnix())
  let outputName = AppName & "_" & $epoch & "_" & randomHex(32) & ".pdf"
  let outputPath = generatedDir() / outputName

  let conversion = runPandoc(
    markdown, outputPath, paperSize, margin, mainFont, lineSpacing, showPageNumbers
  )

  if not conversion.ok:
    var message = "PDF conversion failed."
    if conversion.missingPandoc:
      message = conversion.output
    else:
      let details = conversion.output.strip()
      if details.len > 0:
        # show only the end of the tool output
        message = tailText(details)

    let page = renderTemplate(
      partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))]
    )
    let code = if conversion.missingPandoc: Http500 else: Http400
    await respondHtml(req, code, page)
    return

  let page = renderTemplate(
    partialsDir() / "result.html",
    [
      ("{{ filename }}", htmlEscape(outputName)),
      ("{{ download_url }}", "/download/" & encodeUrl(outputName)),
    ],
  )
  await respondHtml(req, Http200, page)

proc renderUploadError(message: string): string =
  ## Private helper: fills the `upload_error.html` partial with `message`.
  renderTemplate(partialsDir() / "upload_error.html", [("{{ message }}", message)])

# upload endpoint. accepts image, returns markdown snippet
proc handleUploadImage(req: Request) {.async.} =
  ## POST /upload-image: stores the multipart field named `image` under
  ## `uploadsDir()` and answers with a markdown snippet that references it.
  ##
  ## Responds 400 when there is no multipart boundary, no `image` part with a
  ## filename, or the sanitized filename fails `isAllowedImage`.
  let boundary = extractBoundary(req.headers.getOrDefault("Content-Type"))
  if boundary.len == 0:
    await respondHtml(req, Http400, renderUploadError("image file is required."))
    return

  var imagePart: MultipartPart
  var foundImage = false
  for part in parseMultipart(req.body, boundary):
    if part.name == "image":
      imagePart = part
      foundImage = true
      break

  if not foundImage or imagePart.filename.strip().len == 0:
    await respondHtml(req, Http400, renderUploadError("image file is required."))
    return

  let originalName = sanitizeFilename(baseFilename(imagePart.filename))
  if originalName.len == 0 or not isAllowedImage(originalName):
    await respondHtml(req, Http400, renderUploadError("unsupported image type."))
    return

  # the client-supplied name is only used for its extension; the stored name
  # is generated here
  let extension = originalName[originalName.rfind('.') + 1 .. ^1].toLowerAscii()
  let epoch = int(getTime().toUnix())
  let storedName = "img_" & $epoch & "_" & randomHex(32) & "." & extension

  writeFile(uploadsDir() / storedName, imagePart.content)

  let markdownSnippet = "![](uploads/" & storedName & ")"
  let page = renderTemplate(
    partialsDir() / "upload_result.html",
    [
      ("{{ filename }}", htmlEscape(storedName)),
      ("{{ markdown_snippet }}", htmlEscape(markdownSnippet)),
      ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)),
    ],
  )
  await respondHtml(req, Http200, page)

proc dispatchRoute(req: Request) {.async.} =
  ## Private helper: matches method + path and forwards to the handler.
  const
    staticPrefix = "/static/"
    uploadsPrefix = "/uploads/"
    downloadPrefix = "/download/"

  let path = req.url.path

  if req.reqMethod == HttpGet:
    if path == "/":
      await respondFile(req, templatesDir() / "index.html")
      return

    if path.startsWith(staticPrefix):
      let relativePath = decodeUrl(path[staticPrefix.len .. ^1])
      if not isSafeRelativePath(relativePath):
        await respondText(req, Http400, "Invalid path")
        return
      await respondFile(req, staticDir() / relativePath)
      return

    if path.startsWith(uploadsPrefix):
      let relativePath = decodeUrl(path[uploadsPrefix.len .. ^1])
      if not isSafeRelativePath(relativePath):
        await respondText(req, Http400, "Invalid path")
        return
      await respondFile(req, uploadsDir() / relativePath)
      return

    if path.startsWith(downloadPrefix):
      let relativePath = decodeUrl(path[downloadPrefix.len .. ^1])
      if not isSafeRelativePath(relativePath):
        await respondText(req, Http400, "Invalid path")
        return
      await respondFile(
        req,
        generatedDir() / relativePath,
        asAttachment = true,
        attachmentName = relativePath,
      )
      return

  if req.reqMethod == HttpPost:
    if path == "/convert":
      await handleConvert(req)
      return
    if path == "/upload-image":
      await handleUploadImage(req)
      return

  await respondText(req, Http404, "Not found")

# router table
proc route(req: Request) {.async.} =
  ## Callback handed to `server.serve`.
  ##
  ## The handlers read templates and write files without catching I/O errors
  ## themselves, so the dispatch is guarded here: a `CatchableError` is logged
  ## to stderr and answered with a plain 500 instead of escaping the callback.
  var failed = false
  try:
    await dispatchRoute(req)
  except CatchableError as e:
    failed = true
    stderr.writeLine("request failed: " & req.url.path & ": " & e.msg)

  if failed:
    try:
      await respondText(req, Http500, "Internal server error")
    except CatchableError:
      # best effort: the client may be gone or a response may already have
      # been started
      discard

# server boot, then we let htmx do htmx things.
+when isMainModule: + randomize() + + if not dirExists(generatedDir()): + createDir(generatedDir()) + if not dirExists(uploadsDir()): + createDir(uploadsDir()) + + let server = newAsyncHttpServer() + echo "listening on http://localhost:5001" + waitFor server.serve(Port(5001), route) \ No newline at end of file diff --git a/src/__legacy_src/server.nim b/src/__legacy_src/server.nim new file mode 100644 index 0000000..8b64793 --- /dev/null +++ b/src/__legacy_src/server.nim @@ -0,0 +1,525 @@ +import + std/[ + asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri, + random, + ] + +# tiny backend in nimlang, may be stupid, but this was fun + +const + AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"] + ValidPaperSizes = [ + "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper", + "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper", + "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper", + "ledgerpaper", "tabloid", "statement", "flsa", + ] + ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"] + ValidLineSpacings = ["1", "1.5", "2"] + CustomPaperDimensions = [ + ("tabloid", "11in", "17in"), + ("statement", "5.5in", "8.5in"), + ("flsa", "8.5in", "13in"), + ] + +const AppName = "likha-pdf" + +proc lookupCustomPaper(name: string): tuple[width: string, height: string] = + for (paperName, w, h) in CustomPaperDimensions: + if paperName == name: + return (width: w, height: h) + (width: "", height: "") + +proc baseDir(): string {.inline.} = + getAppDir() + +proc generatedDir(): string {.inline.} = + baseDir() / "generated" + +proc uploadsDir(): string {.inline.} = + baseDir() / "uploads" + +proc latexTemplatePath(): string {.inline.} = + baseDir() / "latex" / "template.tex" + +proc templatesDir(): string {.inline.} = + baseDir() / "templates" + +proc partialsDir(): string {.inline.} = + templatesDir() / "partials" + +proc staticDir(): 
string {.inline.} = + baseDir() / "static" + +type MultipartPart = object + name: string + filename: string + contentType: string + content: string + +# helpers +proc htmlEscape(value: string): string = + result = value + result = result.replace("&", "&amp;") + result = result.replace("<", "&lt;") + result = result.replace(">", "&gt;") + result = result.replace("\"", "&quot;") + result = result.replace("'", "&#39;") + +proc randomHex(length: int): string = + const hexChars = "0123456789abcdef" + result = newStringOfCap(length) + for _ in 0 ..< length: + result.add(hexChars[rand(15)]) + +proc renderTemplate( + filePath: string, replacements: openArray[(string, string)] +): string = + result = readFile(filePath) + for (token, replacement) in replacements: + result = result.replace(token, replacement) + +proc decodeFormComponent(value: string): string = + decodeUrl(value.replace("+", " ")) + +proc parseUrlEncoded(body: string): Table[string, string] = + result = initTable[string, string]() + if body.len == 0: + return + + for pair in body.split("&"): + if pair.len == 0: + continue + let separator = pair.find('=') + if separator < 0: + result[decodeFormComponent(pair)] = "" + else: + let key = decodeFormComponent(pair[0 ..< separator]) + let value = decodeFormComponent(pair[separator + 1 .. ^1]) + result[key] = value + +# "options" are optional, defaults are forever. 
proc pickOption(value: string, fallback: string, options: openArray[string]): string =
  ## Returns `value` when it is one of `options`, otherwise `fallback`.
  if value in options: value else: fallback

proc sanitizeFilename(filename: string): string =
  ## Keeps ASCII letters, digits, `-`, `_` and `.`; turns spaces into `_`;
  ## drops every other character.
  result = newStringOfCap(filename.len)
  for ch in filename:
    case ch
    of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.':
      result.add(ch)
    of ' ':
      result.add('_')
    else:
      discard

proc baseFilename(value: string): string =
  ## Returns the part of `value` after the last `/` or `\`.
  ## A value ending in a separator yields the empty string.
  let normalized = value.replace("\\", "/")
  # rfind gives -1 when there is no separator, so the slice then starts at 0
  normalized[normalized.rfind('/') + 1 .. ^1]

proc isAllowedImage(filename: string): bool =
  ## True when `filename` has a non-empty stem and an extension listed in
  ## `AllowedImageExtensions` (compared case-insensitively).
  let dot = filename.rfind('.')
  if dot < 1 or dot == filename.high:
    return false
  filename[dot + 1 .. ^1].toLowerAscii() in AllowedImageExtensions

proc tailText(value: string, maxLen: int = 1200): string =
  ## Returns at most the last `maxLen` characters of `value`.
  if value.len <= maxLen:
    return value
  value[value.len - maxLen .. ^1]

proc extractBoundary(contentType: string): string =
  ## Pulls the `boundary=` parameter out of a Content-Type header value,
  ## without surrounding quotes. Returns "" when there is none.
  for part in contentType.split(';'):
    let token = part.strip()
    if token.toLowerAscii().startsWith("boundary="):
      return token[9 .. ^1].strip(chars = {'\"', '\''})
  ""

proc stripTrailingCrlf(value: string): string =
  ## Removes one trailing "\r\n" from `value`, if present.
  result = value
  if result.len >= 2 and result.endsWith("\r\n"):
    result.setLen(result.len - 2)

# hand-rolled multipart parsing, yes i am aware that this is "eh"
proc parseMultipart(body: string, boundary: string): seq[MultipartPart] =
  ## Splits a multipart/form-data `body` on `--boundary` and returns every
  ## part that carries a `name` in its Content-Disposition header.
  let delimiter = "--" & boundary
  for rawChunk in body.split(delimiter):
    var chunk = rawChunk
    if chunk.len == 0:
      continue
    # what follows the closing delimiter
    if chunk == "--" or chunk == "--\r\n":
      continue
    if chunk.startsWith("\r\n"):
      chunk = chunk[2 .. ^1]

    # the CRLF in front of the next delimiter belongs to the framing, not to
    # the payload; it is removed exactly once here
    chunk = stripTrailingCrlf(chunk)

    if chunk.len == 2 and chunk == "--":
      continue

    let splitIndex = chunk.find("\r\n\r\n")
    if splitIndex < 0:
      continue

    let headerBlock = chunk[0 ..< splitIndex]
    # no second strip: a payload that itself ends in CRLF must keep it,
    # otherwise uploaded files lose their last two bytes
    let content = chunk[splitIndex + 4 .. ^1]

    var name = ""
    var filename = ""
    var contentType = "application/octet-stream"

    for line in headerBlock.split("\r\n"):
      let separator = line.find(':')
      if separator <= 0:
        continue
      let headerName = line[0 ..< separator].strip().toLowerAscii()
      let headerValue = line[separator + 1 .. ^1].strip()

      if headerName == "content-disposition":
        for part in headerValue.split(';'):
          let token = part.strip()
          if token.startsWith("name="):
            name = token[5 .. ^1].strip(chars = {'\"', '\''})
          elif token.startsWith("filename="):
            filename = token[9 .. ^1].strip(chars = {'\"', '\''})
      elif headerName == "content-type":
        contentType = headerValue

    if name.len > 0:
      result.add(
        MultipartPart(
          name: name, filename: filename, contentType: contentType, content: content
        )
      )

proc isSafeRelativePath(pathPart: string): bool =
  ## Rejects empty paths, anything containing ".." or a backslash, and paths
  ## starting with "/".
  pathPart.len > 0 and not pathPart.contains("..") and not pathPart.contains('\\') and
    not pathPart.startsWith("/")

proc fileContentType(filePath: string): string =
  ## Maps a file name to a Content-Type by its (case-insensitive) suffix;
  ## unknown suffixes get "application/octet-stream".
  const knownTypes = [
    (".js", "application/javascript; charset=utf-8"),
    (".css", "text/css; charset=utf-8"),
    (".html", "text/html; charset=utf-8"),
    (".png", "image/png"),
    (".jpg", "image/jpeg"),
    (".jpeg", "image/jpeg"),
    (".gif", "image/gif"),
    (".webp", "image/webp"),
    (".svg", "image/svg+xml"),
    (".pdf", "application/pdf"),
  ]
  let lowered = filePath.toLowerAscii()
  for (suffix, mime) in knownTypes:
    if lowered.endsWith(suffix):
      return mime
  "application/octet-stream"

# response wrappers
proc respondHtml(req: Request, code: HttpCode, content: string) {.async.} =
  ## Sends `content` as text/html with status `code`.
  let headers = newHttpHeaders({"Content-Type": "text/html; charset=utf-8"})
  await req.respond(code, content, headers)

proc respondText(req: Request, code: HttpCode, content: string) {.async.} =
  ## Sends `content` as text/plain with status `code`.
  let headers = newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"})
  await req.respond(code, content, headers)

proc respondFile(
    req: Request,
    filePath: string,
    asAttachment: bool = false,
    attachmentName: string = "",
) {.async.} =
  ## Sends the file at `filePath`, or a plain 404 when it does not exist.
  ## With `asAttachment` and a non-empty `attachmentName` a
  ## Content-Disposition header is added.
  if not fileExists(filePath):
    await respondText(req, Http404, "Not found")
    return

  var headers = newHttpHeaders()
  headers["Content-Type"] = fileContentType(filePath)
  if asAttachment and attachmentName.len > 0:
    # the name comes from the request URL: drop quotes, backslashes and
    # control characters so it cannot break out of the quoted header value
    var safeName = newStringOfCap(attachmentName.len)
    for ch in attachmentName:
      if ch >= ' ' and ch notin {'"', '\\', '\x7f'}:
        safeName.add(ch)
    headers["Content-Disposition"] =
      if safeName.len > 0: "attachment; filename=\"" & safeName & "\""
      else: "attachment"

  await req.respond(Http200, readFile(filePath), headers)

# pandoc does the heavy lifting
proc runPandoc(
    sourceMarkdown: string,
    outputPath: string,
    paperSize: string,
    margin: string,
    mainFont: string,
    lineSpacing: string,
    showPageNumbers: bool,
): tuple[ok: bool, output: string, missingPandoc: bool] =
  ## Writes `sourceMarkdown` to a temp directory and runs pandoc (lualatex
  ## engine, project LaTeX template) to produce `outputPath`.
  ##
  ## Returns `ok = true` on exit code 0. Otherwise `output` holds pandoc's
  ## combined stdout/stderr, or a fixed message with `missingPandoc = true`
  ## when the process could not be started. The temp files are removed on
  ## every path.
  let tempDir = getTempDir() / (AppName & "-" & randomHex(10))
  createDir(tempDir)
  let tempMarkdownPath = tempDir / "source.md"
  let tempRawPath = tempDir / "raw.md"

  try:
    # write raw markdown first
    writeFile(tempRawPath, sourceMarkdown)

    # preprocess markdown: convert to ascii with transliteration and normalize quotes
    # NOTE(review): the quoting of the sed script looks unbalanced for sh;
    # confirm this pipeline actually runs rather than always falling back.
    let iconvCmd =
      "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) &
      " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath)
    let (_, iconvExitCode) = execCmdEx(iconvCmd)

    if iconvExitCode != 0:
      # if preprocessing fails, fall back to original content
      writeFile(tempMarkdownPath, sourceMarkdown)

    var args = @[
      tempMarkdownPath,
      "--from",
      "markdown+emoji+hard_line_breaks",
      "--pdf-engine=lualatex",
      "--template",
      latexTemplatePath(),
      "-V",
      "margin=" & margin,
      "-V",
      "mainfont=" & mainFont,
      "-V",
      "linespacing=" & lineSpacing,
      "--resource-path",
      baseDir() & ":" & uploadsDir() & ":" & tempDir,
      "-o",
      outputPath,
    ]

    # sizes listed in CustomPaperDimensions are passed as explicit
    # width/height, everything else as a named papersize
    let dims = lookupCustomPaper(paperSize)
    if dims.width.len > 0:
      args.add("-V")
      args.add("paperwidth=" & dims.width)
      args.add("-V")
      args.add("paperheight=" & dims.height)
    else:
      args.add("-V")
      args.add("papersize=" & paperSize)

    if not showPageNumbers:
      args.add("-V")
      args.add("hidepages=true")

    var process: Process
    try:
      process =
        startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut})
    except OSError:
      return (
        ok: false,
        output: "Pandoc is not installed or not in PATH.",
        missingPandoc: true,
      )

    var output = ""
    var exitCode = -1
    try:
      output = process.outputStream.readAll()
      exitCode = process.waitForExit()
    finally:
      # release the process handles even when reading the output raises
      process.close()

    if exitCode == 0:
      return (ok: true, output: "", missingPandoc: false)
    return (ok: false, output: output, missingPandoc: false)
  finally:
    # cleanup is best effort; a failure here must not mask the result
    try:
      if fileExists(tempRawPath):
        removeFile(tempRawPath)
      if fileExists(tempMarkdownPath):
        removeFile(tempMarkdownPath)
      if dirExists(tempDir):
        removeDir(tempDir)
    except OSError:
      discard

# app endpoint: strict inputs, loud errors.
proc handleConvert(req: Request) {.async.} =
  ## POST /convert: turns the submitted markdown into a PDF via `runPandoc`.
  ##
  ## The request body is read as an urlencoded form. `paper_size`, `margin`
  ## and `line_spacing` are checked against their allow-lists with
  ## `pickOption`; anything else silently becomes the default. Responds with
  ## the `result.html` partial on success and the `error.html` partial
  ## otherwise (400 for empty input or a failed conversion, 500 when pandoc
  ## could not be started).
  let form = parseUrlEncoded(req.body)
  let markdown = form.getOrDefault("markdown", "").strip()

  if markdown.len == 0:
    let page = renderTemplate(
      partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")]
    )
    await respondHtml(req, Http400, page)
    return

  let paperSize =
    pickOption(form.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes)
  let margin = pickOption(form.getOrDefault("margin", ""), "1in", ValidMargins)
  let lineSpacing =
    pickOption(form.getOrDefault("line_spacing", ""), "1", ValidLineSpacings)

  # only "sans" selects the sans face; every other value (including a missing
  # field) ends up on the serif default
  let mainFont =
    if form.getOrDefault("main_font", "serif") == "sans": "TeX Gyre Heros"
    else: "TeX Gyre Pagella"

  let showPageNumbers = form.getOrDefault("page_numbers", "") == "on"

  # epoch + random suffix keeps generated names unique per request
  let epoch = int(getTime().toUnix())
  let outputName = AppName & "_" & $epoch & "_" & randomHex(32) & ".pdf"
  let outputPath = generatedDir() / outputName

  let conversion = runPandoc(
    markdown, outputPath, paperSize, margin, mainFont, lineSpacing, showPageNumbers
  )

  if not conversion.ok:
    var message = "PDF conversion failed."
    if conversion.missingPandoc:
      message = conversion.output
    else:
      let details = conversion.output.strip()
      if details.len > 0:
        # show only the end of the tool output
        message = tailText(details)

    let page = renderTemplate(
      partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))]
    )
    let code = if conversion.missingPandoc: Http500 else: Http400
    await respondHtml(req, code, page)
    return

  let page = renderTemplate(
    partialsDir() / "result.html",
    [
      ("{{ filename }}", htmlEscape(outputName)),
      ("{{ download_url }}", "/download/" & encodeUrl(outputName)),
    ],
  )
  await respondHtml(req, Http200, page)

proc renderUploadError(message: string): string =
  ## Private helper: fills the `upload_error.html` partial with `message`.
  renderTemplate(partialsDir() / "upload_error.html", [("{{ message }}", message)])

# upload endpoint. accepts image, returns markdown snippet
proc handleUploadImage(req: Request) {.async.} =
  ## POST /upload-image: stores the multipart field named `image` under
  ## `uploadsDir()` and answers with a markdown snippet that references it.
  ##
  ## Responds 400 when there is no multipart boundary, no `image` part with a
  ## filename, or the sanitized filename fails `isAllowedImage`.
  let boundary = extractBoundary(req.headers.getOrDefault("Content-Type"))
  if boundary.len == 0:
    await respondHtml(req, Http400, renderUploadError("image file is required."))
    return

  var imagePart: MultipartPart
  var foundImage = false
  for part in parseMultipart(req.body, boundary):
    if part.name == "image":
      imagePart = part
      foundImage = true
      break

  if not foundImage or imagePart.filename.strip().len == 0:
    await respondHtml(req, Http400, renderUploadError("image file is required."))
    return

  let originalName = sanitizeFilename(baseFilename(imagePart.filename))
  if originalName.len == 0 or not isAllowedImage(originalName):
    await respondHtml(req, Http400, renderUploadError("unsupported image type."))
    return

  # the client-supplied name is only used for its extension; the stored name
  # is generated here
  let extension = originalName[originalName.rfind('.') + 1 .. ^1].toLowerAscii()
  let epoch = int(getTime().toUnix())
  let storedName = "img_" & $epoch & "_" & randomHex(32) & "." & extension

  writeFile(uploadsDir() / storedName, imagePart.content)

  let markdownSnippet = "![](uploads/" & storedName & ")"
  let page = renderTemplate(
    partialsDir() / "upload_result.html",
    [
      ("{{ filename }}", htmlEscape(storedName)),
      ("{{ markdown_snippet }}", htmlEscape(markdownSnippet)),
      ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)),
    ],
  )
  await respondHtml(req, Http200, page)

proc dispatchRoute(req: Request) {.async.} =
  ## Private helper: matches method + path and forwards to the handler.
  const
    staticPrefix = "/static/"
    uploadsPrefix = "/uploads/"
    downloadPrefix = "/download/"

  let path = req.url.path

  if req.reqMethod == HttpGet:
    if path == "/":
      await respondFile(req, templatesDir() / "index.html")
      return

    if path.startsWith(staticPrefix):
      let relativePath = decodeUrl(path[staticPrefix.len .. ^1])
      if not isSafeRelativePath(relativePath):
        await respondText(req, Http400, "Invalid path")
        return
      await respondFile(req, staticDir() / relativePath)
      return

    if path.startsWith(uploadsPrefix):
      let relativePath = decodeUrl(path[uploadsPrefix.len .. ^1])
      if not isSafeRelativePath(relativePath):
        await respondText(req, Http400, "Invalid path")
        return
      await respondFile(req, uploadsDir() / relativePath)
      return

    if path.startsWith(downloadPrefix):
      let relativePath = decodeUrl(path[downloadPrefix.len .. ^1])
      if not isSafeRelativePath(relativePath):
        await respondText(req, Http400, "Invalid path")
        return
      await respondFile(
        req,
        generatedDir() / relativePath,
        asAttachment = true,
        attachmentName = relativePath,
      )
      return

  if req.reqMethod == HttpPost:
    if path == "/convert":
      await handleConvert(req)
      return
    if path == "/upload-image":
      await handleUploadImage(req)
      return

  await respondText(req, Http404, "Not found")

# router table
proc route(req: Request) {.async.} =
  ## Callback handed to `server.serve`.
  ##
  ## The handlers read templates and write files without catching I/O errors
  ## themselves, so the dispatch is guarded here: a `CatchableError` is logged
  ## to stderr and answered with a plain 500 instead of escaping the callback.
  var failed = false
  try:
    await dispatchRoute(req)
  except CatchableError as e:
    failed = true
    stderr.writeLine("request failed: " & req.url.path & ": " & e.msg)

  if failed:
    try:
      await respondText(req, Http500, "Internal server error")
    except CatchableError:
      # best effort: the client may be gone or a response may already have
      # been started
      discard

# server boot, then we let htmx do htmx things.
+when isMainModule: + randomize() + + if not dirExists(generatedDir()): + createDir(generatedDir()) + if not dirExists(uploadsDir()): + createDir(uploadsDir()) + + let server = newAsyncHttpServer() + echo "listening on http://localhost:5001" + waitFor server.serve(Port(5001), route) \ No newline at end of file diff --git a/src/app.py b/src/app.py index 88deef4..89666ad 100644 --- a/src/app.py +++ b/src/app.py @@ -1,35 +1,37 @@ #!/usr/bin/env python3 # likha-pdf — markdown to pdf, no latex required -# converts markdown to html, then html to pdf via weasyprint -# falls back to reportlab if weasyprint chokes — a pdf is always produced +# production-friendly flask app with weasyprint + reportlab fallback +import logging import os -import re import secrets import time +from pathlib import Path, PurePosixPath from flask import ( Flask, + Response, + current_app, request, send_from_directory, - render_template_string, abort, ) from markupsafe import escape from markdown import markdown -from pygments.formatters import HtmlFormatter from weasyprint import HTML +from werkzeug.middleware.proxy_fix import ProxyFix APP_NAME = "likha-pdf" -PORT = 5001 +DEFAULT_HOST = "0.0.0.0" +DEFAULT_PORT = 5001 -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -GENERATED_DIR = os.path.join(BASE_DIR, "generated") -UPLOADS_DIR = os.path.join(BASE_DIR, "uploads") -TEMPLATES_DIR = os.path.join(BASE_DIR, "templates") -PARTIALS_DIR = os.path.join(TEMPLATES_DIR, "partials") -STATIC_DIR = os.path.join(BASE_DIR, "static") +BASE_DIR = Path(__file__).resolve().parent +GENERATED_DIR = BASE_DIR / "generated" +UPLOADS_DIR = BASE_DIR / "uploads" +TEMPLATES_DIR = BASE_DIR / "templates" +PARTIALS_DIR = TEMPLATES_DIR / "partials" +STATIC_DIR = BASE_DIR / "static" ALLOWED_IMAGE_EXTS = {"png", "jpg", "jpeg", "gif", "webp", "svg"} @@ -93,16 +95,20 @@ MARKDOWN_EXT_CONFIG = { }, } -app = Flask( - __name__, - template_folder=TEMPLATES_DIR, - static_folder=STATIC_DIR, - static_url_path="/static", -) 
-app.config["MAX_CONTENT_LENGTH"] = 64 * 1024 * 1024 # 64 MB - # helpers +def env_bool(name, default=False): + raw = os.getenv(name) + if raw is None: + return default + return raw.strip().lower() in {"1", "true", "yes", "on"} + + +def ensure_runtime_dirs(): + GENERATED_DIR.mkdir(parents=True, exist_ok=True) + UPLOADS_DIR.mkdir(parents=True, exist_ok=True) + + def random_hex(length=32): return secrets.token_hex(length // 2) @@ -132,19 +138,15 @@ def is_allowed_image(filename): def is_safe_relative_path(path_part): - return ( - bool(path_part) - and ".." not in path_part - and "\\" not in path_part - and not path_part.startswith("/") - ) + if not path_part or "\\" in path_part: + return False + safe_path = PurePosixPath(path_part) + return not safe_path.is_absolute() and ".." not in safe_path.parts def read_partial(name, replacements=None): """read a partial html template and apply replacements""" - path = os.path.join(PARTIALS_DIR, name) - with open(path, "r", encoding="utf-8") as f: - content = f.read() + content = (PARTIALS_DIR / name).read_text(encoding="utf-8") if replacements: for token, value in replacements.items(): content = content.replace(token, value) @@ -312,7 +314,7 @@ def convert_with_weasyprint(full_html, output_path): try: doc = HTML( string=full_html, - base_url=BASE_DIR, + base_url=str(BASE_DIR), ) doc.write_pdf(output_path) return True, "" @@ -452,6 +454,7 @@ def generate_pdf(source_markdown, output_path, paper_size, margin, # weasyprint failed — fall back to reportlab try: + current_app.logger.warning("weasyprint failed, using reportlab fallback: %s", err) convert_with_reportlab( source_markdown, output_path, paper_size, margin, font_family, line_spacing, @@ -461,108 +464,145 @@ def generate_pdf(source_markdown, output_path, paper_size, margin, return False, f"weasyprint: {err} | reportlab: {fallback_err}" -# routes -@app.route("/") -def index(): - index_path = os.path.join(TEMPLATES_DIR, "index.html") - with open(index_path, "r", 
encoding="utf-8") as f: - return f.read() +def create_app(): + ensure_runtime_dirs() + app = Flask( + __name__, + template_folder=str(TEMPLATES_DIR), + static_folder=str(STATIC_DIR), + static_url_path="/static", + ) -@app.route("/convert", methods=["POST"]) -def convert(): - md = request.form.get("markdown", "").strip() - if not md: - return read_partial("error.html", { - "{{ message }}": "Markdown content is required.", - }), 400 + app.config["MAX_CONTENT_LENGTH"] = int(os.getenv("MAX_CONTENT_LENGTH", str(64 * 1024 * 1024))) - paper_size = pick_option( - request.form.get("paper_size", ""), "letterpaper", VALID_PAPER_SIZES, - ) - margin = pick_option( - request.form.get("margin", ""), "1in", VALID_MARGINS, - ) + if env_bool("TRUST_PROXY", default=True): + app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1) - font_family = request.form.get("main_font", "serif") - if font_family not in ("serif", "sans"): - font_family = "serif" + log_level = os.getenv("LOG_LEVEL", "INFO").upper() + app.logger.setLevel(log_level) - line_spacing = pick_option( - request.form.get("line_spacing", ""), "1", VALID_LINE_SPACINGS, - ) - show_page_numbers = request.form.get("page_numbers") == "on" + @app.after_request + def add_security_headers(resp): + resp.headers.setdefault("X-Content-Type-Options", "nosniff") + resp.headers.setdefault("X-Frame-Options", "DENY") + resp.headers.setdefault("Referrer-Policy", "no-referrer") + return resp - epoch = int(time.time()) - output_name = f"{APP_NAME}_{epoch}_{random_hex()}.pdf" - output_path = os.path.join(GENERATED_DIR, output_name) + @app.errorhandler(413) + def payload_too_large(_err): + return read_partial("upload_error.html", { + "{{ message }}": "request body too large.", + }), 413 + + @app.route("/healthz") + def healthz(): + return Response("ok\n", mimetype="text/plain") + + @app.route("/") + def index(): + return send_from_directory(str(TEMPLATES_DIR), "index.html") + + @app.route("/convert", methods=["POST"]) + def 
convert(): + md = request.form.get("markdown", "").strip() + if not md: + return read_partial("error.html", { + "{{ message }}": "Markdown content is required.", + }), 400 + + paper_size = pick_option( + request.form.get("paper_size", ""), "letterpaper", VALID_PAPER_SIZES, + ) + margin = pick_option( + request.form.get("margin", ""), "1in", VALID_MARGINS, + ) - ok, err = generate_pdf( - md, output_path, - paper_size, margin, font_family, line_spacing, show_page_numbers, - ) + font_family = request.form.get("main_font", "serif") + if font_family not in ("serif", "sans"): + font_family = "serif" - if not ok: - return read_partial("error.html", { - "{{ message }}": str(escape(tail_text(err))), - }), 500 + line_spacing = pick_option( + request.form.get("line_spacing", ""), "1", VALID_LINE_SPACINGS, + ) + show_page_numbers = request.form.get("page_numbers") == "on" + + output_name = f"{APP_NAME}_{int(time.time())}_{random_hex()}.pdf" + output_path = GENERATED_DIR / output_name + + ok, err = generate_pdf( + md, + str(output_path), + paper_size, + margin, + font_family, + line_spacing, + show_page_numbers, + ) - return read_partial("result.html", { - "{{ filename }}": str(escape(output_name)), - "{{ download_url }}": f"/download/{output_name}", - }) + if not ok: + app.logger.error("pdf generation failed: %s", err) + return read_partial("error.html", { + "{{ message }}": str(escape(tail_text(err))), + }), 500 + + return read_partial("result.html", { + "{{ filename }}": str(escape(output_name)), + "{{ download_url }}": f"/download/{output_name}", + }) + + @app.route("/upload-image", methods=["POST"]) + def upload_image(): + uploaded = request.files.get("image") + if not uploaded or not uploaded.filename or not uploaded.filename.strip(): + return read_partial("upload_error.html", { + "{{ message }}": "image file is required.", + }), 400 + + original = sanitize_filename(uploaded.filename) + if not original or not is_allowed_image(original): + return 
read_partial("upload_error.html", { + "{{ message }}": "unsupported image type.", + }), 400 + + ext = original.rsplit(".", 1)[-1].lower() + stored_name = f"img_{int(time.time())}_{random_hex()}.{ext}" + image_path = UPLOADS_DIR / stored_name + uploaded.save(str(image_path)) + + snippet = f"![](uploads/{stored_name})" + return read_partial("upload_result.html", { + "{{ filename }}": str(escape(stored_name)), + "{{ markdown_snippet }}": str(escape(snippet)), + "{{ preview_url }}": f"/uploads/{stored_name}", + }) + + @app.route("/uploads/<path:filename>") + def serve_upload(filename): + if not is_safe_relative_path(filename): + abort(400) + return send_from_directory(str(UPLOADS_DIR), filename, conditional=True) + + @app.route("/download/<path:filename>") + def download(filename): + if not is_safe_relative_path(filename): + abort(400) + return send_from_directory( + str(GENERATED_DIR), + filename, + as_attachment=True, + download_name=filename, + conditional=True, + ) + return app -@app.route("/upload-image", methods=["POST"]) -def upload_image(): - uploaded = request.files.get("image") - if not uploaded or not uploaded.filename or not uploaded.filename.strip(): - return read_partial("upload_error.html", { - "{{ message }}": "image file is required.", - }), 400 - original = sanitize_filename(uploaded.filename) - if not original or not is_allowed_image(original): - return read_partial("upload_error.html", { - "{{ message }}": "unsupported image type.", - }), 400 - - ext = original.rsplit(".", 1)[-1].lower() - epoch = int(time.time()) - stored_name = f"img_{epoch}_{random_hex()}.{ext}" - image_path = os.path.join(UPLOADS_DIR, stored_name) - uploaded.save(image_path) - - snippet = f"![](uploads/{stored_name})" - return read_partial("upload_result.html", { - "{{ filename }}": str(escape(stored_name)), - "{{ markdown_snippet }}": str(escape(snippet)), - "{{ preview_url }}": f"/uploads/{stored_name}", - }) - - -@app.route("/uploads/<path:filename>") -def serve_upload(filename): - if not 
is_safe_relative_path(filename): - abort(400) - return send_from_directory(UPLOADS_DIR, filename) - - -@app.route("/download/<path:filename>") -def download(filename): - if not is_safe_relative_path(filename): - abort(400) - return send_from_directory( - GENERATED_DIR, filename, - as_attachment=True, - download_name=filename, - ) +app = create_app() -# main if __name__ == "__main__": - os.makedirs(GENERATED_DIR, exist_ok=True) - os.makedirs(UPLOADS_DIR, exist_ok=True) - - print(f" {APP_NAME} listening on http://localhost:{PORT}") - app.run(host="0.0.0.0", port=PORT, debug=False) + host = os.getenv("HOST", DEFAULT_HOST) + port = int(os.getenv("PORT", str(DEFAULT_PORT))) + print(f" {APP_NAME} listening on http://{host}:{port}") + app.run(host=host, port=port, debug=False) -- cgit v1.2.3 From db51a5321bbc6f8731fa040e305efdbf727a99e0 Mon Sep 17 00:00:00 2001 From: Kyle Javier [kj_sh604] Date: Wed, 11 Mar 2026 23:31:27 -0400 Subject: refactor: replace screenshot in README with new image Updated the screenshot image in the README.--- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0b52be4..811daac 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ a simple web app that converts markdown to pdf. - likha-pdf screenshot

+ ## features -- cgit v1.2.3