summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkj_sh6042026-02-13 01:35:31 -0500
committerkj_sh6042026-02-13 01:35:31 -0500
commitdf983f4efab1ec881fdd8a1483c83c68ded3225f (patch)
tree0e0ac7ba066cabd5cd48f1448443a1e44ed69400
feat: initial commit
-rw-r--r--.gitignore1
-rwxr-xr-xkjandoc184
2 files changed, 185 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1d17dae
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
.venv
diff --git a/kjandoc b/kjandoc
new file mode 100755
index 0000000..233b59e
--- /dev/null
+++ b/kjandoc
@@ -0,0 +1,184 @@
1#!/usr/bin/env python3
2
3# merge pptx files using libreoffice slide rendering + python-pptx reconstruction.
4# creative approach: render each slide as a high-res image via libreoffice,
5# then stitch them into one pptx. this preserves 100% of the visual formatting
6# since we're working with exact raster snapshots of each slide.
7
8import os
9import sys
10import glob
11import shutil
12import subprocess
13import tempfile
14from pathlib import Path
15from PIL import Image
16from pptx import Presentation
17from pptx.util import Emu
18
19
# default resolution (dots per inch) handed to `pdftoppm -r` when slides are
# rasterised; the command-line entry point overwrites it from --dpi.
DPI = 300  # high-res export
21
22
def pptx_to_images(pptx_path, output_dir):
    """Render the slides of a pptx file to PNG images inside ``output_dir``.

    The deck is converted to PDF with LibreOffice, then the PDF is rasterised
    with ``pdftoppm`` at the module-level ``DPI``. When ``pdftoppm`` is not on
    PATH, LibreOffice is asked to export a PNG from the PDF instead.

    Args:
        pptx_path: path to the source presentation (made absolute here).
        output_dir: existing directory that receives the PDF and the PNGs.

    Returns:
        Lexicographically sorted list of the PNG paths that were produced.

    Raises:
        subprocess.CalledProcessError: a converter exited with non-zero status.
        RuntimeError: the PDF is missing after conversion, or no PNG exists.
    """
    pptx_path = os.path.abspath(pptx_path)
    pdf_path = os.path.join(output_dir, Path(pptx_path).stem + '.pdf')

    # some installs only expose the launcher as `soffice`; fall back to the
    # bare name so a missing install still fails the same way as before.
    office = shutil.which('libreoffice') or shutil.which('soffice') or 'libreoffice'

    # step 1: pptx -> pdf via libreoffice
    subprocess.run(
        [office, '--headless', '--convert-to', 'pdf', '--outdir', output_dir, pptx_path],
        check=True,
        capture_output=True,
    )

    if not os.path.exists(pdf_path):
        raise RuntimeError(f"libreoffice failed to produce {pdf_path}")

    # step 2: pdf -> png images, preferring pdftoppm (poppler-utils)
    image_prefix = os.path.join(output_dir, 'slide')

    if shutil.which('pdftoppm'):
        subprocess.run(
            ['pdftoppm', '-png', '-r', str(DPI), pdf_path, image_prefix],
            check=True,
            capture_output=True,
        )
    else:
        # NOTE(review): libreoffice's png export writes a single image per
        # input document, so multi-slide decks are expected to lose slides on
        # this path - say so instead of silently producing a short deck.
        print(
            "[!] warning: pdftoppm not found; libreoffice png export may "
            "render only the first slide",
            file=sys.stderr,
        )
        subprocess.run(
            [office, '--headless', '--convert-to', 'png', '--outdir', output_dir, pdf_path],
            check=True,
            capture_output=True,
        )

    # pdftoppm output is named slide-<n>.png; the fallback writes <stem>.png,
    # so widen the pattern only when the first one matches nothing.
    images = sorted(glob.glob(os.path.join(output_dir, 'slide-*.png')))
    if not images:
        images = sorted(glob.glob(os.path.join(output_dir, '*.png')))

    if not images:
        raise RuntimeError("no slide images produced")

    return images
64
65
def images_to_pptx(image_groups, output_path, slide_width_emu=9144000, slide_height_emu=6858000):
    """Write a pptx in which every image becomes one full-bleed slide.

    Args:
        image_groups: iterable of ``(label, images)`` pairs; ``images`` is a
            sized sequence of image paths, ``label`` is only used for logging.
        output_path: where the resulting presentation is saved.
        slide_width_emu: slide width in EMU, also used as the picture width.
        slide_height_emu: slide height in EMU, also used as the picture height.

    Returns:
        The number of slides that were added.
    """
    deck = Presentation()
    deck.slide_width = slide_width_emu
    deck.slide_height = slide_height_emu

    # prefer the layout literally named 'Blank'; otherwise fall back to
    # index 6 (typically blank) or, failing that, the first layout.
    layouts = deck.slide_layouts
    canvas = next((candidate for candidate in layouts if candidate.name == 'Blank'), None)
    if canvas is None:
        canvas = layouts[6] if len(layouts) > 6 else layouts[0]

    slide_count = 0
    for group_name, group_images in image_groups:
        for picture_file in group_images:
            page = deck.slides.add_slide(canvas)

            # strip whatever placeholders the layout put on the new slide
            for placeholder in list(page.placeholders):
                node = placeholder._element
                node.getparent().remove(node)

            # stretch the picture over the whole slide area
            page.shapes.add_picture(
                picture_file,
                left=0,
                top=0,
                width=slide_width_emu,
                height=slide_height_emu,
            )
            slide_count += 1
        print(f" [{group_name}] {len(group_images)} slides added")

    deck.save(output_path)
    return slide_count
104
105
def merge_presentations(*pptx_files, output='merged.pptx'):
    """Merge several pptx files into one deck of rendered slide images.

    Each input is rendered to PNGs in its own temporary directory, then all
    images are written, in input order, into a single presentation whose
    slide size is taken from the first input.

    Args:
        *pptx_files: paths of the presentations to merge, in output order.
        output: path of the merged presentation to write.

    Returns:
        ``output``, unchanged.

    Raises:
        ValueError: no input files were given.
        Exception: whatever rendering or building raises is re-raised after
            the rendering error (if any) has been reported on stderr.
    """
    if not pptx_files:
        raise ValueError("no files provided")

    image_groups = []
    scratch_dirs = []  # every temp dir created here, removed in `finally`

    try:
        for pptx_file in pptx_files:
            label = Path(pptx_file).stem
            print(f"[*] rendering: {label}")

            tmpdir = tempfile.mkdtemp(prefix=f'pptx_merge_{label}_')
            # record before rendering so a failed render is still cleaned up
            scratch_dirs.append(tmpdir)
            try:
                images = pptx_to_images(pptx_file, tmpdir)
            except Exception as e:
                print(f" [!] error rendering {label}: {e}", file=sys.stderr)
                raise
            image_groups.append((label, images))
            print(f" -> {len(images)} slides rendered at {DPI} DPI")

        # get slide dimensions from first presentation
        first_prs = Presentation(pptx_files[0])
        sw = first_prs.slide_width
        sh = first_prs.slide_height

        print("[*] building merged presentation...")
        total = images_to_pptx(image_groups, output, sw, sh)
        print(f"[+] merged {total} slides from {len(pptx_files)} presentations -> {output}")
    finally:
        # the rendered images are only needed until the deck is saved; remove
        # them on success and on failure alike so nothing leaks.
        for tmpdir in scratch_dirs:
            shutil.rmtree(tmpdir, ignore_errors=True)

    return output
142
143
# command-line entry point: parse arguments, validate them, then merge.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description='merge pptx files with perfect formatting preservation via rendering',
        usage='%(prog)s input1.pptx [input2.pptx ...] -o output.pptx [--dpi DPI]'
    )
    parser.add_argument(
        'inputs',
        metavar='INPUT',
        nargs='+',
        help='input pptx files to merge'
    )
    parser.add_argument(
        '-o', '--output',
        default='merged.pptx',
        help='output filename (default: merged.pptx)'
    )
    parser.add_argument(
        '--dpi',
        type=int,
        default=DPI,
        help=f'rendering DPI for slide images (default: {DPI})'
    )

    args = parser.parse_args()

    # a zero or negative resolution can only fail later inside the renderer;
    # reject it here with a usage error instead.
    if args.dpi <= 0:
        parser.error(f"--dpi must be a positive integer, got {args.dpi}")

    # validate input files: each must be a regular file (not a directory)
    # and carry a .pptx extension
    for f in args.inputs:
        if not os.path.isfile(f):
            print(f"[!] error: file not found: {f}", file=sys.stderr)
            sys.exit(1)
        if not f.lower().endswith('.pptx'):
            print(f"[!] error: not a pptx file: {f}", file=sys.stderr)
            sys.exit(1)

    # this block runs at module scope, so a plain assignment rebinds the
    # module-level DPI that the rendering functions read.
    DPI = args.dpi

    merge_presentations(*args.inputs, output=args.output)