需求背景
提供一个 Word 文档模板,使用 Python 程序替换其中的占位符(替换内容可以是文本或图片),然后输出 docx 或 PDF 文件。
功能演示
输入示例
输出示例
实现程序
import os
import shutil
import subprocess
import time
from docx import Document
from docx.shared import Pt
class DocFiller:
    """Fill placeholders in a Word template with text or pictures.

    ``placeholders`` maps a marker string (e.g. ``'{{host}}'``) to either

    * a ``str`` -- the marker is replaced by that text, or
    * a ``(image_path, width, height)`` tuple -- the marker is replaced by
      an inline picture; width and height are given in points.

    Values of any other type are ignored.

    Limitation: matching is done run by run, so a marker is only found when
    it lies entirely inside one run. If Word split the marker across
    several runs, retype it in the template in one go.
    """

    def __init__(self, doc_file_path, libreoffice_path=None):
        """Load the template.

        Args:
            doc_file_path: Path of the ``.docx`` template.
            libreoffice_path: Path of the LibreOffice ``soffice`` executable.
                Only required for PDF output.
        """
        self.doc = Document(doc_file_path)
        self.libreoffice_path = libreoffice_path

    def _process_paragraphs(self, paragraphs, placeholders):
        """Apply ``placeholders`` to every run of every given paragraph."""
        for paragraph in paragraphs:
            for run in paragraph.runs:
                self._process_run(run, placeholders)

    @staticmethod
    def _process_run(run, placeholders):
        """Replace all placeholders that occur in a single run.

        Text markers are substituted first; picture markers are handled
        afterwards in one rebuild of the run, so several pictures (or
        repeated markers) in the same run all survive.
        """
        images = {}
        for placeholder, replacement in placeholders.items():
            if not placeholder:
                # An empty marker matches everywhere and cannot be split on.
                continue
            if isinstance(replacement, str):
                # Assigning run.text replaces the whole run content
                # (pictures included), so only write when the marker is
                # really present in this run.
                if placeholder in run.text:
                    run.text = run.text.replace(placeholder, replacement)
            elif isinstance(replacement, tuple) and len(replacement) == 3:
                images[placeholder] = replacement

        if not images:
            return

        # Cut the run text into an ordered list of plain-text pieces (str)
        # and picture specs (tuple), one picture per marker occurrence.
        segments = [run.text]
        for placeholder, image in images.items():
            expanded = []
            for segment in segments:
                if not isinstance(segment, str):
                    expanded.append(segment)
                    continue
                pieces = segment.split(placeholder)
                expanded.append(pieces[0])
                for piece in pieces[1:]:
                    expanded.append(image)
                    expanded.append(piece)
            segments = expanded

        if len(segments) == 1:
            # No picture marker in this run: leave its content untouched.
            return

        # Rebuild the run once; clear() drops the content but keeps the
        # run formatting.
        run.clear()
        for segment in segments:
            if isinstance(segment, str):
                if segment:
                    run.add_text(segment)
            else:
                image_path, width, height = segment
                run.add_picture(image_path, width=Pt(width), height=Pt(height))

    def _process_tables(self, doc_tables, placeholders):
        """Apply ``placeholders`` to every cell, including nested tables."""
        for table in doc_tables:
            for row in table.rows:
                for cell in row.cells:
                    self._process_paragraphs(cell.paragraphs, placeholders)
                    # Tables placed inside a cell are not part of
                    # cell.paragraphs, so descend into them explicitly.
                    self._process_tables(cell.tables, placeholders)

    def fill(self, placeholders):
        """Replace placeholders in the body paragraphs and in all tables."""
        self._process_paragraphs(self.doc.paragraphs, placeholders)
        self._process_tables(self.doc.tables, placeholders)

    def convert_to_pdf(self, docx_path, pdf_path):
        """Convert ``docx_path`` to ``pdf_path`` with headless LibreOffice.

        Args:
            docx_path: Existing ``.docx`` file to convert.
            pdf_path: Destination of the PDF; its directory is created when
                missing.

        Returns:
            ``True`` when the PDF was produced, ``False`` when LibreOffice
            reported an error or produced no output (a message is printed).

        Raises:
            ValueError: If no LibreOffice executable path was configured.
        """
        if not self.libreoffice_path:
            raise ValueError(
                'libreoffice_path is required to convert a document to PDF')

        # dirname() is '' for a bare file name; LibreOffice needs a real
        # directory for --outdir.
        output_dir = os.path.dirname(pdf_path) or '.'
        os.makedirs(output_dir, exist_ok=True)

        # LibreOffice names its output after the input file, so convert
        # first and then move the result to the requested path.
        temp_pdf_path = os.path.join(
            output_dir, os.path.basename(docx_path).rsplit('.', 1)[0] + '.pdf')
        try:
            subprocess.run([
                self.libreoffice_path, '--headless',
                '--convert-to', 'pdf:writer_pdf_Export',
                '--outdir', output_dir, docx_path], check=True)
        except subprocess.CalledProcessError as e:
            print(f"convert the document error: {e}")
            return False

        if not os.path.exists(temp_pdf_path):
            # A zero exit status does not guarantee that a file was written.
            print(f"convert the document error: {temp_pdf_path} was not created")
            return False

        shutil.move(temp_pdf_path, pdf_path)
        print(f"conversion successful: {pdf_path}")
        return True

    def to_docx(self, outputfile):
        """Save the (filled) document as a ``.docx`` file or stream."""
        self.doc.save(path_or_stream=outputfile)
        print('to docx complete')

    def to_pdf(self, outputfile):
        """Save the (filled) document as a PDF file.

        The document is first written to a temporary ``.docx`` in the
        current working directory, converted, and the temporary file is
        removed again.

        Returns:
            ``True`` when the PDF was produced, ``False`` otherwise.
        """
        temp_file = f'{int(time.time() * 1e6)}.docx'
        try:
            self.to_docx(temp_file)
            converted = self.convert_to_pdf(temp_file, outputfile)
        finally:
            # The save itself may have failed before the file existed.
            if os.path.exists(temp_file):
                os.remove(temp_file)
        if converted:
            print('to pdf complete')
        return converted
if __name__ == '__main__':
    # Demo: fill template.docx and export the result as a PDF.
    # Point this at the local LibreOffice "soffice" executable.
    soffice_path = 'D:/LibreOffice/program/soffice.exe'
    filler = DocFiller('template.docx', soffice_path)

    # str values replace the marker with text; (path, width, height)
    # tuples replace it with a picture.
    replacements = {
        '{{meeting_time}}': '2024年12月12日 下午3点半',
        '{{meeting_address}}': '中央会议室',
        '{{host}}': '小白',
        '{{photo}}': ('./0.png', 80, 80),
        '{{image1}}': ('./1.jpg', 8, 8),
        '{{image2}}': ('./2.jpg', 300, 200),
    }
    filler.fill(replacements)

    # To produce a Word file instead, call filler.to_docx('data/output.docx').
    # Produce the PDF file.
    filler.to_pdf('data/output.pdf')
说明
- 本程序依赖 python-docx 库和 LibreOffice,可以实现 docx 文件模板内容替换。
- 如果替换后只需生成 Word 文档,则无需安装 LibreOffice,也不用指定 LibreOffice 可执行文件路径。
- 如果需要生成 PDF 文件,则需要安装 LibreOffice,并将程序中的 LibreOffice 可执行文件路径修改为自己的路径。