WPS 可以拆分、合并 PDF,但需要输入编辑密码,而我没有这个密码。因此写了个脚本来做拆分,顺便附上合并的代码。
代码如下(extract.py)
#!/usr/bin/env python
"""PDF拆分脚本(需要Python3.10+)
Usage::

    $ python extract.py [<pdf-file>] [<page-number>]
"""
import os
import sys
from pathlib import Path
# pip install PyMuPDF
import fitz # type:ignore[import-untyped]
# PDF under the user's Downloads directory.
SRC_FILE = Path.home().joinpath("Downloads", "yasi.pdf")
def new_one(pdf: fitz.Document, page_num: int, parent: Path | None = None) -> Path:
    """Save a single page of ``pdf`` as its own PDF file.

    :param pdf: Opened source document.
    :param page_num: 1-based number of the page to extract.
    :param parent: Directory to write into; when None the file is written
        relative to the current working directory.
    :return: Path of the written file, named ``<page_num>.pdf``.
    :raises ValueError: If ``page_num`` is not within ``1..pdf.page_count``.
    """
    # PyMuPDF does not reject out-of-range page indexes (a negative index
    # falls back to its defaults), so the wrong pages would be saved
    # silently. Validate up front instead.
    if not 1 <= page_num <= pdf.page_count:
        raise ValueError(
            f"page_num must be between 1 and {pdf.page_count}, got {page_num}"
        )
    target = Path(f"{page_num}.pdf")
    if parent is not None:
        target = parent / target.name
    index = page_num - 1  # PyMuPDF page indexes are 0-based
    # The context manager closes the new document even if saving fails.
    with fitz.Document() as new_pdf:
        new_pdf.insert_pdf(pdf, from_page=index, to_page=index)
        new_pdf.save(target)
    return target
def extract(
    file: Path,
    num: int | None = None,
) -> Path:
    """Split a PDF into single-page files.

    :param file: Path of the PDF to split.
    :param num: 1-based page to extract; when None (the default) every
        page is extracted.
    :return: The written file when ``num`` is given; otherwise the
        directory (named after the stem of ``file``, relative to the
        current working directory) that holds one file per page.
    """
    with fitz.open(file) as doc:
        if num is not None:
            return new_one(doc, num)
        folder = Path(file.stem)
        if not folder.exists():
            # Create first, then report, so the message is never printed
            # for a directory that failed to be created.
            folder.mkdir()
            print(f"Directory {folder} created!")
        print(f"Total pages of {file} is {doc.page_count}.")
        # Separate loop name so the ``num`` parameter is not shadowed.
        for page_num in range(1, doc.page_count + 1):
            new_one(doc, page_num, folder)
        return folder
def _ask_for_pdf() -> Path:
    """Prompt repeatedly until the user enters the path of an existing file."""
    while True:
        answer = input("请输入要拆分的PDF文件路径:").strip()
        if "~" in answer:
            answer = os.path.expanduser(answer)
        if (candidate := Path(answer)).is_file():
            return candidate
        print(f"文件{candidate}不存在,请重新输入。\n")


def main() -> None:
    """Command-line entry point: split a PDF into single-page files.

    Accepted argument forms:

    * no arguments: split every page of ``SRC_FILE``;
    * ``<page-number>``: extract that page from ``SRC_FILE``;
    * ``<pdf-file> [<page-number>]``: split the given file, optionally
      extracting a single page.

    If the first argument names a ``.pdf`` that does not exist, a message
    is printed and the process exits with status 1. If the file to split
    does not exist after argument parsing, the user is prompted for one.
    """
    file = SRC_FILE
    page_num: int | None = None
    args = sys.argv[1:]
    if args:
        first = args[0]
        if first.isdigit():
            page_num = int(first)
        elif (given := Path(first)).is_file():
            file = given
            if args[1:] and args[1].isdigit():
                page_num = int(args[1])
        elif given.suffix.lower() == ".pdf":
            print(f"文件`{given}`不存在!")
            # Stop here: falling through would split the default file
            # instead of the one the user asked for.
            sys.exit(1)
    if not file.exists():
        # Also reached when only a page number was given and the default
        # file is missing.
        file = _ask_for_pdf()
    dst = extract(file, page_num)
    if dst.is_file():
        print(f"Save file to {dst}")
    else:
        print(f"Save files to {dst}{os.sep}")
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":  # pragma: no cover
    main()
合并的代码如下:
from pathlib import Path
import fitz
def merge(*files: str, new_name: str | None = None, verbose=True) -> Path:
ps = [Path(i) for i in files]
if new_name is None:
new_name = '_'.join(i.stem for i in ps) + '.pdf'
target = Path(new_name)
new_pdf = fitz.Document()
for p in ps:
with fitz.open(p) as f:
new_pdf.insert_pdf(f)
new_pdf.save(target)
if verbose:
print(f'Save file to {target}')
return target
# Example: merge 1.pdf and 2.pdf; with no new_name the output is named 1_2.pdf.
merge('1.pdf', '2.pdf')