有時如果要將幾份 PDF 合成一份,或者刪除某幾頁,要俾錢的 Acrobat 先做到。但這些操作明明很簡單,所以不如自己寫一小段 Python,用上免費的 library(PyPDF2)。
有三個功能:
第一個是合併幾份 PDF;
第二個是拆散 PDF,每頁存成一個獨立檔案;
第三個是抽取其中連續的某幾頁。
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger
import sys
import os
def merge(pdf_list):
    """Merge several PDF files into a single new PDF.

    Args:
        pdf_list: Sequence of file paths. Every element except the last is
            an input PDF, appended in the given order; the last element is
            the path of the output file to create.

    Returns:
        False if any input file is missing or if the output file already
        exists (nothing is written in either case). None after the merged
        file has been written.
    """
    input_pdf = pdf_list[:-1]
    output_pdf = pdf_list[-1]
    # Validate every path before opening anything.
    for pdf in input_pdf:
        if not os.path.exists(pdf):
            print("error: {} not exists".format(pdf))
            return False
    if os.path.exists(output_pdf):
        print("error output file {} already exists".format(output_pdf))
        # Refuse to overwrite an existing file.
        return False
    merger = PdfFileMerger()
    handles = []
    try:
        for pdf in input_pdf:
            # The inputs must stay open until the merged output is written,
            # so they are tracked here and closed together in `finally`.
            handle = open(pdf, 'rb')
            handles.append(handle)
            merger.append(handle)
        with open(output_pdf, 'wb') as fout:
            merger.write(fout)
    finally:
        merger.close()
        for handle in handles:
            handle.close()
def split(input_pdf):
    """Split a PDF into one single-page PDF file per page.

    Each page is written to "<input_pdf>-page<i>.pdf", where <input_pdf> is
    the path exactly as given (extension included) and <i> is the zero-based
    page index.

    Args:
        input_pdf: Path of the PDF file to split.

    Returns:
        False if the input file does not exist; None after all pages have
        been written.
    """
    if not os.path.exists(input_pdf):
        print("error. file {} not exists".format(input_pdf))
        return False
    # Keep the source open for the whole loop (pages are fetched from the
    # reader one at a time) and close it deterministically afterwards.
    with open(input_pdf, "rb") as source:
        inputpdf = PdfFileReader(source)
        for i in range(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            with open("{}-page{}.pdf".format(input_pdf, i), "wb") as outputStream:
                output.write(outputStream)
def extract(input_pdf, start, end):
    """Extract a contiguous page range from a PDF into a new PDF file.

    Pages are numbered from 1 and the range is inclusive on both ends. If
    `end` is past the last page, extraction stops at the last page. The
    output is written next to the input as "<name>_<start>-<end>.pdf",
    where <name> is `input_pdf` without its extension and <start>/<end> are
    the values requested by the caller.

    Args:
        input_pdf: Path of the source PDF.
        start: First page to extract (1-based).
        end: Last page to extract (1-based, inclusive).

    Returns:
        False if the input file does not exist or the range is invalid
        (start < 1, end < start, or start beyond the last page); no output
        file is created in those cases. None after a successful extraction.
    """
    if not os.path.exists(input_pdf):
        print("error. file {} not exists".format(input_pdf))
        return False
    # A start below 1 would become a negative page index further down.
    if start < 1 or end < start:
        print("error. invalid page range {}-{}".format(start, end))
        return False
    input_name = os.path.splitext(input_pdf)[0]
    output_name = "{}_{}-{}.pdf".format(input_name, start, end)
    with open(input_pdf, "rb") as source:
        inputpdf = PdfFileReader(source)
        if start > inputpdf.numPages:
            print("error. start page {} is beyond the last page {}".format(
                start, inputpdf.numPages))
            return False
        # Clamp the inclusive last page to what the document actually has.
        last = min(inputpdf.numPages, end)
        print("extract {} from page {} to page {} to file {}".format(
            input_pdf, start, last, output_name))
        output = PdfFileWriter()
        for page_number in range(start, last + 1):
            # PyPDF2 page indices are zero-based; user page numbers are not.
            output.addPage(inputpdf.getPage(page_number - 1))
        # Open the output only once validation has passed, and write while
        # the source is still open.
        with open(output_name, "wb") as outputStream:
            output.write(outputStream)
def help():
    """Print command-line usage for the merge, split and extract actions."""
    print("{} ACTION <arguments>".format(sys.argv[0]))
    print("ACTION")
    # For merge, the last path given is the output file.
    print(" merge <input pdf files separated by space> <output.pdf>")
    print(" split input.pdf")
    print(" extract input.pdf <start page>[:end page]")
def _main(argv):
    """Dispatch the command line to merge, split or extract.

    Args:
        argv: Full argument vector, program name first (as in sys.argv).

    Returns:
        Process exit status: 0 on success, 1 on a usage error or when the
        selected action reports failure by returning False.
    """
    if len(argv) < 2:
        help()
        return 1
    action = argv[1]
    if action == "merge":
        if len(argv) < 4:
            print("at least two pdf files to be merged")
            help()
            return 1
        result = merge(argv[2:])
    elif action == "split":
        if len(argv) < 3:
            print("an input pdf file is required for split")
            help()
            return 1
        result = split(argv[2])
    elif action == "extract":
        if len(argv) < 4:
            help()
            return 1
        input_pdf = argv[2]
        # Accept "N" for a single page or "N:M" for an inclusive range.
        page_range = argv[3].split(":")
        try:
            start = int(page_range[0])
            end = int(page_range[1]) if len(page_range) > 1 else start
        except ValueError:
            print("invalid page range: {}".format(argv[3]))
            help()
            return 1
        result = extract(input_pdf, start, end)
    else:
        print("unknown action: {}".format(action))
        help()
        return 1
    # The actions signal failure by returning False; anything else is success.
    return 1 if result is False else 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))