BLACK CAT PROGRAMMER

pdf tools

有時如果要將幾份pdf 合成一份,或者刪除某幾頁,要俾錢的 acrobat 先做到,但明明很簡單,所以不如自己寫一段仔python,用上免費的library…

有三個功能:
第一個是合併幾份pdf
第二個是拆散pdf
第三個是抽取其中某幾頁

from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger
import sys
import os
           
def merge(pdf_list):
    """Merge several PDF files into one.

    Args:
        pdf_list: Sequence of paths. Every entry except the last is an input
            PDF (merged in the given order); the last entry is the output path.

    Returns:
        True when the merged file was written, False when the arguments were
        rejected (too few paths, a missing input, or an existing output file).
    """
    if len(pdf_list) < 2:
        print("error: need at least one input pdf and an output pdf")
        return False

    input_pdf = pdf_list[:-1]
    output_pdf = pdf_list[-1]
    # check file exists
    for pdf in input_pdf:
        if not os.path.exists(pdf):
            print("error: {} not exists".format(pdf))
            return False
    if os.path.exists(output_pdf):
        print("error output file {} already exists".format(output_pdf))
        # Refuse to overwrite: without this return the existing file would be
        # clobbered right after the error message was printed.
        return False

    merger = PdfFileMerger()
    handles = []
    try:
        for pdf in input_pdf:
            # The inputs must stay open until merger.write() has finished,
            # so they are tracked here and closed in the finally clause.
            handle = open(pdf, 'rb')
            handles.append(handle)
            merger.append(handle)

        with open(output_pdf, 'wb') as fout:
            merger.write(fout)
    finally:
        merger.close()
        for handle in handles:
            handle.close()
    return True

def split(input_pdf):
    """Split a PDF into one single-page PDF per page.

    Each page is written to "<input_pdf>-page<i>.pdf", where <input_pdf> is
    the path exactly as given (including its extension) and <i> is the
    zero-based page index.

    Args:
        input_pdf: Path of the PDF to split.

    Returns:
        True when all pages were written, False when the input is missing.
    """
    if not os.path.exists(input_pdf):
        print("error. file {} not exists".format(input_pdf))
        return False

    # Keep the source open for the whole loop (the pages are written while
    # it is still open) and close it deterministically afterwards.
    with open(input_pdf, "rb") as source:
        reader = PdfFileReader(source)

        for i in range(reader.numPages):
            output = PdfFileWriter()
            output.addPage(reader.getPage(i))
            with open("{}-page{}.pdf".format(input_pdf, i), "wb") as outputStream:
                output.write(outputStream)
    return True

def extract(input_pdf, start, end):
    """Copy an inclusive, 1-based page range of a PDF into a new file.

    The output is written next to the input as "<name>_<start>-<end>.pdf",
    where <name> is the input path without its extension and <start>/<end>
    are the requested values. If <end> is beyond the last page, extraction
    stops at the last page.

    Args:
        input_pdf: Path of the source PDF.
        start: First page to extract (1-based).
        end: Last page to extract (1-based, inclusive).

    Returns:
        True when the output file was written, False when the input is
        missing or the page range is invalid.
    """
    if not os.path.exists(input_pdf):
        print("error. file {} not exists".format(input_pdf))
        return False
    # start < 1 would reach getPage(-1) and silently pick the last page.
    if start < 1 or end < start:
        print("error: invalid page range {}:{}".format(start, end))
        return False

    input_name = os.path.splitext(input_pdf)[0]
    output_name = "{}_{}-{}.pdf".format(input_name, start, end)

    with open(input_pdf, "rb") as source:
        reader = PdfFileReader(source)
        if start > reader.numPages:
            print("error: start page {} is beyond the last page {}".format(
                start, reader.numPages))
            return False

        # Clamp to the document length; `last` is the real final page.
        last = min(reader.numPages, end)

        print("extract {} from page {} to page {} to file {}".format(
            input_pdf, start, last, output_name))
        output = PdfFileWriter()
        for page_no in range(start, last + 1):
            output.addPage(reader.getPage(page_no - 1))

        # Only create the output once the request is known to be valid, and
        # write while the source is still open.
        with open(output_name, "wb") as output_stream:
            output.write(output_stream)
    return True

def help():
    """Print the command-line usage summary to stdout.

    The program name shown is taken from sys.argv[0].
    """
    print("{} ACTION <arguments>".format(sys.argv[0]))
    print("ACTION")
    print("  merge <input pdfs separated by space> <output.pdf>")
    print("  split input.pdf")
    print("  extract input.pdf <start page>[:end page]")
    
    
if __name__ == "__main__":
    # Command-line entry point: dispatch on the ACTION argument.
    # Usage errors and failed actions exit with status 1.
    if len(sys.argv) < 2:
        help()
        sys.exit(1)

    action = sys.argv[1]
    result = None

    if action == "merge":
        # Needs at least one input pdf plus the output path.
        if len(sys.argv) < 4:
            print("at least two pdf files to be merged")
            help()
            sys.exit(1)
        result = merge(sys.argv[2:])

    elif action == "split":
        if len(sys.argv) < 3:
            print("an input pdf file is required")
            help()
            sys.exit(1)
        result = split(sys.argv[2])

    elif action == "extract":
        if len(sys.argv) < 4:
            help()
            sys.exit(1)
        input_pdf = sys.argv[2]
        # Page range is "<start>" or "<start>:<end>"; a lone start means a
        # single page.
        page_range = sys.argv[3].split(":")
        try:
            start = int(page_range[0])
            end = int(page_range[1]) if len(page_range) > 1 else start
        except ValueError:
            print("invalid page range: {}".format(sys.argv[3]))
            help()
            sys.exit(1)

        result = extract(input_pdf, start, end)

    else:
        print("unknown action: {}".format(action))
        help()
        sys.exit(1)

    # The action functions return False on failure; anything else counts as
    # success.
    if result is False:
        sys.exit(1)
Posted in notes