2014-04-28 22:23:05 +02:00
|
|
|
#!/usr/bin/env python2
|
2014-04-26 23:26:25 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Author: Francois Boulogne
|
|
|
|
# License: GPLv3
|
|
|
|
|
|
|
|
__version__ = '0.1'
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import shutil
|
|
|
|
import tempfile
|
|
|
|
from PyPDF2 import PdfFileWriter, PdfFileReader
|
|
|
|
from PyPDF2.utils import PdfReadError
|
|
|
|
|
|
|
|
|
|
|
|
def fixPdf(pdfFile, destination):
    """
    Fix malformed pdf files when data are present after '%%EOF'

    The input is copied line by line up to and including the first line
    that contains the ``%%EOF`` marker; everything after that line is
    dropped. If no marker is found, the whole file is copied.

    :param pdfFile: PDF filepath
    :param destination: destination
    """
    # The truncated copy is staged in a temporary file so that the input is
    # fully read and closed before ``destination`` is written (the two paths
    # may therefore be identical).
    with tempfile.NamedTemporaryFile() as tmp:
        with open(pdfFile, "rb") as fh:
            for line in fh:
                tmp.write(line)
                if b'%%EOF' in line:
                    break
        # Push buffered bytes to disk before copying the file by name.
        tmp.flush()
        shutil.copy(tmp.name, destination)
|
|
|
|
|
2014-04-28 22:23:05 +02:00
|
|
|
|
2014-04-26 23:26:25 +02:00
|
|
|
def tearpage(filename):
    """
    Copy filename to a tempfile, write pages 1..N to filename.

    The first page (index 0) is dropped and ``filename`` is overwritten in
    place with the remaining pages. If the copy cannot be parsed, it is
    first repaired with ``fixPdf`` and read again.

    :param filename: PDF filepath
    """
    # Copy the pdf to a tmp file; the context manager removes it on exit,
    # including when an exception is raised.
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        # Read the copied pdf. The stream must stay open until the output
        # has been written, because the pages added below are read from it.
        stream = open(tmp.name, 'rb')
        try:
            try:
                input_file = PdfFileReader(stream)
            except PdfReadError:
                # Unreadable copy: rewrite it without the data following
                # '%%EOF' and retry on a fresh handle.
                stream.close()
                fixPdf(filename, tmp.name)
                stream = open(tmp.name, 'rb')
                input_file = PdfFileReader(stream)

            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Write pages excepted the first one
            output_file = PdfFileWriter()
            for i in range(1, num_pages):
                output_file.addPage(input_file.getPage(i))

            # Close the output explicitly so the new content is flushed to
            # disk before returning.
            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            stream.close()
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: takes one PDF path and removes its first
    # page in place; --version prints the script version.
    summary = 'Remove the first page of a PDF'
    cli = argparse.ArgumentParser(description=summary, epilog='')
    cli.add_argument('--version', action='version', version=__version__)
    cli.add_argument('pdf', metavar='PDF', help='PDF filepath')

    options = cli.parse_args()
    tearpage(options.pdf)
|