Same thing for tearpages

This commit is contained in:
Phyks 2014-06-30 00:26:06 +02:00
parent d4286ea5de
commit 5846cfb0ac
3 changed files with 2 additions and 72 deletions

View File

@ -56,7 +56,7 @@ Should be almost working and usable now, although still to be considered as **ex
```
git clone https://github.com/Phyks/BMC
```
* Install `arxiv2bib`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi
* Install `arxiv2bib`, `tear-pages`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi
```
sudo pip install arxiv2bib tear-pages requesocks bibtexparser pyPDF2 isbntools
```

2
bmc.py
View File

@ -211,7 +211,7 @@ def addFile(src, filetype, manual, autoconfirm, tag):
# Remove first page of IOP papers
try:
if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
tearpages.tearpage(new_name)
tearpages.main(new_name)
except:
pass

View File

@ -1,70 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Author: Francois Boulogne
# License: GPLv3
__version__ = '0.1'
import argparse
import shutil
import tempfile
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.utils import PdfReadError
def fixPdf(pdfFile, destination):
    """
    Fix malformed pdf files when data are present after '%%EOF'

    The file is copied line by line up to and including the first line
    that contains the ``%%EOF`` marker; everything after that line is
    dropped. The truncated copy is built in a temporary file first, so
    ``pdfFile`` and ``destination`` may be the same path.

    :param pdfFile: PDF filepath
    :param destination: destination
    """
    # The context manager guarantees the temporary file is closed (and
    # therefore removed) even if reading or copying fails.
    with tempfile.NamedTemporaryFile() as tmp:
        with open(pdfFile, "rb") as fh:
            for line in fh:
                tmp.write(line)
                if b'%%EOF' in line:
                    break
        # Push buffered bytes to disk before copying the file by name.
        tmp.flush()
        shutil.copy(tmp.name, destination)
def tearpage(filename):
    """
    Copy filename to a tempfile, write pages 1..N to filename.

    The first page (index 0) is dropped and ``filename`` is overwritten
    in place with the remaining pages. A single-page document therefore
    ends up with no pages at all.

    :param filename: PDF filepath
    """
    # Work on a temporary copy so the original can be rewritten in place.
    # The copy is kept until the output has been fully written, because
    # the writer pulls page data from the reader's stream.
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        stream = open(tmp.name, 'rb')
        try:
            # Read the copied pdf
            try:
                input_file = PdfFileReader(stream)
            except PdfReadError:
                # Retry once after stripping the data found after '%%EOF'.
                stream.close()
                fixPdf(filename, tmp.name)
                stream = open(tmp.name, 'rb')
                input_file = PdfFileReader(stream)

            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Write pages excepted the first one
            output_file = PdfFileWriter()
            for i in range(1, num_pages):
                output_file.addPage(input_file.getPage(i))

            # Closing the output explicitly makes sure everything is
            # flushed to disk before returning.
            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            stream.close()
if __name__ == '__main__':
    # Command-line entry point: takes one PDF path and strips its first
    # page in place; --version prints the module version.
    cli = argparse.ArgumentParser(
        description='Remove the first page of a PDF',
        epilog='',
    )
    cli.add_argument('--version', action='version', version=__version__)
    cli.add_argument('pdf', metavar='PDF', help='PDF filepath')
    options = cli.parse_args()
    tearpage(options.pdf)