Same thing for tearpages
This commit is contained in:
parent
d4286ea5de
commit
5846cfb0ac
@ -56,7 +56,7 @@ Should be almost working and usable now, although still to be considered as **ex
|
|||||||
```
|
```
|
||||||
git clone https://github.com/Phyks/BMC
|
git clone https://github.com/Phyks/BMC
|
||||||
```
|
```
|
||||||
* Install `arxiv2bib`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi
|
* Install `arxiv2bib`, `tear-pages`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi
|
||||||
```
|
```
|
||||||
sudo pip install arxiv2bib requesocks bibtexparser pyPDF2 isbntools
|
sudo pip install arxiv2bib tear-pages requesocks bibtexparser PyPDF2 isbntools
|
||||||
```
|
```
|
||||||
|
2
bmc.py
2
bmc.py
@ -211,7 +211,7 @@ def addFile(src, filetype, manual, autoconfirm, tag):
|
|||||||
# Remove first page of IOP papers
|
# Remove first page of IOP papers
|
||||||
try:
|
try:
|
||||||
if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
|
if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
|
||||||
tearpages.tearpage(new_name)
|
tearpages.main(new_name)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
70
tearpages.py
70
tearpages.py
@ -1,70 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Author: Francois Boulogne
|
|
||||||
# License: GPLv3
|
|
||||||
|
|
||||||
__version__ = '0.1'
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import shutil
|
|
||||||
import tempfile
|
|
||||||
from PyPDF2 import PdfFileWriter, PdfFileReader
|
|
||||||
from PyPDF2.utils import PdfReadError
|
|
||||||
|
|
||||||
|
|
||||||
def fixPdf(pdfFile, destination):
    """
    Fix malformed pdf files when data are present after '%%EOF'

    The source is copied line by line into a temporary file, stopping right
    after the first line that contains ``%%EOF``; the truncated copy is then
    copied to ``destination``. If no line contains the marker, the whole
    file is copied unchanged.

    :param pdfFile: PDF filepath
    :param destination: destination
    """
    # A single managed temporary file: it is closed (and removed) even if
    # reading or copying raises.
    with tempfile.NamedTemporaryFile() as tmp:
        with open(pdfFile, "rb") as fh:
            for line in fh:
                tmp.write(line)
                # Everything after the first end-of-file marker is dropped.
                if b'%%EOF' in line:
                    break
        # shutil.copy reads the file back by name, so the buffered data must
        # be on disk before the copy starts.
        tmp.flush()
        shutil.copy(tmp.name, destination)
|
|
||||||
|
|
||||||
|
|
||||||
def tearpage(filename):
    """
    Copy filename to a tempfile, write pages 1..N to filename.

    The first page (index 0) is dropped and ``filename`` is overwritten in
    place with the remaining pages. If the copy cannot be parsed
    (``PdfReadError``), it is regenerated with ``fixPdf`` and parsed again.

    :param filename: PDF filepath
    """
    # The temporary copy must stay alive until the output is fully written:
    # the writer pulls page data from the reader's stream at write time.
    with tempfile.NamedTemporaryFile() as tmp:
        # Copy the pdf to a tmp file
        shutil.copy(filename, tmp.name)

        # Read the copied pdf
        source = open(tmp.name, 'rb')
        try:
            try:
                input_file = PdfFileReader(source)
            except PdfReadError:
                # Rebuild the temp copy without trailing data, then reopen it
                # so the reader starts from a fresh stream.
                source.close()
                fixPdf(filename, tmp.name)
                source = open(tmp.name, 'rb')
                input_file = PdfFileReader(source)

            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Write pages excepted the first one
            output_file = PdfFileWriter()
            for i in range(1, num_pages):
                output_file.addPage(input_file.getPage(i))

            # Closing the output stream explicitly guarantees the data is
            # flushed to disk before the function returns.
            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            source.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Command-line entry point: takes one PDF path and removes its first page.
    cli = argparse.ArgumentParser(
        description='Remove the first page of a PDF',
        epilog='',
    )
    cli.add_argument('--version', action='version', version=__version__)
    cli.add_argument('pdf', metavar='PDF', help='PDF filepath')
    options = cli.parse_args()

    tearpage(options.pdf)
|
|
Loading…
Reference in New Issue
Block a user