Same thing for tearpages

2014-06-30 00:26:06 +02:00 · 2014-06-30 00:26:06 +02:00 · 5846cfb0ac
commit 5846cfb0ac
parent d4286ea5de
3 changed files with 2 additions and 72 deletions
--- a/README.md
+++ b/README.md
@ -56,7 +56,7 @@ Should be almost working and usable now, although still to be considered as **ex
 ```
 git clone https://github.com/Phyks/BMC
 ```
-* Install `arxiv2bib`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi
+* Install `arxiv2bib`, `tear-pages`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi
 ```
 sudo pip install arxiv2bib requesocks bibtexparser pyPDF2 isbntools
 ```
--- a/bmc.py
+++ b/bmc.py
@ -211,7 +211,7 @@ def addFile(src, filetype, manual, autoconfirm, tag):
    # Remove first page of IOP papers
    try:
        if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
-            tearpages.tearpage(new_name)
+            tearpages.main(new_name)
    except:
        pass
--- a/tearpages.py
+++ b/tearpages.py
@ -1,70 +0,0 @@
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
 # Author: Francois Boulogne
 # License: GPLv3
 __version__ = '0.1'
 import argparse
 import shutil
 import tempfile
 from PyPDF2 import PdfFileWriter, PdfFileReader
 from PyPDF2.utils import PdfReadError
 def fixPdf(pdfFile, destination):
    """
    Fix malformed pdf files when data are present after '%%EOF'

    Copies pdfFile line by line, stopping after the first line that
    contains the '%%EOF' marker, and stores the result at destination.
    When no marker is found the whole file is copied.  The data is staged
    in a temporary file before being copied, so destination may be the
    same path as pdfFile.

    :param pdfFile: PDF filepath
    :param destination: destination
    """
    # One context-managed temporary file: it is closed (and removed) even
    # if reading or copying fails.
    with tempfile.NamedTemporaryFile() as tmp:
        with open(pdfFile, "rb") as fh:
            for line in fh:
                tmp.write(line)
                if b'%%EOF' in line:
                    # Everything after the end-of-file marker is dropped.
                    break
        # Push buffered bytes to disk before the file is re-read by name.
        tmp.flush()
        shutil.copy(tmp.name, destination)
 def tearpage(filename):
    """
    Copy filename to a tempfile, write pages 1..N to filename.

    The first page (index 0) is dropped and filename is overwritten in
    place.  If PyPDF2 rejects the copy with PdfReadError, the file is
    passed through fixPdf and read a second time; a second failure
    propagates to the caller.

    :param filename: PDF filepath
    """
    # The temporary copy and the reader's stream are kept alive until the
    # writer has finished, in case page data is pulled from the stream at
    # write time.
    with tempfile.NamedTemporaryFile() as tmp:
        # Copy the pdf to a tmp file
        shutil.copy(filename, tmp.name)
        stream = open(tmp.name, 'rb')
        try:
            # Read the copied pdf
            try:
                input_file = PdfFileReader(stream)
            except PdfReadError:
                # Retry once after cutting everything past '%%EOF'.
                stream.close()
                fixPdf(filename, tmp.name)
                stream = open(tmp.name, 'rb')
                input_file = PdfFileReader(stream)
            # Seek for the number of pages
            num_pages = input_file.getNumPages()
            # Write pages excepted the first one
            output_file = PdfFileWriter()
            for i in range(1, num_pages):
                output_file.addPage(input_file.getPage(i))
            # Closing the output explicitly guarantees the new PDF is
            # flushed to disk before returning.
            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            stream.close()
 if __name__ == '__main__':
    # Command-line entry point: takes one PDF path and strips its first
    # page in place; --version prints the module version.
    cli = argparse.ArgumentParser(
        description=('Remove the first page ' +
                     'of a PDF'),
        epilog='',
    )
    cli.add_argument('--version', action='version', version=__version__)
    cli.add_argument('pdf', metavar='PDF', help='PDF filepath')
    options = cli.parse_args()
    tearpage(options.pdf)