From 5846cfb0ac825e9d5fbe80ad8b280432b6b951a5 Mon Sep 17 00:00:00 2001 From: Phyks Date: Mon, 30 Jun 2014 00:26:06 +0200 Subject: [PATCH] Same thing for tearpages --- README.md | 2 +- bmc.py | 2 +- tearpages.py | 70 ---------------------------------------------------- 3 files changed, 2 insertions(+), 72 deletions(-) delete mode 100644 tearpages.py diff --git a/README.md b/README.md index 27ea770..8b77f0b 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ Should be almost working and usable now, although still to be considered as **ex ``` git clone https://github.com/Phyks/BMC ``` -* Install `arxiv2bib`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi +* Install `arxiv2bib`, `tear-pages`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi ``` sudo pip install arxiv2bib requesocks bibtexparser pyPDF2 isbntools ``` diff --git a/bmc.py b/bmc.py index fd1bebb..79e40b4 100755 --- a/bmc.py +++ b/bmc.py @@ -211,7 +211,7 @@ def addFile(src, filetype, manual, autoconfirm, tag): # Remove first page of IOP papers try: if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article': - tearpages.tearpage(new_name) + tearpages.main(new_name) except: pass diff --git a/tearpages.py b/tearpages.py deleted file mode 100644 index 6f442d7..0000000 --- a/tearpages.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -# Author: Francois Boulogne -# License: GPLv3 - -__version__ = '0.1' - -import argparse -import shutil -import tempfile -from PyPDF2 import PdfFileWriter, PdfFileReader -from PyPDF2.utils import PdfReadError - - -def fixPdf(pdfFile, destination): - """ - Fix malformed pdf files when data are present after '%%EOF' - - :param pdfFile: PDF filepath - :param destination: destination - """ - tmp = tempfile.NamedTemporaryFile() - output = open(tmp.name, 'wb') - with open(pdfFile, "rb") as fh: - with open(pdfFile, "rb") as fh: - for line in fh: - output.write(line) - if b'%%EOF' in line: - break - output.close() - shutil.copy(tmp.name, destination) - - -def tearpage(filename): - """ - Copy filename to a tempfile, write pages 1..N to filename. - - :param filename: PDF filepath - """ - # Copy the pdf to a tmp file - tmp = tempfile.NamedTemporaryFile() - shutil.copy(filename, tmp.name) - - # Read the copied pdf - try: - input_file = PdfFileReader(open(tmp.name, 'rb')) - except PdfReadError: - fixPdf(filename, tmp.name) - input_file = PdfFileReader(open(tmp.name, 'rb')) - # Seek for the number of pages - num_pages = input_file.getNumPages() - - # Write pages excepted the first one - output_file = PdfFileWriter() - for i in range(1, num_pages): - output_file.addPage(input_file.getPage(i)) - - tmp.close() - outputStream = open(filename, "wb") - output_file.write(outputStream) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Remove the first page ' + - 'of a PDF', epilog='') - parser.add_argument('--version', action='version', version=__version__) - parser.add_argument('pdf', metavar='PDF', help='PDF filepath') - args = parser.parse_args() - - tearpage(args.pdf)