From f2bfdf5336b6ac8e3199ea6e42031afe03ffcc66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Boulogne?=
Date: Sat, 12 Jul 2014 23:00:59 -0400
Subject: [PATCH 1/2] I, F. Boulogne, as the author, relicense this code.

---
 libbmc/tearpages.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100755 libbmc/tearpages.py

diff --git a/libbmc/tearpages.py b/libbmc/tearpages.py
new file mode 100755
index 0000000..c21989f
--- /dev/null
+++ b/libbmc/tearpages.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Author: Francois Boulogne
+
+import os
+import shutil
+import tempfile
+from PyPDF2 import PdfFileWriter, PdfFileReader
+from PyPDF2.utils import PdfReadError
+
+
+def _fixPdf(pdfFile, destination):
+    """
+    Fix malformed pdf files when data are present after '%%EOF'
+
+    Copy pdfFile to destination, stopping after the first line that
+    contains the '%%EOF' marker. The two paths may be identical.
+
+    :param pdfFile: PDF filepath
+    :param destination: path the truncated copy is written to
+    """
+    # Buffer through an anonymous temporary file so that the source is fully
+    # read before the destination is opened for writing.
+    with tempfile.TemporaryFile() as buf:
+        with open(pdfFile, "rb") as fh:
+            for line in fh:
+                buf.write(line)
+                if b'%%EOF' in line:
+                    break
+        buf.seek(0)
+        with open(destination, "wb") as output:
+            shutil.copyfileobj(buf, output)
+
+
+def tearpage(filename, startpage=1):
+    """
+    Copy filename to a tempfile, write pages startpage..N to filename.
+
+    :param filename: PDF filepath, overwritten in place
+    :param startpage: zero-based index of the page that becomes the new
+                      first page (the default, 1, drops the first page)
+    """
+    # Copy the pdf to a tmp file
+    fd, tmp_path = tempfile.mkstemp(suffix='.pdf')
+    os.close(fd)
+    try:
+        shutil.copyfile(filename, tmp_path)
+
+        # Read the copied pdf
+        source = open(tmp_path, 'rb')
+        try:
+            try:
+                input_file = PdfFileReader(source)
+            except PdfReadError:
+                # Retry on a copy truncated after '%%EOF'
+                source.close()
+                _fixPdf(filename, tmp_path)
+                source = open(tmp_path, 'rb')
+                input_file = PdfFileReader(source)
+
+            # Keep every page from startpage onwards
+            output_file = PdfFileWriter()
+            for i in range(startpage, input_file.getNumPages()):
+                output_file.addPage(input_file.getPage(i))
+
+            # PyPDF2 may still read page data from `source` while writing,
+            # so it must stay open until the output is complete.
+            with open(filename, 'wb') as output_stream:
+                output_file.write(output_stream)
+        finally:
+            source.close()
+    finally:
+        os.remove(tmp_path)

From 61801c50f688064c0b1283eef64ab5a3760c7db1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Boulogne?=
Date: Sat, 12 Jul 2014 23:03:02 -0400
Subject: [PATCH 2/2] Update readme for tearpages

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fb0f463..1195dad 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ Should be almost working and usable now, although still to be considered as **ex
 ```
 git clone https://github.com/Phyks/BMC
 ```
-* Install `arxiv2bib`, `tear-pages`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbnlib` _via_ Pypi
+* Install `arxiv2bib`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbnlib` _via_ Pypi
 ```
 sudo pip install arxiv2bib requesocks bibtexparser pyPDF2 isbnlib
 ```
@@ -132,7 +132,6 @@ All the source code I wrote is under a `no-alcohol beer-ware license`. All funct
 * ---------------------------------------------------------------------------------
 ```
 
-I used the `tearpages.py` script from sciunto, which can be found [here](https://github.com/sciunto/tear-pages) and is released under a GNU GPLv3 license.
 
 ## Inspiration
 