commit
20517210ff
@ -56,7 +56,7 @@ Should be almost working and usable now, although still to be considered as **ex
|
|||||||
```
|
```
|
||||||
git clone https://github.com/Phyks/BMC
|
git clone https://github.com/Phyks/BMC
|
||||||
```
|
```
|
||||||
* Install `arxiv2bib`, `tear-pages`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbnlib` _via_ Pypi
|
* Install `arxiv2bib`, `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbnlib` _via_ PyPI
|
||||||
```
|
```
|
||||||
sudo pip install arxiv2bib requesocks bibtexparser pyPDF2 isbnlib
|
sudo pip install arxiv2bib requesocks bibtexparser pyPDF2 isbnlib
|
||||||
```
|
```
|
||||||
@ -132,7 +132,6 @@ All the source code I wrote is under a `no-alcohol beer-ware license`. All funct
|
|||||||
* ---------------------------------------------------------------------------------
|
* ---------------------------------------------------------------------------------
|
||||||
```
|
```
|
||||||
|
|
||||||
I used the `tearpages.py` script from sciunto, which can be found [here](https://github.com/sciunto/tear-pages) and is released under a GNU GPLv3 license.
|
|
||||||
|
|
||||||
## Inspiration
|
## Inspiration
|
||||||
|
|
||||||
|
57
libbmc/tearpages.py
Executable file
57
libbmc/tearpages.py
Executable file
@ -0,0 +1,57 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Author: Francois Boulogne
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from PyPDF2 import PdfFileWriter, PdfFileReader
|
||||||
|
from PyPDF2.utils import PdfReadError
|
||||||
|
|
||||||
|
|
||||||
|
def _fixPdf(pdfFile, destination):
    """
    Fix malformed pdf files when data are present after '%%EOF'

    The source is copied line by line into a temporary file, stopping right
    after the first line that contains the ``%%EOF`` marker. The temporary
    file is then copied to ``destination``. If no marker is found, the whole
    file is copied. Going through a temporary file keeps the function usable
    when ``pdfFile`` and ``destination`` are the same path.

    :param pdfFile: PDF filepath
    :param destination: destination
    """
    # NOTE(review): truncation happens at the *first* '%%EOF' line; a PDF
    # that legitimately holds several markers would lose everything after
    # the first one -- confirm this is acceptable for the callers.
    with tempfile.NamedTemporaryFile() as tmp:
        with open(pdfFile, "rb") as fh:
            for line in fh:
                tmp.write(line)
                if b'%%EOF' in line:
                    break
        # Push buffered bytes to disk before the file is re-read by name.
        tmp.flush()
        shutil.copy(tmp.name, destination)
|
||||||
|
|
||||||
|
|
||||||
|
def tearpage(filename, startpage=1):
    """
    Copy filename to a tempfile, write pages startpage..N to filename.

    The file is rewritten in place: ``filename`` is overwritten with the
    pages whose zero-based index is ``startpage`` or greater, so the default
    of 1 drops the first page. If the PDF cannot be parsed, it is passed
    through ``_fixPdf`` once and parsing is retried; a second
    ``PdfReadError`` propagates to the caller.

    :param filename: PDF filepath
    :param startpage: page number for the new first page
    """
    # Copy the pdf to a tmp file
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        # Read the copied pdf
        source = open(tmp.name, 'rb')
        try:
            try:
                input_file = PdfFileReader(source)
            except PdfReadError:
                # Drop the handle on the broken copy before replacing it.
                source.close()
                _fixPdf(filename, tmp.name)
                source = open(tmp.name, 'rb')
                input_file = PdfFileReader(source)
            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Write pages except the ones before startpage
            output_file = PdfFileWriter()
            for i in range(startpage, num_pages):
                output_file.addPage(input_file.getPage(i))

            # The source stream stays open until the write is finished, so
            # the writer can still pull page data from it.
            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            source.close()
|
Loading…
Reference in New Issue
Block a user