Added experimental support for markdown

This commit is contained in:
Phyks 2014-01-22 02:24:20 +01:00
parent 73271e28b8
commit c7edcd3fed

View File

@ -26,16 +26,145 @@ import datetime
import subprocess import subprocess
import re import re
import locale import locale
import markdown
from hashlib import md5
from functools import cmp_to_key from functools import cmp_to_key
from time import gmtime, strftime, mktime from time import gmtime, strftime, mktime
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
# ========================
# GitHub Flavored Markdown
# ========================
def gfm(text):
    """Pre-process *text* following GitHub Flavored Markdown conventions.

    The result is meant to be fed to a Markdown renderer afterwards.
    Three rewrites are applied, none of which touch the content of
    ``<pre>...</pre>`` blocks:

    * words starting a line and holding two or more underscores
      (``foo_bar_baz``) get their underscores backslash-escaped, unless the
      line is indented by four spaces or a tab;
    * a line starting with a word character or ``<`` and followed by exactly
      one newline gets trailing whitespace appended before that newline;
    * every ``<pre>`` block is put back, preceded by a blank line.

    :param text: Markdown source.
    :returns: the rewritten source.
    """
    # Extract pre blocks so that the passes below cannot alter them.
    extractions = {}

    def pre_extraction_callback(matchobj):
        block = matchobj.group(0)
        # md5() only accepts bytes; hashing a str raises TypeError on
        # Python 3, so encode text first.
        data = block if isinstance(block, bytes) else block.encode('utf-8')
        digest = md5(data).hexdigest()
        extractions[digest] = block
        return "{gfm-extraction-%s}" % digest

    pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
    text = pattern.sub(pre_extraction_callback, text)

    # Prevent foo_bar_baz from ending up with an italic word in the middle.
    def italic_callback(matchobj):
        s = matchobj.group(0)
        if s.count('_') >= 2:
            return s.replace('_', '\\_')
        return s

    # re.MULTILINE is required for ``^`` to match at the start of every
    # line; without it only the very first line of the text is handled.
    pattern = re.compile(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', re.MULTILINE)
    text = pattern.sub(italic_callback, text)

    # In very clear cases, let newlines become <br /> tags.
    def newline_callback(matchobj):
        if len(matchobj.group(1)) == 1:
            # NOTE(review): Markdown only emits <br /> for two or more
            # trailing spaces -- confirm that the literal below was not
            # whitespace-collapsed somewhere along the way.
            return matchobj.group(0).rstrip() + ' \n'
        return matchobj.group(0)

    pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
    text = pattern.sub(newline_callback, text)

    # Insert pre block extractions.
    def pre_insert_callback(matchobj):
        block = extractions.get(matchobj.group(1))
        if block is None:
            # Placeholder-looking text that was written by the author, not
            # generated above: leave it alone instead of raising KeyError.
            return matchobj.group(0)
        return '\n\n' + block

    text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}', pre_insert_callback,
                  text)
    return text
# Test suite.
try:
    from nose.tools import assert_equal
except ImportError:
    def assert_equal(a, b):
        """Fallback used when nose is not installed.

        Raise AssertionError with an ``'a != b'`` message unless ``a == b``.
        """
        # Raise explicitly: a bare ``assert`` statement is stripped under
        # ``python -O``, which would turn every check into a no-op.
        if not a == b:
            raise AssertionError('%r != %r' % (a, b))
def test_single_underscores():
    """A word holding a single underscore must come back unchanged."""
    source = 'foo_bar'
    expected = 'foo_bar'
    assert_equal(gfm(source), expected)
def test_underscores_code_blocks():
    """Don't touch underscores in code blocks."""
    # A Markdown code block is indented by four spaces (or a tab). With
    # fewer leading spaces the check passes only because the word pattern
    # cannot start on a space, and the indentation guard is never exercised.
    source = '    foo_bar_baz'
    assert_equal(gfm(source), source)
def test_underscores_pre_blocks():
    """Underscores inside a pre block are left as they are."""
    source = '<pre>\nfoo_bar_baz\n</pre>'
    # The pre block is expected back verbatim, preceded by a blank line.
    expected = '\n\n<pre>\nfoo_bar_baz\n</pre>'
    assert_equal(gfm(source), expected)
def test_pre_block_pre_text():
    """A pre block following text is handled like one following newlines."""
    after_newlines = '\n\n<pre>\nthis is `a\\_test` and this\\_too\n</pre>'
    after_text = 'hmm<pre>\nthis is `a\\_test` and this\\_too\n</pre>'
    # Drop the differing prefixes (two newlines vs. 'hmm') before comparing.
    from_newlines = gfm(after_newlines)[2:]
    from_text = gfm(after_text)[3:]
    assert_equal(from_newlines, from_text)
def test_two_underscores():
    """A word holding two underscores gets both of them escaped."""
    source = 'foo_bar_baz'
    expected = 'foo\\_bar\\_baz'
    assert_equal(gfm(source), expected)
def test_newlines_simple():
    """A single newline between two text lines gains trailing whitespace."""
    source = 'foo\nbar'
    expected = 'foo \nbar'
    assert_equal(gfm(source), expected)
def test_newlines_group():
    """Single newlines are converted in each paragraph, not blank lines."""
    source = 'apple\npear\norange\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
def test_newlines_long_group():
    """Single newlines are converted in paragraphs of four lines as well."""
    source = 'apple\npear\norange\nbanana\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange \nbanana\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
def test_newlines_list():
    """Lines starting with a list marker keep their newlines untouched."""
    for source in ('# foo\n# bar', '* foo\n* bar'):
        # Each input is expected to come back unchanged.
        assert_equal(gfm(source), source)
# ========= # =========
# Functions # Functions
# ========= # =========
# Test if a variable exists (== isset function in PHP) # Test if a variable exists (== isset function in PHP)
# ==================================================== # ====================================================
def isset(variable): def isset(variable):
@ -110,7 +239,8 @@ def latest_articles(directory, number):
"from git.") "from git.")
latest_articles = latest_articles.strip().split("\n") latest_articles = latest_articles.strip().split("\n")
latest_articles = [x for x in latest_articles if(isint(x[4:8]) and latest_articles = [x for x in latest_articles if(isint(x[4:8]) and
x.endswith(".html"))] (x.endswith(".html") or
x.endswith(".md")))]
latest_articles.sort(key=lambda x: (get_date(x)[4:8], get_date(x)[2:4], latest_articles.sort(key=lambda x: (get_date(x)[4:8], get_date(x)[2:4],
get_date(x)[:2], get_date(x)[9:]), get_date(x)[:2], get_date(x)[9:]),
reverse=True) reverse=True)
@ -289,7 +419,8 @@ for filename in list(added_files):
except ValueError: except ValueError:
pass pass
if ((not filename.endswith(".html") and not filename.endswith(".ignore")) if ((not filename.endswith(".html") and not filename.endswith(".ignore")
and not filename.endswith(".md"))
or direct_copy): or direct_copy):
# Note : this deal with CSS, images or footer file # Note : this deal with CSS, images or footer file
print("[INFO] (Direct copy) Copying directly the file " print("[INFO] (Direct copy) Copying directly the file "
@ -327,7 +458,8 @@ for filename in list(modified_files):
except ValueError: except ValueError:
pass pass
if ((not filename.endswith("html") and not filename.endswith("ignore")) if ((not filename.endswith(".html") and not filename.endswith(".ignore")
and not filename.endswith(".md"))
or direct_copy): or direct_copy):
print("[INFO] (Direct copy) Updating directly the file " print("[INFO] (Direct copy) Updating directly the file "
+ filename[4:]+" in blog dir.") + filename[4:]+" in blog dir.")
@ -336,7 +468,7 @@ for filename in list(modified_files):
modified_files.remove(filename) modified_files.remove(filename)
continue continue
if filename.endswith("ignore"): if filename.endswith(".ignore"):
print("[INFO] (Not published) Found not published article " print("[INFO] (Not published) Found not published article "
+ filename[4:-7]+".") + filename[4:-7]+".")
added_files.remove(filename) added_files.remove(filename)
@ -364,7 +496,8 @@ for filename in list(deleted_files):
except ValueError: except ValueError:
pass pass
if ((not filename.endswith("html") and not filename.endswith("ignore")) if ((not filename.endswith(".html") and not filename.endswith(".ignore")
and not filename.endswith(".md"))
or (isset("direct_delete") and direct_delete is True)): or (isset("direct_delete") and direct_delete is True)):
print("[INFO] (Deleted file) Delete directly copied file " print("[INFO] (Deleted file) Delete directly copied file "
+ filename[4:]+" in blog dir.") + filename[4:]+" in blog dir.")
@ -560,6 +693,10 @@ for filename in added_files+modified_files:
tags_comma += ("<a href=\""+params["BLOG_URL"] + tags_comma += ("<a href=\""+params["BLOG_URL"] +
"/tags/"+tag+".html\">"+tag+"</a>") "/tags/"+tag+".html\">"+tag+"</a>")
# Markdown support
if filename.endswith(".md"):
article = markdown.markdown(gfm(article))
# Write generated HTML for this article in gen / # Write generated HTML for this article in gen /
article = replace_tags(article, search_list, replace_list) article = replace_tags(article, search_list, replace_list)