Added experimental support for markdown

2014-01-22 02:24:20 +01:00 · 2014-01-22 02:24:20 +01:00 · c7edcd3fed
commit c7edcd3fed
parent 73271e28b8
1 changed files with 142 additions and 5 deletions
--- a/pre-commit.py
+++ b/pre-commit.py
@ -26,16 +26,145 @@ import datetime
 import subprocess
 import re
 import locale
 import markdown
 from hashlib import md5
 from functools import cmp_to_key
 from time import gmtime, strftime, mktime
 from bs4 import BeautifulSoup
 # ========================
 # Github Flavored Markdown
 # ========================
 def gfm(text):
    """Rewrite GitHub Flavored Markdown conventions into plain Markdown.
    <pre>...</pre> blocks are set aside first and restored untouched at the
    end, each preceded by a blank line. In between, words at the start of a
    line that contain two or more underscores get those underscores
    escaped, and a single newline ending a line that starts with a word
    character or '<' gets a trailing space inserted before it.
    Takes the source text and returns the transformed text.
    """
    # Extract pre blocks.
    extractions = {}
    def pre_extraction_callback(matchobj):
        block = matchobj.group(0)
        # hashlib only hashes bytes: encode text first so that this also
        # works on Python 3, where passing a str raises TypeError.
        raw = block if isinstance(block, bytes) else block.encode('utf-8')
        digest = md5(raw).hexdigest()
        extractions[digest] = block
        return "{gfm-extraction-%s}" % digest
    pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
    text = re.sub(pattern, pre_extraction_callback, text)
    # Prevent foo_bar_baz from ending up with an italic word in the middle.
    def italic_callback(matchobj):
        s = matchobj.group(0)
        if s.count('_') >= 2:
            return s.replace('_', '\\_')
        return s
    # re.MULTILINE is required so that '^' anchors at every line start and
    # not only at the very beginning of the text.
    pattern = re.compile(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', re.MULTILINE)
    text = re.sub(pattern, italic_callback, text)
    # In very clear cases, let newlines become <br /> tags.
    # NOTE(review): Markdown hard line breaks normally need two trailing
    # spaces; only one is emitted here - confirm this is intended.
    def newline_callback(matchobj):
        if len(matchobj.group(1)) == 1:
            return matchobj.group(0).rstrip() + ' \n'
        else:
            return matchobj.group(0)
    pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
    text = re.sub(pattern, newline_callback, text)
    # Insert pre block extractions.
    def pre_insert_callback(matchobj):
        return '\n\n' + extractions[matchobj.group(1)]
    pattern = re.compile(r'\{gfm-extraction-([0-9a-f]{32})\}')
    text = re.sub(pattern, pre_insert_callback, text)
    return text
 # Test suite.
 # Use nose's assert_equal when nose is installed; otherwise define a
 # minimal stand-in so the tests below can still be called.
 try:
    from nose.tools import assert_equal
 except ImportError:
    def assert_equal(a, b):
        """Fail with an AssertionError showing both reprs when a != b."""
        assert a == b, '%r != %r' % (a, b)
 def test_single_underscores():
    """A word holding a single underscore must come back unchanged."""
    source = 'foo_bar'
    assert_equal(gfm(source), source)
 def test_underscores_code_blocks():
    """Don't touch underscores in code blocks."""
    # A Markdown code block is indented by four spaces; a single leading
    # space would not exercise the code-block case at all.
    source = '    foo_bar_baz'
    assert_equal(gfm(source), source)
 def test_underscores_pre_blocks():
    """Underscores inside a <pre> block are left alone."""
    source = '<pre>\nfoo_bar_baz\n</pre>'
    # gfm() re-inserts every pre block behind two newlines.
    expected = '\n\n<pre>\nfoo_bar_baz\n</pre>'
    assert_equal(gfm(source), expected)
 def test_pre_block_pre_text():
    """A pre block is handled the same whatever text precedes it."""
    after_newlines = gfm(
        '\n\n<pre>\nthis is `a\\_test` and this\\_too\n</pre>')
    after_text = gfm(
        'hmm<pre>\nthis is `a\\_test` and this\\_too\n</pre>')
    # Drop the differing prefixes ('\n\n' and 'hmm') before comparing.
    assert_equal(after_newlines[2:], after_text[3:])
 def test_two_underscores():
    """A word with two or more underscores gets them escaped."""
    escaped = gfm('foo_bar_baz')
    assert_equal(escaped, 'foo\\_bar\\_baz')
 def test_newlines_simple():
    """A single newline between two text lines is converted."""
    converted = gfm('foo\nbar')
    assert_equal(converted, 'foo \nbar')
 def test_newlines_group():
    """Newlines are converted in every paragraph, not just the first."""
    source = 'apple\npear\norange\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
 def test_newlines_long_group():
    """Newlines are converted in paragraphs of more than three lines."""
    source = 'apple\npear\norange\nbanana\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange \nbanana\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
 def test_newlines_list():
    """List items must keep their plain newlines."""
    # Same check for '#'-prefixed and '*'-prefixed items, in that order.
    for listing in ('# foo\n# bar', '* foo\n* bar'):
        assert_equal(gfm(listing), listing)
 # =========
 # Functions
 # =========
 # Test if a variable exists (== isset function in PHP)
 # ====================================================
 def isset(variable):
@ -110,7 +239,8 @@ def latest_articles(directory, number):
                 "from git.")
    latest_articles = latest_articles.strip().split("\n")
    latest_articles = [x for x in latest_articles if(isint(x[4:8]) and
-                                                     x.endswith(".html"))]
+                                                     (x.endswith(".html") or
                                                      x.endswith(".md")))]
    latest_articles.sort(key=lambda x: (get_date(x)[4:8], get_date(x)[2:4],
                                        get_date(x)[:2], get_date(x)[9:]),
                         reverse=True)
@ -289,7 +419,8 @@ for filename in list(added_files):
    except ValueError:
        pass
-    if ((not filename.endswith(".html") and not filename.endswith(".ignore"))
+    if ((not filename.endswith(".html") and not filename.endswith(".ignore")
        and not filename.endswith(".md"))
       or direct_copy):
        # Note : this deal with CSS, images or footer file
        print("[INFO] (Direct copy) Copying directly the file "
@ -327,7 +458,8 @@ for filename in list(modified_files):
    except ValueError:
        pass
-    if ((not filename.endswith("html") and not filename.endswith("ignore"))
+    if ((not filename.endswith(".html") and not filename.endswith(".ignore")
        and not filename.endswith(".md"))
       or direct_copy):
        print("[INFO] (Direct copy) Updating directly the file "
              + filename[4:]+" in blog dir.")
@ -336,7 +468,7 @@ for filename in list(modified_files):
        modified_files.remove(filename)
        continue
-    if filename.endswith("ignore"):
+    if filename.endswith(".ignore"):
        print("[INFO] (Not published) Found not published article "
              + filename[4:-7]+".")
        added_files.remove(filename)
@ -364,7 +496,8 @@ for filename in list(deleted_files):
    except ValueError:
        pass
-    if ((not filename.endswith("html") and not filename.endswith("ignore"))
+    if ((not filename.endswith(".html") and not filename.endswith(".ignore")
        and not filename.endswith(".md"))
       or (isset("direct_delete") and direct_delete is True)):
        print("[INFO] (Deleted file) Delete directly copied file "
              + filename[4:]+" in blog dir.")
@ -560,6 +693,10 @@ for filename in added_files+modified_files:
        tags_comma += ("<a href=\""+params["BLOG_URL"] +
                       "/tags/"+tag+".html\">"+tag+"</a>")
    # Markdown support
    if filename.endswith(".md"):
        article = markdown.markdown(gfm(article))
    # Write generated HTML for this article in gen /
    article = replace_tags(article, search_list, replace_list)