diff --git a/pre-commit.py b/pre-commit.py index b7a9f37..0ad23c2 100755 --- a/pre-commit.py +++ b/pre-commit.py @@ -26,16 +26,145 @@ import datetime import subprocess import re import locale +import markdown +from hashlib import md5 from functools import cmp_to_key from time import gmtime, strftime, mktime from bs4 import BeautifulSoup +# ======================== +# Github Flavored Markdown +# ======================== + +def gfm(text): + # Extract pre blocks. + extractions = {} + + def pre_extraction_callback(matchobj): + digest = md5(matchobj.group(0)).hexdigest() + extractions[digest] = matchobj.group(0) + return "{gfm-extraction-%s}" % digest + pattern = re.compile(r'
<pre>.*?</pre>
', re.MULTILINE | re.DOTALL) + text = re.sub(pattern, pre_extraction_callback, text) + + # Prevent foo_bar_baz from ending up with an italic word in the middle. + def italic_callback(matchobj): + s = matchobj.group(0) + if list(s).count('_') >= 2: + return s.replace('_', '\_') + return s + text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text) + + # In very clear cases, let newlines become <br />
tags. + def newline_callback(matchobj): + if len(matchobj.group(1)) == 1: + return matchobj.group(0).rstrip() + ' \n' + else: + return matchobj.group(0) + pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE) + text = re.sub(pattern, newline_callback, text) + + # Insert pre block extractions. + def pre_insert_callback(matchobj): + return '\n\n' + extractions[matchobj.group(1)] + text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}', pre_insert_callback, + text) + + return text + + +# Test suite. +try: + from nose.tools import assert_equal +except ImportError: + def assert_equal(a, b): + assert a == b, '%r != %r' % (a, b) + + +def test_single_underscores(): + """Don't touch single underscores inside words.""" + assert_equal( + gfm('foo_bar'), + 'foo_bar', + ) + + +def test_underscores_code_blocks(): + """Don't touch underscores in code blocks.""" + assert_equal( + gfm(' foo_bar_baz'), + ' foo_bar_baz', + ) + + +def test_underscores_pre_blocks(): + """Don't touch underscores in pre blocks.""" + assert_equal( + gfm('
<pre>\nfoo_bar_baz\n</pre>
'), + '\n\n
<pre>\nfoo_bar_baz\n</pre>
', + ) + + +def test_pre_block_pre_text(): + """Don't treat pre blocks with pre-text differently.""" + a = '\n\n
<pre>\nthis is `a\\_test` and this\\_too\n</pre>
' + b = 'hmm
<pre>\nthis is `a\\_test` and this\\_too\n</pre>
' + assert_equal( + gfm(a)[2:], + gfm(b)[3:], + ) + + +def test_two_underscores(): + """Escape two or more underscores inside words.""" + assert_equal( + gfm('foo_bar_baz'), + 'foo\\_bar\\_baz', + ) + + +def test_newlines_simple(): + """Turn newlines into br tags in simple cases.""" + assert_equal( + gfm('foo\nbar'), + 'foo \nbar', + ) + + +def test_newlines_group(): + """Convert newlines in all groups.""" + assert_equal( + gfm('apple\npear\norange\n\nruby\npython\nerlang'), + 'apple \npear \norange\n\nruby \npython \nerlang', + ) + + +def test_newlines_long_group(): + """Convert newlines in even long groups.""" + assert_equal( + gfm('apple\npear\norange\nbanana\n\nruby\npython\nerlang'), + 'apple \npear \norange \nbanana\n\nruby \npython \nerlang', + ) + + +def test_newlines_list(): + """Don't convert newlines in lists.""" + assert_equal( + gfm('# foo\n# bar'), + '# foo\n# bar', + ) + assert_equal( + gfm('* foo\n* bar'), + '* foo\n* bar', + ) + # ========= # Functions # ========= + # Test if a variable exists (== isset function in PHP) # ==================================================== def isset(variable): @@ -110,7 +239,8 @@ def latest_articles(directory, number): "from git.") latest_articles = latest_articles.strip().split("\n") latest_articles = [x for x in latest_articles if(isint(x[4:8]) and - x.endswith(".html"))] + (x.endswith(".html") or + x.endswith(".md")))] latest_articles.sort(key=lambda x: (get_date(x)[4:8], get_date(x)[2:4], get_date(x)[:2], get_date(x)[9:]), reverse=True) @@ -289,7 +419,8 @@ for filename in list(added_files): except ValueError: pass - if ((not filename.endswith(".html") and not filename.endswith(".ignore")) + if ((not filename.endswith(".html") and not filename.endswith(".ignore") + and not filename.endswith(".md")) or direct_copy): # Note : this deal with CSS, images or footer file print("[INFO] (Direct copy) Copying directly the file " @@ -327,7 +458,8 @@ for filename in list(modified_files): except ValueError: pass - if ((not 
filename.endswith("html") and not filename.endswith("ignore")) + if ((not filename.endswith(".html") and not filename.endswith(".ignore") + and not filename.endswith(".md")) or direct_copy): print("[INFO] (Direct copy) Updating directly the file " + filename[4:]+" in blog dir.") @@ -336,7 +468,7 @@ for filename in list(modified_files): modified_files.remove(filename) continue - if filename.endswith("ignore"): + if filename.endswith(".ignore"): print("[INFO] (Not published) Found not published article " + filename[4:-7]+".") added_files.remove(filename) @@ -364,7 +496,8 @@ for filename in list(deleted_files): except ValueError: pass - if ((not filename.endswith("html") and not filename.endswith("ignore")) + if ((not filename.endswith(".html") and not filename.endswith(".ignore") + and not filename.endswith(".md")) or (isset("direct_delete") and direct_delete is True)): print("[INFO] (Deleted file) Delete directly copied file " + filename[4:]+" in blog dir.") @@ -560,6 +693,10 @@ for filename in added_files+modified_files: tags_comma += (""+tag+"") + # Markdown support + if filename.endswith(".md"): + article = markdown.markdown(gfm(article)) + # Write generated HTML for this article in gen / article = replace_tags(article, search_list, replace_list)