#!/usr/bin/env python3

# Blogit script written by Phyks (Lucas Verney) for his personnal use. I
# distribute it with absolutely no warranty, except that it works for me on my
# blog :)

# This script is a pre-commit hook that should be placed in your .git/hooks
# folder to work. Read README file for more info.

# LICENSE :
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If
# we meet some day, and you think this stuff is worth it, you can buy me a
# beer soda in return.
#                                                                       Phyks
# ----------------------------------------------------------------------------

import sys
import getopt
import shutil
import os
import datetime
import subprocess
import re
import locale

from time import gmtime, strftime, mktime

# BeautifulSoup is a third-party dependency. Its absence is reported when it
# is first used rather than at import time, so that the helpers below (and the
# hook itself) can still be loaded on a machine without bs4.
try:
    from bs4 import BeautifulSoup
except ImportError as _bs4_import_error:
    _BS4_IMPORT_ERROR = _bs4_import_error

    def BeautifulSoup(*args, **kwargs):
        """Stand-in that raises the deferred bs4 ImportError when called."""
        raise ImportError("The bs4 package (BeautifulSoup) is required but "
                          "could not be imported.") from _BS4_IMPORT_ERROR


# =========
# Functions
# =========

# Test if a variable exists (== isset function in PHP)
# ====================================================
def isset(variable):
    """Return True if a variable named *variable* is visible to the caller.

    variable -- name of the variable to look for, as a string.

    The name is looked up in the caller's local and global scopes and in
    this module's globals. The previous implementation inspected isset()'s
    own locals, which only ever contain "variable" itself.
    """
    # sys._getframe(1) is the frame of whoever called isset().
    caller = sys._getframe(1)
    return (variable in caller.f_locals
            or variable in caller.f_globals
            or variable in globals())


# Test wether a variable is an int or not
# =======================================
def isint(variable):
    """Return True if int(variable) succeeds, False otherwise.

    Values that int() rejects with a TypeError (e.g. None) are reported as
    "not an int" instead of raising.
    """
    try:
        int(variable)
    except (ValueError, TypeError):
        return False
    return True


# List all files in path directory
# Works recursively
# Return files list with path relative to current dir
# ===================================================
def list_directory(path):
    """Return every file found under *path*, walking it recursively.

    Each entry is the walked directory joined with the file name, so the
    entries are relative to the current directory whenever *path* is.
    """
    return [os.path.join(root, name)
            for root, _dirs, files in os.walk(path)
            for name in files]


# Return a list with the tags of a given article
# ==============================================
def get_tags(filename):
    """Return the tags declared on the first "@tags=" line of *filename*.

    The text after "@tags=" is split on commas and each tag is stripped.
    Empty entries (trailing comma, empty "@tags=" line) are dropped so that
    callers never receive an empty tag name. Returns [] when the file has
    no "@tags=" line. Exits the script if the file cannot be opened.
    """
    try:
        with open(filename, 'r') as fh:
            tag_line = ''
            for line in fh:
                if "@tags=" in line:
                    tag_line = line
                    break
    except IOError:
        sys.exit("[ERROR] Unable to open file "+filename+".")

    if not tag_line:
        return []
    raw_tags = tag_line[tag_line.find("@tags=")+6:].split(",")
    return [tag for tag in (x.strip() for x in raw_tags) if tag]


# Return date of an article
# =========================
def get_date(filename):
    """Return the stripped text following "@date=" in *filename*.

    Only the first matching line is used. Exits the script if the file
    cannot be opened or contains no "@date=" line.
    """
    try:
        with open(filename, 'r') as fh:
            for line in fh:
                if "@date=" in line:
                    return line[line.find("@date=")+6:].strip()
    except IOError:
        sys.exit("[ERROR] Unable to open file "+filename+".")
    sys.exit("[ERROR] Unable to determine date in article "+filename+".")


# Sort key for articles : their @date as a datetime object
# ========================================================
def _article_datetime(filename):
    """Return the "@date=" of *filename* parsed as "%d%m%Y-%H%M".

    Dates that do not match the format map to datetime.min, so such
    articles sort as the oldest ones instead of aborting the sort.
    """
    try:
        return datetime.datetime.strptime(get_date(filename), "%d%m%Y-%H%M")
    except ValueError:
        return datetime.datetime.min


# Return the _number_ latest articles in _dir_ directory
# ======================================================
def latest_articles(directory, number):
    """Return the *number* most recent git-tracked articles in *directory*.

    Files are listed with "git ls-files". Only paths whose characters 4 to
    7 form an integer are kept (i.e. "raw/YYYY/..." style paths, matching
    the "raw/" prefix and year slice used by the rest of the script).

    Articles are ordered by their parsed "@date=" value, newest first. The
    raw "ddmmYYYY-HHMM" string must not be used as the key: compared as
    text it orders by day of month, not chronologically.

    Exits the script if git cannot be run or reports an error.
    """
    try:
        tracked = subprocess.check_output(["git", "ls-files", directory],
                                          universal_newlines=True)
    except (subprocess.CalledProcessError, OSError):
        sys.exit("[ERROR] An error occurred when fetching file changes "
                 "from git.")
    articles = [x for x in tracked.strip().split("\n") if isint(x[4:8])]
    articles.sort(key=_article_datetime, reverse=True)
    return articles[:number]


# Auto create necessary directories to write a file
# =================================================
def auto_dir(path):
    """Create the parent directories of the file *path* if they are missing.

    Nothing is done when *path* has no directory component (the file lives
    in the current directory). Exits the script if the directories cannot
    be created.
    """
    directory = os.path.dirname(path)
    if not directory:
        # os.makedirs('') would fail; there is nothing to create.
        return
    try:
        os.makedirs(directory, exist_ok=True)
    except OSError:
        sys.exit("[ERROR] An error occurred while creating "+path+" file "
                 "and parent dirs.")


# Replace some user specific syntax tags (to repplace smileys for example)
# ========================================================================
def replace_tags(article, search_list, replace_list):
    """Return *article* with every search pattern replaced in turn.

    article      -- text to process.
    search_list  -- regular expression patterns, passed to re.sub().
    replace_list -- replacement strings, paired by position with
                    search_list (extra items on either side are ignored).

    Each substitution is applied to the result of the previous one, so all
    pairs take effect. The former version always substituted in the
    original text and therefore only kept the last pair.
    """
    return_string = article
    for search, replace in zip(search_list, replace_list):
        return_string = re.sub(search, replace, return_string)
    return return_string


# Return text in
for rss description # ======================================================== def get_text_rss(content): soup = BeautifulSoup(content) date = soup.find(attrs={'class': 'date'}) date.extract() title = soup.find(attrs={'class': 'article_title'}) title.extract() return str(soup.div) # Set locale locale.setlocale(locale.LC_ALL, '') # ======================== # Start of the main script # ======================== try: opts, args = getopt.gnu_getopt(sys.argv, "hf", ["help", "force-regen"]) except getopt.GetoptError: sys.exit("[ERROR] Unable to parse command line arguments. " "See pre-commit -h for more infos on how to use.") force_regen = False for opt, arg in opts: if opt in ("-h", "--help"): print("Usage :") print("This should be called automatically as a pre-commit git hook. " "You can also launch it manually right before commiting.\n") print("This script generates static pages ready to be served behind " "your webserver.\n") print("Usage :") print("-h \t --help \t displays this help message.") print("-f \t --force-regen \t force complete rebuild of all pages.") sys.exit(0) elif opt in ("-f", "--force-regen"): force_regen = True # Set parameters with params file search_list = [] replace_list = [] months = ["January", "February", "March", "April", "May", "Juin", "July", "August", "September", "October", "November", "December"] try: with open("raw/params", "r") as params_fh: params = {} for line in params_fh.readlines(): if line.strip() == "" or line.strip().startswith("#"): continue option, value = line.split("=", 1) option = option.strip() if option == "SEARCH": search_list = [i.strip() for i in value.split(",")] elif option == "REPLACE": replace_list = [i.strip() for i in value.split(",")] elif option == "MONTHS": months = [i.strip() for i in value.split(",")] elif option == "IGNORE_FILES": params["IGNORE_FILES"] = [i.strip() for i in value.split(",")] elif option == "BLOG_URL": params["BLOG_URL"] = value.strip(" \n\t\r/") else: params[option.strip()] = value.strip() 
print("[INFO] Parameters set from raw/params file.") except IOError: sys.exit("[ERROR] Unable to load raw/params file which defines important " "parameters. Does such a file exist ? See doc for more info " "on this file.") print("[INFO] Blog url is "+params["BLOG_URL"]+".") # Fill lists for modified, deleted and added files modified_files = [] deleted_files = [] added_files = [] #Lists of years and months with modified files years_list = [] months_list = [] if not force_regen: # Find the changes to be committed try: changes = subprocess.check_output(["git", "diff", "--cached", "--name-status"], universal_newlines=True) except: sys.exit("[ERROR] An error occurred when fetching file changes " "from git.") changes = changes.strip().split("\n") if changes == [""]: sys.exit("[ERROR] Nothing to do... Did you add new files with " "\"git add\" before ?") for changed_file in changes: if changed_file[0].startswith("A"): added_files.append(changed_file[changed_file.index("\t")+1:]) elif changed_file[0].startswith("M"): modified_files.append(changed_file[changed_file.index("\t")+1:]) elif changed_file[0].startswith("D"): deleted_files.append(changed_file[changed_file.index("\t")+1:]) else: sys.exit("[ERROR] An error occurred when running git diff.") else: try: shutil.rmtree("blog/") shutil.rmtree("gen/") added_files = list_directory("raw") except FileNotFoundError: pass if not added_files and not modified_files and not deleted_files: sys.exit("[ERROR] Nothing to do... 
Did you add new files with " "\"git add\" before ?") # Only keep modified raw articles files for filename in list(added_files): direct_copy = False if (not filename.startswith("raw/") or filename.endswith("~") or filename in params["IGNORE_FILES"]): added_files.remove(filename) continue try: int(filename[4:8]) if filename[4:8] not in years_list: years_list.append(filename[4:8]) except ValueError: direct_copy = True try: int(filename[9:11]) if filename[9:11] not in months_list: months_list.append(filename[9:11]) except ValueError: pass if ((not filename.endswith(".html") and not filename.endswith(".ignore")) or direct_copy): # Note : this deal with CSS, images or footer file print("[INFO] (Direct copy) Copying directly the file " + filename[4:]+" to blog dir.") auto_dir("blog/"+filename[4:]) shutil.copy(filename, "blog/"+filename[4:]) added_files.remove(filename) continue if filename.endswith(".ignore"): print("[INFO] (Not published) Found not published article " + filename[4:-7]+".") added_files.remove(filename) continue for filename in list(modified_files): direct_copy = False if (not filename.startswith("raw/") or filename.endswith("~") or filename in params["IGNORE_FILES"]): modified_files.remove(filename) continue try: int(filename[4:8]) if filename[4:8] not in years_list: years_list.append(filename[4:8]) except ValueError: direct_copy = True try: int(filename[9:11]) if filename[9:11] not in months_list: months_list.append(filename[9:11]) except ValueError: pass if ((not filename.endswith("html") and not filename.endswith("ignore")) or direct_copy): print("[INFO] (Direct copy) Updating directly the file " + filename[4:]+" in blog dir.") auto_dir("blog/"+filename[4:]) shutil.copy(filename, "blog/"+filename[4:]) modified_files.remove(filename) continue if filename.endswith("ignore"): print("[INFO] (Not published) Found not published article " + filename[4:-7]+".") added_files.remove(filename) continue for filename in list(deleted_files): direct_copy = False if 
(not filename.startswith("raw/") or filename.endswith("~") or filename in params["IGNORE_FILES"]): deleted_files.remove(filename) continue try: int(filename[4:8]) if filename[4:8] not in years_list: years_list.append(filename[4:8]) except ValueError: direct_delete = True try: int(filename[9:11]) if filename[9:11] not in months_list: months_list.append(filename[9:11]) except ValueError: pass if ((not filename.endswith("html") and not filename.endswith("ignore")) or direct_delete): print("[INFO] (Deleted file) Delete directly copied file " + filename[4:]+" in blog dir.") os.unlink(filename) deleted_files.remove(filename) continue print("[INFO] Added files : "+", ".join(added_files)) print("[INFO] Modified files : "+", ".join(modified_files)) print("[INFO] Deleted filed : "+", ".join(deleted_files)) print("[INFO] Updating tags for added and modified files.") for filename in added_files: tags = get_tags(filename) if not tags: sys.exit("[ERROR] (TAGS) In added article "+filename[4:]+" : " "No tags found !") for tag in tags: try: auto_dir("gen/tags/"+tag+".tmp") with open("gen/tags/"+tag+".tmp", 'a+') as tag_file: tag_file.seek(0) if filename[4:] not in tag_file.read(): tag_file.write(filename[4:]+"\n") print("[INFO] (TAGS) Found tag "+tag+" in article " + filename[4:]) except IOError: sys.exit("[ERROR] (TAGS) New tag found but an error " "occurred in article "+filename[4:]+": "+tag+".") for filename in modified_files: try: tags = get_tags(filename) except IOError: sys.exit("[ERROR] Unable to open file "+filename[4:]+".") if not tags: sys.exit("[ERROR] (TAGS) In modified article "+filename[4:]+" : " " No tags found !") for tag in list_directory("gen/tags/"): try: with open(tag, 'r+') as tag_file: if (tag[tag.index("tags/") + 5:tag.index(".tmp")] in tags and filename[4:] not in tag_file.read()): tag_file.seek(0, 2) # Append to end of file tag_file.write(filename[4:]+"\n") print("[INFO] (TAGS) Found new tag " + tag[:tag.index(".tmp")]+" for modified article " + 
filename[4:]+".") tags.remove(tag_file[9:]) if (tag[tag.index("tags/") + 5:tag.index(".tmp")] not in tags and filename[4:] in tag_file.read()): tag_old = tag_file.read() tag_file.truncate() # Delete file in tag tag_file_write = tag_old.replace(filename[4:]+"\n", "") if tag_file_write: tag_file.write(tag_file_write) print("[INFO] (TAGS) Deleted tag " + tag[:tag.index(".tmp")]+" in modified article " + filename[4:]+".") else: try: os.unlink(tag) print("[INFO] (TAGS) No more article with tag " + tag[8:-4]+", deleting it.") except FileNotFoundError: print("[INFO] (TAGS) "+tag+" was found to be empty" " but there was an error during deletion. " "You should check manually.") tags.remove(tag_file[9:]) except IOError: sys.exit("[ERROR] (TAGS) An error occurred when parsing tags " " of article "+filename[4:]+".") for tag in tags: # New tags created try: auto_dir("gen/tags/"+tag+".tmp") with open("gen/tags/"+tag+".tmp", "a+") as tag_file: # Delete tag file here if empty after deletion tag_file.write(filename[4:]+"\n") print("[INFO] (TAGS) Found new tag "+tag+" for " "modified article "+filename[4:]+".") except IOError: sys.exit("[ERROR] (TAGS) An error occurred when parsing tags " "of article "+filename[4:]+".") # Delete tags for deleted files and delete all generated files for filename in deleted_files: tags = get_tags(filename) if not tags: sys.exit("[ERROR] In deleted article "+filename[4:]+" : " "No tags found !") for tag in tags: try: with open("gen/tags/"+tag+".tmp", 'r+') as tag_file: tag_old = tag_file.read() tag_file.truncate() # Delete file in tag tag_file_write = tag_old.replace(filename[4:]+"\n", "") if tag_file_write: tag_file.write(tag_file_write) print("[INFO] (TAGS) Deleted tag " + tag[:tag.index(".tmp")]+" in deleted article " + filename[4:]+".") except IOError: sys.exit("[ERROR] An error occurred while deleting article" + filename[4:]+" from tags files.") if not tag_file_write: try: os.unlink(tag) print("[INFO] (TAGS) No more article with tag " + 
tag[8:-4]+", deleting it.") except FileNotFoundError: print("[INFO] (TAGS) "+tag+" was found to be empty " "but there was an error during deletion. " "You should check manually.") # Delete generated files try: os.unlink("gen/"+filename[4:-5]+".gen") os.unlink("blog/"+filename[4:]) except FileNotFoundError: print("[INFO] (DELETION) Article "+filename[4:]+" seems " "to not have already been generated. " "You should check manually.") print("[INFO] (DELETION) Deleted article "+filename[4:] + " in both gen and blog directories") # Common lists that are used multiple times last_articles = latest_articles("raw/", int(params["NB_ARTICLES_INDEX"])) tags_full_list = list_directory("gen/tags") # Generate html for each article (gen/ dir) for filename in added_files+modified_files: try: with open(filename, 'r') as fh: article, title, date, author, tags = "", "", "", "", "" for line in fh.readlines(): article += line if "@title=" in line: title = line[line.find("@title=")+7:].strip() continue if "@date=" in line: date = line[line.find("@date=")+6:].strip() continue if "@author=" in line: author = line[line.find("@author=")+7:].strip() continue if "@tags=" in line: tags = line[line.find("@tags=")+6:].strip() continue except IOError: print("[ERROR] An error occurred while generating article " + filename[4:]+".") if not isset("tags") or not isset("title") or not isset("author"): sys.exit("[ERROR] Missing parameters (title, author, date, tags) " "in article "+filename[4:]+".") date_readable = ("Le "+date[0:2]+"/"+date[2:4]+"/"+date[4:8] + " à "+date[9:11]+":"+date[11:13]) day_aside = date[0:2] month_aside = months[int(date[2:4]) - 1] tags_comma = "" tags = [i.strip() for i in tags.split(",")] for tag in tags: if tags_comma != "": tags_comma += ", " tags_comma += (""+tag+"") # Write generated HTML for this article in gen / article = replace_tags(article, search_list, replace_list) try: auto_dir("gen/"+filename[4:-5]+".gen") with open("gen/"+filename[4:-5]+".gen", 'w') as 
article_file: article_file.write("
\n" "\t\n" "\t
\n" "\t\t

"+title+"

\n" "\t\t"+article+"\n" "\t\t

"+date_readable+"

\n" "\t\t

Tags : "+tags_comma + "

\n" "\t
\n" "
\n") print("[INFO] (GEN ARTICLES) Article "+filename[4:]+" generated") except IOError: sys.exit("[ERROR] An error occurred when writing generated HTML for " "article "+filename[4:]+".") # Starting to generate header file (except title) tags_header = "" for tag in tags_full_list: tags_header += "
" tags_header += ("\"test\"") tags_header += (""+tag[9:-4]+"") tags_header += "
" try: with open("raw/header.html", "r") as header_fh: header = header_fh.read() except IOError: sys.exit("[ERROR] Unable to open raw/header.html file.") header = header.replace("@tags", tags_header, 1) header = header.replace("@blog_url", params["BLOG_URL"]) articles_header = "" articles_index = "" rss = ("\n" "\n") rss += ("\t" "\t\t\n" "\t\t"+params["BLOG_TITLE"]+"\n" "\t\t"+params["BLOG_URL"]+"\n" "\t\t"+params["DESCRIPTION"]+"\n" "\t\t"+params["LANGUAGE"]+"\n" "\t\t"+params["COPYRIGHT"]+"\n" "\t\t"+params["WEBMASTER"]+"\n" "\t\t" + strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())+"\n") # Generate header (except title) + index file + rss file for i, article in enumerate(["gen/"+x[4:-5]+".gen" for x in last_articles]): content, title, tags, date, author = "", "", "", "", "" try: with open(article, "r") as fh: for line in fh.readlines(): content += line if "@title=" in line: title = line[line.find("@title=")+7:].strip() continue if "@date=" in line: date = line[line.find("@date=")+6:].strip() continue if "@author=" in line: author = line[line.find("@author=")+7:].strip() continue if "@tags=" in line: tags = line[line.find("@tags=")+6:].strip() continue except IOError: sys.exit("[ERROR] Unable to open "+article+" file.") if not isset("title"): sys.exit("[ERROR] No title found in article "+article[4:]+".") if i < 5: articles_header += "
  • " articles_header += (""+title+"") articles_header += "
  • " articles_index += content date_rss = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime(mktime(datetime.datetime.strptime(date, "%d%m%Y-%H%M") .timetuple()))) rss += ("\t\t\n" "\t\t\t"+title+"\n" "\t\t\t"+params["BLOG_URL"]+"/"+article[5:]+"\n" "\t\t\t" + params["BLOG_URL"]+"/"+article[5:]+"\n" "\t\t\t\n" "\t\t\t"+date_rss+"\n" "\t\t\t"+', '.join(tags)+"\n" "\t\t\t"+params["WEBMASTER"]+"\n" "\t\t\n") # Finishing header gen articles_header += ("
  • "+"Archives
  • ") header = header.replace("@articles", articles_header, 1) try: auto_dir("gen/header.gen") with open("gen/header.gen", "w") as header_gen_fh: header_gen_fh.write(header) print("[INFO] (HEADER) Header has been generated successfully.") except FileNotFoundError: sys.exit("[ERROR] An error occurred while writing header file.") except IOError: sys.exit("[ERROR] Unable to open gen/header.gen for writing.") # Getting content from footer file try: with open("raw/footer.html", "r") as footer_fh: footer = footer_fh.read() except IOError: sys.exit("[ERROR] An error occurred while parsing footer " "file raw/footer.html.") # Finishing index gen index = (header.replace("@title", params["BLOG_TITLE"], 1) + articles_index + "

    Archives

    "+footer) try: with open("blog/index.html", "w") as index_fh: index_fh.write(index) print("[INFO] (INDEX) Index page has been generated successfully.") except IOError: sys.exit("[ERROR] Error while creating index.html file") except IOError: sys.exit("[ERROR] Unable to open index.html file for writing.") # Finishing rss gen rss += "\t
    \n
    " try: with open("blog/rss.xml", "w") as rss_fh: rss_fh.write(rss) except IOError: sys.exit("[ERROR] An error occurred while writing RSS file.") # Regenerate tags pages for tag in tags_full_list: tag_content = header.replace("@title", params["BLOG_TITLE"] + " - "+tag[4:-4], 1) with open(tag, "r") as tag_gen_fh: for line in tag_gen_fh.readlines(): line = line.replace(".html", ".gen") with open("gen/"+line.strip(), "r") as article_fh: tag_content += article_fh.read() tag_content += footer try: auto_dir(tag.replace("gen/", "blog/")) with open(tag.replace("gen/", "blog/")[:-4]+".html", "w") as tag_fh: tag_fh.write(tag_content) print("[INFO] (TAGS) Tag page for "+tag[9:-4] + " has been generated successfully.") except IOError: sys.exit("[ERROR] An error occurred while generating tag page \"" + tag[9:-4]+"\"") # Finish generating HTML for articles (blog/ dir) for article in added_files+modified_files: try: with open("gen/"+article[4:-5]+".gen", "r") as article_fh: content = article_fh.read() except IOError: sys.exit("[ERROR] An error occurred while opening" "gen/"+article[4:-5]+".gen file.") for line in content.split("\n"): if "@title=" in line: title = line[line.find("@title=")+7:].strip() break content = header.replace("@title", params["BLOG_TITLE"] + " - " + title, 1) + content + footer try: auto_dir("blog/"+article[4:]) with open("blog/"+article[4:], "w") as article_fh: article_fh.write(content) print("[INFO] (GEN ARTICLES) HTML file generated in blog dir for " "article "+article[4:]+".") except IOError: sys.exit("[ERROR] Unable to write blog/"+article[4:]+" file.") # Regenerate pages for years / months years_list.sort(reverse=True) for i in years_list: try: int(i) except ValueError: continue # Generate pages per year page_year = header.replace("@title", params["BLOG_TITLE"]+" - "+i, 1) months_list.sort(reverse=True) for j in months_list: if not os.path.isdir("blog/"+i+"/"+j): continue # Generate pages per month page_month = header.replace("@title", 
params["BLOG_TITLE"]+" - "+i+"/"+j, 1) articles_list = list_directory("gen/"+i+"/"+j) articles_list.sort(key=lambda x: get_date(x), reverse=True) for article in articles_list: try: with open(article, "r") as article_fh: article_content = replace_tags(article_fh.read(), search_list, replace_list) page_month += article_content page_year += article_content except IOError: sys.exit("[ERROR] Error while generating years and " "months pages. Check your gen folder, you " "may need to regenerate some articles. The " "error was due to "+article+".") page_month += footer try: with open("blog/"+i+"/"+j+"/index.html", "w") as page_month_fh: page_month_fh.write(page_month) except IOError: sys.exit("[ERROR] Unable to write index file for "+i+"/"+j+".") page_year += footer try: with open("blog/"+i+"/index.html", "w") as page_year_fh: page_year_fh.write(page_year) except IOError: sys.exit("[ERROR] Unable to write index file for "+i+".") # Generate archive page archives = header.replace("@title", params["BLOG_TITLE"]+" - Archives", 1) years_list = os.listdir("blog/") years_list.sort(reverse=True) archives += ("

    Archives

    " archives += footer try: with open("blog/archives.html", "w") as archives_fh: archives_fh.write(archives) except IOError: sys.exit("[ERROR] Unable to write blog/archives.html file.") # Include header and footer for pages that need it for i in os.listdir("blog/"): if (os.path.isdir("blog/"+i) or i in ["header.html", "footer.html", "rss.xml", "style.css", "index.html", "archives.html", "humans.txt"]): continue with open("blog/"+i, 'r+') as fh: content = fh.read() fh.seek(0) if content.find("#include_header_here") != -1: content = content.replace("#include_header_here", header.replace("@title", (params["BLOG_TITLE"] + " - "+i[:-5].title()), 1), 1) fh.write(content) fh.seek(0) if content.find("#include_footer_here") != -1: fh.write(content.replace("#include_footer_here", footer, 1))