The git repo behind my blog.

script.py 2.3KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. #!/usr/bin/env python3
  2. from bs4 import BeautifulSoup
  3. from bs4.element import Comment
  4. import os
  5. import base64
  6. import hashlib
  7. import hmac
  8. import requests
  9. """
  10. Script to import all the articles from this blog into Known.
  11. Must be run with a correct API_KEY (see below) and from the `gen/` folder.
  12. """
  13. def list_directory(path):
  14. fichier = []
  15. for root, dirs, files in os.walk(path):
  16. for i in files:
  17. fichier.append(os.path.join(root, i))
  18. return fichier
  19. def hmac_sha256(message, key):
  20. return base64.b64encode(hmac.new(key.encode("utf-8"),
  21. message.encode("utf-8"),
  22. digestmod=hashlib.sha256)
  23. .digest()).decode("utf-8")
  24. def known_api(username, api_key, type, payload):
  25. headers = {
  26. "X-KNOWN-USERNAME": username,
  27. "X-KNOWN-SIGNATURE": hmac_sha256("/"+type+"/edit", api_key)
  28. }
  29. return requests.post("https://known.phyks.me/"+type+"/edit",
  30. data=payload,
  31. headers=headers)
  32. for file in list_directory("2013") + list_directory("2014"):
  33. print("Processing file "+file)
  34. with open(file, 'r') as fh:
  35. soup = BeautifulSoup(fh.read())
  36. content = []
  37. for i in soup.div.find('header').next_siblings:
  38. if i.name == "footer":
  39. break
  40. if type(i) != Comment:
  41. content.append(i)
  42. comment = soup.div.findAll(text=lambda text: isinstance(text,
  43. Comment))
  44. comment = [i.strip() for i in comment[0].strip().split('\n')]
  45. for j in comment:
  46. if j.startswith("@title"):
  47. title = j.split("=")[1]
  48. elif j.startswith("@date"):
  49. date = j.split("=")[1]
  50. elif j.startswith("@tags"):
  51. tags = j.split("=")[1]
  52. tags = ', '.join(["#"+i.strip() for i in tags.split(',')])
  53. meta = {
  54. "title": title,
  55. "date": (str(date[4:8])+":"+str(date[2:4])+":"+str(date[0:2]) +
  56. " "+str(date[9:11])+":"+str(date[11:13])+":00"),
  57. "tags": tags,
  58. }
  59. content = ''.join([str(i) for i in content]).strip()
  60. content += "\n<p>"+meta["tags"]+"</p>"
  61. payload = {"body": content,
  62. "title": meta["title"],
  63. "created": meta["date"]}
  64. known_api("phyks", API_KEY, "entry", payload)