2010-04-28 02:48:29 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# Copyright muflax <mail@muflax.com>, 2010
|
|
|
|
# License: GNU GPL 3 <http://www.gnu.org/copyleft/gpl.html>
|
|
|
|
|
2010-05-01 20:10:46 +02:00
|
|
|
import copy
|
2010-04-28 16:09:54 +02:00
|
|
|
import datetime as dt
|
2010-04-28 02:48:29 +02:00
|
|
|
import glob
|
|
|
|
import hashlib
|
2010-05-01 20:10:46 +02:00
|
|
|
import http.server
|
2010-04-28 16:09:54 +02:00
|
|
|
import optparse
|
2010-04-28 02:48:29 +02:00
|
|
|
import os
|
2010-04-28 16:09:54 +02:00
|
|
|
import os.path as op
|
2010-04-28 02:48:29 +02:00
|
|
|
import re
|
2010-04-28 22:04:14 +02:00
|
|
|
import shutil
|
2010-04-28 02:48:29 +02:00
|
|
|
import subprocess
|
2010-04-28 03:15:17 +02:00
|
|
|
import sys
|
2010-04-28 02:48:29 +02:00
|
|
|
|
|
|
|
import PyRSS2Gen as RSS2
|
|
|
|
import yaml
|
|
|
|
try:
|
|
|
|
from yaml import CLoader as Loader
|
|
|
|
except ImportError:
|
|
|
|
from yaml import Loader
|
2010-04-28 16:09:54 +02:00
|
|
|
|
|
|
|
class Webifier(object):
    """Turn source files into a static website.

    Walks ``src``, runs every ``*.pdc`` file through pandoc using the
    layout/style named in per-directory ``meta.yaml`` files, copies the
    remaining media files verbatim, and builds an RSS feed from the
    generated changelog page.
    """

    def __init__(self, src, out, styles, layout, force):
        # src:    directory holding the pandoc sources
        # out:    directory the website is generated into
        # styles: directory holding the css styles
        # layout: directory holding the pandoc templates
        # force:  regenerate everything, even up-to-date files
        self.src = src
        self.styles = styles
        self.layout = layout
        self.force = force
        self.out = out
        self.now = dt.datetime.now()
        # matches one changelog entry of the form <li>Y/M/D: description</li>;
        # unescaped whitespace below is ignored (re.X), "." spans lines (re.S)
        self.relist = re.compile(r"""
            <li>
            (?P<y>\d+) / (?P<m>\d+) / (?P<d>\d+) :
            (?P<desc>.+?)
            </li>
            """, re.X | re.S)
        # source-only files that must never be copied into the output
        self.recopy = re.compile(r"\.(yaml|pdc|swp)$")

    def webify(self):
        """Wrapper for the whole process."""
        self.make_html_files(self.src, self.out)
        self.make_rss_feed(op.join(self.out, "changelog.html"))
        self.copy_media_files(self.src, self.out)
        self.copy_media_files(self.styles, op.join(self.out, self.styles))

    def copy_media_files(self, src, out):
        """Recursively copy all the other files, like images."""
        # again, we manually walk this shit... *sigh*
        for f in [f for f in os.listdir(src)
                  if (op.isfile(op.join(src, f))
                      and not self.recopy.search(f))]:
            if not op.exists(out):
                # makedirs (not mkdir): an intermediate source dir with no
                # copyable files leaves its output dir uncreated, so the
                # parent may be missing here
                os.makedirs(out)
            shutil.copy2(op.join(src, f), op.join(out, f))

        # recurse into every subdirectory
        for dir in [d for d in os.listdir(src)
                    if op.isdir(op.join(src, d))]:
            self.copy_media_files(src=op.join(src, dir),
                                  out=op.join(out, dir))

    def _breadcrumbtagify(self, file, name=None):
        """Turn an address and name into a proper link (name defaults to file)."""
        if not name:
            name = file
        return "<a href='{}' class='crumb'>{}</a>".format(file, name)

    def make_breadcrumb(self, file, meta):
        """Turn the current path into breadcrumb navigation."""
        crumbs = []
        # an index page IS its category, so drop the last crumb for it
        mod = -1 if op.basename(file) == "index.html" else 0
        for i in range(len(meta["cats"]) + mod):
            catname = meta["cats"][i]["name"]
            # link target is the joined path of all crumbs up to this one
            catfile = op.join(*[crumb["file"] for crumb in meta["cats"][:i+1]])
            crumbs.append(self._breadcrumbtagify(catfile, catname))

        # empty tail so the joined string ends in a separator
        crumbs.append("")
        return " » ".join(crumbs)

    def templatify(self, file, meta, out):
        """Templatify file using meta and save it at out."""
        print("\ttemplatifying {}...".format(file))
        dest_file = op.basename(file).replace(".pdc", ".html")
        dest = op.join(out, dest_file)
        breadcrumb = self.make_breadcrumb(dest, meta)

        pandoc = ["pandoc",
                  "--template", op.join(self.layout, meta["layout"]),
                  "--css", op.join("/", self.styles, meta["style"]),
                  "--variable", "breadcrumb:{}".format(breadcrumb),
                  "--variable", "filename:{}".format(dest_file),
                  "-o", dest,
                  file,
                  ]
        subprocess.call(pandoc)
        print("\t\tsaving as {}...".format(dest))
        self.tidy_up_html(dest)

        # give the output the source's mtime so _is_newer stays meaningful
        stat = os.stat(file)
        os.utime(dest, (stat.st_mtime, stat.st_mtime))

    def _is_newer(self, old, new):
        """Return whether old's mtime is at least 1s newer than new's.

        Missing files count as "newer" so the target gets (re)built.
        """
        try:
            mnew, mold = os.stat(new).st_mtime, os.stat(old).st_mtime
        except OSError:
            return True
        return mold - mnew >= 1

    def make_html_files(self, src, out, meta=None):
        """Turn all *.pdc in src into html files in out."""
        # we'll have to manually walk this shit...
        # read the metadata and update the inherited one; deep-copy so
        # sibling directories don't see each other's additions
        meta = {} if meta is None else copy.deepcopy(meta)
        print("reading metadata in {}...".format(src))
        meta_file = op.join(src, "meta.yaml")
        with open(meta_file, "r") as f:
            # NOTE(review): yaml.load is unsafe on untrusted input; assumed
            # fine here because meta.yaml is the site author's own file
            data = yaml.load(f, Loader=Loader)
            meta.update(data)

        # add breadcrumb information to the metadata
        crumb = {"file": op.basename(out), "name": meta["title"]}
        if "cats" in meta:
            meta["cats"].append(crumb)
        else:  # root path, needs to be renamed
            crumb["file"] = "/"
            meta["cats"] = [crumb]

        # templatify all outdated (or force-rebuilt) files here
        if not op.exists(out):
            os.mkdir(out)
        for f in glob.glob(src + "/*.pdc"):
            dest_file = op.basename(f).replace(".pdc", ".html")
            dest = op.join(out, dest_file)
            if self.force or self._is_newer(f, dest):
                self.templatify(f, meta, out)

        # do the same for all subdirectories
        for dir in [d for d in os.listdir(src)
                    if op.isdir(op.join(src, d))]:
            self.make_html_files(src=op.join(src, dir),
                                 out=op.join(out, dir),
                                 meta=meta)

    def make_rss_feed(self, changelog):
        """Generate an RSS feed out of the Changelog."""
        dest = op.join(self.out, "rss.xml")
        if not (self.force or self._is_newer(changelog, dest)):
            return

        with open(changelog, "r") as f:
            print("parsing {}...".format(changelog))
            txt = f.read()

        items = []
        for entry in self.relist.finditer(txt):
            items.append(
                RSS2.RSSItem(
                    title = "omg new stuff!!w!",
                    link = "http://www.muflax.com/changelog.html",
                    description = entry.group("desc"),
                    pubDate = dt.datetime(
                        int(entry.group("y")),
                        int(entry.group("m")),
                        int(entry.group("d"))
                    ),
                    # hash of the description keeps the guid stable across runs
                    guid = RSS2.Guid(
                        hashlib.md5(entry.group("desc").encode("utf8")).hexdigest()
                    )
                )
            )

        if not items:
            print("RSS broke... again...")
            sys.exit(1)

        feed = RSS2.RSS2(
            title = "muflax.com",
            link = "http://www.muflax.com",
            description = "lies and wonderland",
            lastBuildDate = dt.datetime.now(),
            items = items[:10]
        )

        with open(dest, "w") as f:
            print("writing RSS feed...")
            feed.write_xml(f, encoding="utf8")
        self.tidy_up_xml(dest)
        # BUG FIX: msrc was previously never defined (NameError at runtime);
        # mirror templatify and stamp the feed with the changelog's mtime so
        # _is_newer works on the next run
        msrc = os.stat(changelog).st_mtime
        os.utime(dest, (msrc, msrc))

    def tidy_up_html(self, f):
        """Clean up the html we generated earlier..."""
        print("cleaning up {}...".format(f))
        subprocess.call(["tidy", "--tidy-mark", "f", "-i", "-m", "-q",
                         "-utf8", f])
        # What? You got a problem with me using Perl inside Python
        # to avoid patching Haskell?
        # Anyway, removes the last newline inside code blocks.
        subprocess.call(["perl", "-i", "-p", "-e",
                         "s,<br /></code>,</code>,g", f])

    def tidy_up_xml(self, f):
        """Clean up all the xml we generated earlier..."""
        print("cleaning up {}...".format(f))
        subprocess.call(["tidy", "-xml", "-i", "-m", "-q", "-utf8", f])

    def start_server(self, dir=""):
        """Start a webserver serving dir until interrupted."""
        old = os.getcwd()
        os.chdir(dir)
        try:
            http.server.test(HandlerClass=http.server.SimpleHTTPRequestHandler)
        except KeyboardInterrupt:
            pass
        finally:
            # always restore the working directory
            os.chdir(old)
|
|
|
|
|
2010-04-28 02:48:29 +02:00
|
|
|
def main():
    """Parse the command line, build the site, and optionally serve it."""
    # argparse replaces the long-deprecated optparse (deprecated since 3.2)
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--force", dest="force", action="store_true",
                        default=False, help="regenerate all files")
    parser.add_argument("-s", "--server", dest="server", action="store_true",
                        default=False, help="start webserver afterwards")
    opt = parser.parse_args()
    # http.server.test() parses sys.argv itself, so strip our options first
    sys.argv[1:] = []

    w = Webifier(src="src", out="out", styles="styles", layout="layout",
                 force=opt.force)
    w.webify()

    if opt.server:
        w.start_server("out")
|
|
|
|
|
2010-04-28 02:48:29 +02:00
|
|
|
# entry point: build the site only when run as a script, not on import
if __name__ == "__main__":
    main()
|