2010-04-28 02:48:29 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# Copyright muflax <mail@muflax.com>, 2010
|
|
|
|
# License: GNU GPL 3 <http://www.gnu.org/copyleft/gpl.html>
|
|
|
|
|
2010-05-01 20:10:46 +02:00
|
|
|
import copy
|
2010-04-28 16:09:54 +02:00
|
|
|
import datetime as dt
|
2010-04-28 02:48:29 +02:00
|
|
|
import glob
|
|
|
|
import hashlib
|
2010-05-01 20:10:46 +02:00
|
|
|
import http.server
|
2010-04-28 16:09:54 +02:00
|
|
|
import optparse
|
2010-04-28 02:48:29 +02:00
|
|
|
import os
|
2010-04-28 16:09:54 +02:00
|
|
|
import os.path as op
|
2010-04-28 02:48:29 +02:00
|
|
|
import re
|
2010-04-28 22:04:14 +02:00
|
|
|
import shutil
|
2010-04-28 02:48:29 +02:00
|
|
|
import subprocess
|
2010-04-28 03:15:17 +02:00
|
|
|
import sys
|
2010-04-28 02:48:29 +02:00
|
|
|
|
|
|
|
import PyRSS2Gen as RSS2
|
|
|
|
import yaml
|
|
|
|
try:
|
|
|
|
from yaml import CLoader as Loader
|
|
|
|
except ImportError:
|
|
|
|
from yaml import Loader
|
2010-04-28 16:09:54 +02:00
|
|
|
|
|
|
|
class Webifier(object):
    """Turn source files into a static website.

    Walks ``src``, runs every ``*.pdc`` file through pandoc using the
    layout/style named in per-directory ``meta.yaml`` files, copies the
    remaining media files verbatim, and builds an RSS feed from the
    generated changelog page.
    """

    def __init__(self, src, out, styles, layout, force):
        # src:    directory holding the pandoc sources
        # out:    directory the website is generated into
        # styles: directory holding the css styles
        # layout: directory holding the pandoc templates
        # force:  regenerate everything, even up-to-date files
        self.src = src
        self.styles = styles
        self.layout = layout
        self.force = force
        self.out = out
        self.now = dt.datetime.now()
        # matches one changelog entry of the form <li>Y/M/D: description</li>;
        # unescaped whitespace below is ignored (re.X), "." spans lines (re.S)
        self.relist = re.compile(r"""
            <li>
            (?P<y>\d+) / (?P<m>\d+) / (?P<d>\d+) :
            (?P<desc>.+?)
            </li>
            """, re.X | re.S)
        # source-only files that must never be copied into the output
        self.recopy = re.compile(r"\.(yaml|pdc|swp)$")

    def webify(self):
        """Wrapper for the whole process."""
        self.make_html_files(self.src, self.out)
        self.make_rss_feed(op.join(self.out, "changelog.html"))
        self.copy_media_files(self.src, self.out)
        self.copy_media_files(self.styles, op.join(self.out, self.styles))

    def copy_media_files(self, src, out):
        """Recursively copy all the other files, like images."""
        # again, we manually walk this shit... *sigh*
        for f in [f for f in os.listdir(src)
                  if (op.isfile(op.join(src, f))
                      and not self.recopy.search(f))]:
            if not op.exists(out):
                # makedirs (not mkdir): an intermediate source dir with no
                # copyable files leaves its output dir uncreated, so the
                # parent may be missing here
                os.makedirs(out)
            shutil.copy2(op.join(src, f), op.join(out, f))

        # recurse into every subdirectory
        for dir in [d for d in os.listdir(src)
                    if op.isdir(op.join(src, d))]:
            self.copy_media_files(src=op.join(src, dir),
                                  out=op.join(out, dir))

    def _breadcrumbtagify(self, file, name=None):
        """Turn an address and name into a proper link (name defaults to file)."""
        if not name:
            name = file
        return "<a href='{}' class='crumb'>{}</a>".format(file, name)

    def make_breadcrumb(self, file, meta):
        """Turn the current path into breadcrumb navigation."""
        crumbs = []
        # an index page IS its category, so drop the last crumb for it
        mod = -1 if op.basename(file) == "index.html" else 0
        for i in range(len(meta["cats"]) + mod):
            catname = meta["cats"][i]["name"]
            # link target is the joined path of all crumbs up to this one
            catfile = op.join(*[crumb["file"] for crumb in meta["cats"][:i+1]])
            crumbs.append(self._breadcrumbtagify(catfile, catname))

        # empty tail so the joined string ends in a separator
        crumbs.append("")
        return " » ".join(crumbs)

    def templatify(self, file, meta, out):
        """Templatify file using meta and save it at out."""
        print("\ttemplatifying {}...".format(file))
        dest_file = op.basename(file).replace(".pdc", ".html")
        dest = op.join(out, dest_file)
        breadcrumb = self.make_breadcrumb(dest, meta)

        pandoc = ["pandoc",
                  "--template", op.join(self.layout, meta["layout"]),
                  "--css", op.join("/", self.styles, meta["style"]),
                  "--variable", "breadcrumb:{}".format(breadcrumb),
                  "--variable", "filename:{}".format(dest_file),
                  "-o", dest,
                  file,
                  ]
        subprocess.call(pandoc)
        print("\t\tsaving as {}...".format(dest))
        self.tidy_up_html(dest)

        # give the output the source's mtime so _is_newer stays meaningful
        stat = os.stat(file)
        os.utime(dest, (stat.st_mtime, stat.st_mtime))

    def _is_newer(self, old, new):
        """Return whether old's mtime is at least 1s newer than new's.

        Missing files count as "newer" so the target gets (re)built.
        """
        try:
            mnew, mold = os.stat(new).st_mtime, os.stat(old).st_mtime
        except OSError:
            return True
        return mold - mnew >= 1

    def make_html_files(self, src, out, meta=None):
        """Turn all *.pdc in src into html files in out."""
        # we'll have to manually walk this shit...
        # read the metadata and update the inherited one; deep-copy so
        # sibling directories don't see each other's additions
        meta = {} if meta is None else copy.deepcopy(meta)
        print("reading metadata in {}...".format(src))
        meta_file = op.join(src, "meta.yaml")
        with open(meta_file, "r") as f:
            # NOTE(review): yaml.load is unsafe on untrusted input; assumed
            # fine here because meta.yaml is the site author's own file
            data = yaml.load(f, Loader=Loader)
            meta.update(data)

        # add breadcrumb information to the metadata
        crumb = {"file": op.basename(out), "name": meta["title"]}
        if "cats" in meta:
            meta["cats"].append(crumb)
        else:  # root path, needs to be renamed
            crumb["file"] = "/"
            meta["cats"] = [crumb]

        # templatify all outdated (or force-rebuilt) files here
        if not op.exists(out):
            os.mkdir(out)
        for f in glob.glob(src + "/*.pdc"):
            dest_file = op.basename(f).replace(".pdc", ".html")
            dest = op.join(out, dest_file)
            if self.force or self._is_newer(f, dest):
                self.templatify(f, meta, out)

        # do the same for all subdirectories
        for dir in [d for d in os.listdir(src)
                    if op.isdir(op.join(src, d))]:
            self.make_html_files(src=op.join(src, dir),
                                 out=op.join(out, dir),
                                 meta=meta)

    def make_rss_feed(self, changelog):
        """Generate an RSS feed out of the Changelog."""
        dest = op.join(self.out, "rss.xml")
        if not (self.force or self._is_newer(changelog, dest)):
            return

        with open(changelog, "r") as f:
            print("parsing {}...".format(changelog))
            txt = f.read()

        items = []
        for entry in self.relist.finditer(txt):
            items.append(
                RSS2.RSSItem(
                    title = "omg new stuff!!w!",
                    link = "http://www.muflax.com/changelog.html",
                    description = entry.group("desc"),
                    pubDate = dt.datetime(
                        int(entry.group("y")),
                        int(entry.group("m")),
                        int(entry.group("d"))
                    ),
                    # hash of the description keeps the guid stable across runs
                    guid = RSS2.Guid(
                        hashlib.md5(entry.group("desc").encode("utf8")).hexdigest()
                    )
                )
            )

        if not items:
            print("RSS broke... again...")
            sys.exit(1)

        feed = RSS2.RSS2(
            title = "muflax.com",
            link = "http://www.muflax.com",
            description = "lies and wonderland",
            lastBuildDate = dt.datetime.now(),
            items = items[:10]
        )

        with open(dest, "w") as f:
            print("writing RSS feed...")
            feed.write_xml(f, encoding="utf8")
        self.tidy_up_xml(dest)
        # BUG FIX: msrc was previously never defined (NameError at runtime);
        # mirror templatify and stamp the feed with the changelog's mtime so
        # _is_newer works on the next run
        msrc = os.stat(changelog).st_mtime
        os.utime(dest, (msrc, msrc))

    def tidy_up_html(self, f):
        """Clean up the html we generated earlier..."""
        print("cleaning up {}...".format(f))
        subprocess.call(["tidy", "--tidy-mark", "f", "-i", "-m", "-q",
                         "-utf8", f])
        # What? You got a problem with me using Perl inside Python
        # to avoid patching Haskell?
        # Anyway, removes the last newline inside code blocks.
        subprocess.call(["perl", "-i", "-p", "-e",
                         "s,<br /></code>,</code>,g", f])

    def tidy_up_xml(self, f):
        """Clean up all the xml we generated earlier..."""
        print("cleaning up {}...".format(f))
        subprocess.call(["tidy", "-xml", "-i", "-m", "-q", "-utf8", f])

    def start_server(self, dir=""):
        """Start a webserver serving dir until interrupted."""
        old = os.getcwd()
        os.chdir(dir)
        try:
            http.server.test(HandlerClass=http.server.SimpleHTTPRequestHandler)
        except KeyboardInterrupt:
            pass
        finally:
            # always restore the working directory
            os.chdir(old)
|
|
|
|
|
2010-04-28 02:48:29 +02:00
|
|
|
def main():
    """Parse the command line, build the site, and optionally serve it."""
    # argparse replaces the long-deprecated optparse (deprecated since 3.2)
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--force", dest="force", action="store_true",
                        default=False, help="regenerate all files")
    parser.add_argument("-s", "--server", dest="server", action="store_true",
                        default=False, help="start webserver afterwards")
    opt = parser.parse_args()
    # http.server.test() parses sys.argv itself, so strip our options first
    sys.argv[1:] = []

    w = Webifier(src="src", out="out", styles="styles", layout="layout",
                 force=opt.force)
    w.webify()

    if opt.server:
        w.start_server("out")
|
|
|
|
|
2010-04-28 02:48:29 +02:00
|
|
|
# entry point: build the site only when run as a script, not on import
if __name__ == "__main__":
    main()
|