From bf0b3f2a096832a9063f7816dcc14a0607f67271 Mon Sep 17 00:00:00 2001
From: Scott Bahling <sbahling@mudgum.net>
Date: Sun, 17 Mar 2013 21:26:02 +0100
Subject: initial commit

---
 panfry/__init__.py  |   0
 panfry/_version.py  | 192 +++++++++++++++++++++++++++
 panfry/cli.py       | 144 +++++++++++++++++++++
 panfry/document.py  | 159 +++++++++++++++++++++++
 panfry/main.py      |  46 +++++++
 panfry/page.py      |  59 +++++++++
 panfry/templater.py |  44 +++++++
 panfry/util.py      | 365 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 1009 insertions(+)
 create mode 100644 panfry/__init__.py
 create mode 100644 panfry/_version.py
 create mode 100755 panfry/cli.py
 create mode 100755 panfry/document.py
 create mode 100755 panfry/main.py
 create mode 100755 panfry/page.py
 create mode 100755 panfry/templater.py
 create mode 100755 panfry/util.py

(limited to 'panfry')

diff --git a/panfry/__init__.py b/panfry/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/panfry/_version.py b/panfry/_version.py
new file mode 100644
index 0000000..84381f5
--- /dev/null
+++ b/panfry/_version.py
@@ -0,0 +1,192 @@
+
+IN_LONG_VERSION_PY = True
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by githubs download-from-tag
+# feature). Distribution tarballs (build by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.7 (https://github.com/warner/python-versioneer)
+
+# these strings will be replaced by git during git-archive
+git_refnames = "$Format:%d$"
+git_full = "$Format:%H$"
+
+
+import subprocess
+
+def run_command(args, cwd=None, verbose=False):
+    try:
+        # remember shell=False, so use git.cmd on windows, not just git
+        p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)
+    except EnvironmentError, e:
+        if verbose:
+            print "unable to run %s" % args[0]
+            print e
+        return None
+    stdout = p.communicate()[0].strip()
+    if p.returncode != 0:
+        if verbose:
+            print "unable to run %s (error)" % args[0]
+        return None
+    return stdout
+
+
+import sys
+import re
+import os.path
+
+def get_expanded_variables(versionfile_source):
+    # the code embedded in _version.py can just fetch the value of these
+    # variables. When used from setup.py, we don't want to import
+    # _version.py, so we do it with a regexp instead. This function is not
+    # used from _version.py.
+    variables = {}
+    try:
+        for line in open(versionfile_source,"r").readlines():
+            if line.strip().startswith("git_refnames ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    variables["refnames"] = mo.group(1)
+            if line.strip().startswith("git_full ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    variables["full"] = mo.group(1)
+    except EnvironmentError:
+        pass
+    return variables
+
+def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
+    refnames = variables["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print "variables are unexpanded, not using"
+        return {} # unexpanded, so not in an unpacked git-archive tarball
+    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    for ref in list(refs):
+        if not re.search(r'\d', ref):
+            if verbose:
+                print "discarding '%s', no digits" % ref
+            refs.discard(ref)
+            # Assume all version tags have a digit. git's %d expansion
+            # behaves like git log --decorate=short and strips out the
+            # refs/heads/ and refs/tags/ prefixes that would let us
+            # distinguish between branches and tags. By ignoring refnames
+            # without digits, we filter out many common branch names like
+            # "release" and "stabilization", as well as "HEAD" and "master".
+    if verbose:
+        print "remaining refs:", ",".join(sorted(refs))
+    for ref in sorted(refs):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            if verbose:
+                print "picking %s" % r
+            return { "version": r,
+                     "full": variables["full"].strip() }
+    # no suitable tags, so we use the full revision id
+    if verbose:
+        print "no suitable tags, using full revision id"
+    return { "version": variables["full"].strip(),
+             "full": variables["full"].strip() }
+
+def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
+    # this runs 'git' from the root of the source tree. That either means
+    # someone ran a setup.py command (and this code is in versioneer.py, so
+    # IN_LONG_VERSION_PY=False, thus the containing directory is the root of
+    # the source tree), or someone ran a project-specific entry point (and
+    # this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the
+    # containing directory is somewhere deeper in the source tree). This only
+    # gets called if the git-archive 'subst' variables were *not* expanded,
+    # and _version.py hasn't already been rewritten with a short version
+    # string, meaning we're inside a checked out source tree.
+
+    try:
+        here = os.path.abspath(__file__)
+    except NameError:
+        # some py2exe/bbfreeze/non-CPython implementations don't do __file__
+        return {} # not always correct
+
+    # versionfile_source is the relative path from the top of the source tree
+    # (where the .git directory might live) to this file. Invert this to find
+    # the root from __file__.
+    root = here
+    if IN_LONG_VERSION_PY:
+        for i in range(len(versionfile_source.split("/"))):
+            root = os.path.dirname(root)
+    else:
+        root = os.path.dirname(here)
+    if not os.path.exists(os.path.join(root, ".git")):
+        if verbose:
+            print "no .git in", root
+        return {}
+
+    GIT = "git"
+    if sys.platform == "win32":
+        GIT = "git.cmd"
+    stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],
+                         cwd=root)
+    if stdout is None:
+        return {}
+    if not stdout.startswith(tag_prefix):
+        if verbose:
+            print "tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)
+        return {}
+    tag = stdout[len(tag_prefix):]
+    stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root)
+    if stdout is None:
+        return {}
+    full = stdout.strip()
+    if tag.endswith("-dirty"):
+        full += "-dirty"
+    return {"version": tag, "full": full}
+
+
+def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False):
+    if IN_LONG_VERSION_PY:
+        # We're running from _version.py. If it's from a source tree
+        # (execute-in-place), we can work upwards to find the root of the
+        # tree, and then check the parent directory for a version string. If
+        # it's in an installed application, there's no hope.
+        try:
+            here = os.path.abspath(__file__)
+        except NameError:
+            # py2exe/bbfreeze/non-CPython don't have __file__
+            return {} # without __file__, we have no hope
+        # versionfile_source is the relative path from the top of the source
+        # tree to _version.py. Invert this to find the root from __file__.
+        root = here
+        for i in range(len(versionfile_source.split("/"))):
+            root = os.path.dirname(root)
+    else:
+        # we're running from versioneer.py, which means we're running from
+        # the setup.py in a source tree. sys.argv[0] is setup.py in the root.
+        here = os.path.abspath(sys.argv[0])
+        root = os.path.dirname(here)
+
+    # Source tarballs conventionally unpack into a directory that includes
+    # both the project name and a version string.
+    dirname = os.path.basename(root)
+    if not dirname.startswith(parentdir_prefix):
+        if verbose:
+            print "guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %                   (root, dirname, parentdir_prefix)
+        return None
+    return {"version": dirname[len(parentdir_prefix):], "full": ""}
+
+tag_prefix = "panfry-"
+parentdir_prefix = "panfry-"
+versionfile_source = "panfry/_version.py"
+
+def get_versions(default={"version": "0.0.1", "full": ""}, verbose=False):
+    variables = { "refnames": git_refnames, "full": git_full }
+    ver = versions_from_expanded_variables(variables, tag_prefix, verbose)
+    if not ver:
+        ver = versions_from_vcs(tag_prefix, versionfile_source, verbose)
+    if not ver:
+        ver = versions_from_parentdir(parentdir_prefix, versionfile_source,
+                                      verbose)
+    if not ver:
+        ver = default
+    return ver
+
diff --git a/panfry/cli.py b/panfry/cli.py
new file mode 100755
index 0000000..6b4a64d
--- /dev/null
+++ b/panfry/cli.py
@@ -0,0 +1,144 @@
+#
+# Copyright (c) 2013 Scott Bahling, <sbahling@mudgum.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program (see the file COPYING); if not, write to the
+# Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+################################################################
+#
+# cli.py
+# command line argument handling
+
+import yaml
+import argparse
+import copy
+
+__version__ = '0.0.1'
+
+#################################################################
+#
+# The argparse setup is mapped out
+# in a yaml based config called cli_config. The init_argparser
+# function uses the cli_config to set up the main and sub command
+# parsers.
+#
+#################################################################
+
+#################################################################
+# Argument Parser configuration (YAML)
+#################################################################
+
+cli_config = """
+Args:
+  debug_enabled:
+    flags: [-d, --debug]
+    action: store_true
+    default: False
+    help: 'Turn on verbose messages.'
+
+  run_quiet:
+    flags: [-q, --quiet]
+    action: store_true
+    default: False
+    help: 'Turn off all (debug, warn, info) messages.'
+
+  logfile:
+    flags: [-L, --logfile]
+    default: ''
+    help: 'File to write logs to. Default ./pldptools.log'
+
+  pub_path:
+    flags: [-p, --pubdir]
+    default: './pub'
+    help: 'Directory to place generated documents'
+
+  src_path:
+    flags: [-s, --srcdir]
+    default: '.'
+    help: 'Directory where document source is located'
+
+  templates_path:
+    flags: [-T, --templates]
+    default: ''
+    help: 'Directory where document templates are located'
+
+  css:
+    flags: [-C, --css]
+    default: 'css/style.css'
+    help: |
+        css file for html pages.
+        Includes full path relative to html directory.
+
+
+Parser:
+  help:
+  args:
+    - debug_enabled
+    - run_quiet
+    - logfile
+
+Subparsers:
+  gen:
+    help: generate document from source
+    args:
+      - src_path
+      - pub_path
+      - templates_path
+      - css
+"""
+
+config = yaml.safe_load(cli_config)  # plain data only, no Loader needed
+
+parser_config = config.get('Parser', {})
+args = config.get('Args', {})
+subs = config.get('Subparsers', {})
+
+
+def _add_arg(parser, argtype):
+    '''Add the option named argtype from the Args config to parser.'''
+    opts = copy.copy(args.get(argtype))
+    if not opts:
+        print("Unknown argument type: %s" % argtype)
+        return
+    flags = opts.pop('flags', [])  # opts is a copy; args stays intact
+    parser.add_argument(*flags, dest=opts.pop('dest', argtype), **opts)
+
+
+def init_argparser():
+
+    description = parser_config.get('description', '')
+    formatter_class = argparse.RawTextHelpFormatter
+    parser = argparse.ArgumentParser(description=description,
+                                     formatter_class=formatter_class,
+                                     )
+    subparsers = parser.add_subparsers()
+
+    version = 'Panfry version: %s' % __version__
+    parser.add_argument('--version', action='version', version=version)
+
+    # setup main parser
+    for item in config['Parser'].get('args', []):
+        _add_arg(parser, item)
+
+    # setup sub-command parsers
+    for sub, conf in subs.items():
+        subparser = subparsers.add_parser(sub,
+                                          help=conf.get('help', ''),
+                                          formatter_class=formatter_class,
+                                          )
+        subparser.set_defaults(cmd=sub)
+        for item in conf.get('args', []):
+            _add_arg(subparser, item)
+
+    return parser
diff --git a/panfry/document.py b/panfry/document.py
new file mode 100755
index 0000000..42ef959
--- /dev/null
+++ b/panfry/document.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import re
+import pandoc
+from tempfile import NamedTemporaryFile
+from panfry.util import *
+
+
+class Document:
+
+    def __init__(self, path):
+        self.src_path = path
+        self.meta = self.get_meta(path)
+        self.pages = self.get_pages(path)
+
+    css_file = 'css/style.css'
+    workdir = 'stdocs-work'
+
+    def get_pages(self, path):
+        pages = []
+        filelist = get_lines(self.meta['TOC'])
+        for filename in filelist:
+            source = read_file(os.path.join(path, filename))
+            if source:
+                pages.append(Page(filename, source))
+            else:
+                print("WARNING: Page: %s not found or is empty." % filename)
+
+        return pages
+
+    def get_meta(self, path):
+        metafiles = ['TOC', 'HEADER']
+        meta = {}
+        for filename in metafiles:
+            source = read_file(os.path.join(path, filename))
+            if source:
+                meta[filename] = source
+            else:
+                print("!E: %s not found or is empty. Aborting..." % filename)
+                exit(1)
+
+        return meta
+
+    def set_templater(self, templater):
+        self.templater = templater
+
+    def next_page(self, page):
+        '''Return the page following page, or '' if there is none.'''
+        try:
+            idx = self.pages.index(page)
+        except ValueError:  # page is not part of this document
+            return ''
+        following = self.pages[idx+1:idx+2]  # empty after the last page
+        return following[0] if following else ''
+
+    def prev_page(self, page):
+        '''Return the page preceding page, or '' if there is none.'''
+        try:
+            idx = self.pages.index(page)
+        except ValueError:  # page is not part of this document
+            return ''
+        # idx 0 has no predecessor; idx-1 would wrap to the last page
+        return self.pages[idx-1] if idx > 0 else ''
+
+    @property
+    def toc_links(self):
+        '''
+        Returns a list of dictionaries. Each dictionary element contains
+        a page title and the html file name.
+        '''
+        links = []
+        for page in self.pages:
+            title, level = page.title
+            links.append(dict(link=unicode(page.htmlfile, "utf8"),
+                              text=unicode(title, "utf8"),
+                              level=level,
+                              ))
+
+        return links
+
+    @property
+    def pdf_filename(self):
+        '''File name of the pdf, built from the title in HEADER.'''
+        # The pandoc title is the first '%' line plus any indented
+        # continuation lines; later '%' lines (author, date) are not used.
+        m = re.match(r'%(.*(?:\n[ \t]+.*)*)', self.meta['HEADER'])
+        title = m.group(1).strip() if m else ''
+        if title:
+            for pattern, repl in ((r'\s+', '_'), (r'[:,/]', '-'),
+                                  (r'_+', '_'), (r'-_', '-'), (r'-+', '-')):
+                title = re.sub(pattern, repl, title)
+        else:
+            # No title block: fall back to the source directory name.
+            title = os.path.basename(os.path.abspath(self.src_path))
+
+        return title + '.pdf'
+
+    def publish_pdf(self, pub_path):
+        pdf_path = os.path.join(pub_path, self.pdf_filename)
+        src = self.meta['HEADER']
+        for page in self.pages:
+            src += '\n%s' % page.source
+
+        doc = pandoc.Document()
+        doc.markdown = src
+
+        pandoc.set_cwd(os.path.abspath(self.src_path))
+        doc.to_file(pdf_path)
+        pandoc.set_cwd(None)
+
+        return self.pdf_filename
+
+    def publish_css(self, pub_path):
+        src = os.path.join(self.src_path, 'css')
+        dst = os.path.join(pub_path, 'css')
+        if os.path.isdir(src):
+            copy(src, dst)
+
+    def publish_images(self, pub_path):
+        src_path = self.src_path
+        if os.path.exists(os.path.join(src_path, 'images/html')):
+            src = os.path.join(src_path, 'images/html')
+        else:
+            src = os.path.join(src_path, 'images')
+
+        dst = os.path.join(pub_path, 'images')
+        if os.path.exists(src):
+            copy(src, dst)
+
+    def publish_html(self, pub_path):
+        '''Write every page as html into pub_path, plus css and images.'''
+        pandoc.set_cwd(None)
+        for page in self.pages:
+            print("generating %s..." % page.htmlfile)
+            template = self.templater.page_template(self, page)
+            with NamedTemporaryFile(mode='w', suffix='pf.template',
+                                    delete=False) as template_file:
+                template_file.write(template)
+            try:
+                doc = pandoc.Document()
+                doc.add_argument('toc')
+                doc.add_argument('template=%s' % template_file.name)
+                doc.add_argument('css=%s' % self.css_file)
+                doc.markdown = page.source
+                content = doc.html
+            finally:
+                os.unlink(template_file.name)  # delete=False: remove it here
+            write_file(os.path.join(pub_path, page.htmlfile),
+                       unicode(content, 'utf-8'))
+
+        # Without an explicit index page, link 'index.html' to the first page.
+        htmlfiles = [page.htmlfile for page in self.pages]
+        if htmlfiles and 'index.html' not in htmlfiles:
+            os.symlink(htmlfiles[0], os.path.join(pub_path, 'index.html'))
+
+        # Copy css and images to the publish directory
+        self.publish_css(pub_path)
+        self.publish_images(pub_path)
diff --git a/panfry/main.py b/panfry/main.py
new file mode 100755
index 0000000..1a7acf7
--- /dev/null
+++ b/panfry/main.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import shutil
+import panfry.cli
+from panfry.templater import Templater
+from panfry.document import Document
+
+
+def get_env():
+    env = panfry.cli.init_argparser().parse_args()
+    if not os.path.isdir(env.src_path):
+        print("%s directory not found. Aborting..." % env.src_path)
+        exit(1)
+
+    env.pub_path = os.path.abspath(env.pub_path)
+
+    if not env.templates_path:
+        env.templates_path = os.path.join(env.src_path, 'templates')
+
+    if not os.path.isdir(env.templates_path):
+        print("No templates path found. Aborting...")
+        exit(1)
+
+    return env
+
+def main():
+    env = get_env()
+    if os.path.exists(env.pub_path):
+        shutil.rmtree(env.pub_path)
+    os.mkdir(env.pub_path)
+
+    document = Document(env.src_path)
+    document.set_templater(Templater(env.templates_path))
+
+    ###### Create PDF
+    pdffile = document.publish_pdf(env.pub_path)
+    print("Wrote PDF: %s" % pdffile)
+
+    ###### Create HTML
+    document.publish_html(env.pub_path)
+
+    exit(0)
+
+if __name__ == "__main__":
+    main()
diff --git a/panfry/page.py b/panfry/page.py
new file mode 100755
index 0000000..27b4cb1
--- /dev/null
+++ b/panfry/page.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+from panfry.util import get_lines
+
+
+class Page:
+    '''Represents a single source file (one page) of a document.'''
+    def __init__(self, filename, source):
+        # filename: name of the source file; source: its text content
+        self.filename = filename
+        self.source = source
+
+    @property
+    def title(self):
+        '''
+        Returns a (title, level) tuple: the page title string and an
+        integer giving its level in the document outline hierarchy.
+        The level lets the table of contents indent the title properly.
+
+        If the page has a pandoc title block, the title is taken from
+        there with a level of 1. A man-page style title block such as
+        "% NAME(1) ..." is reported as "man(1) NAME".
+
+        If there is no title block, the first heading is returned with
+        the corresponding heading level: 1 or 2 for a line underlined
+        with "==" or "--", the number of leading "#" characters
+        otherwise.
+
+        Finally, if no title block or headings are found in the page,
+        the filename is returned as the title with underscores changed
+        to spaces.
+        '''
+        title = ''
+        for line in get_lines(self.source):
+            if line.startswith('% '):
+                title = line.split(' ', 1)[1].strip()
+                man = re.search(r'\((.)[^)]*\)', title)
+                if man:
+                    name = title.split('(')[0].strip()
+                    title = 'man(%s) %s' % (man.group(1), name)
+                return (title, 1)
+            if re.match('[=]{2}', line):
+                return (title, 1)
+            if re.match('[-]{2}', line):
+                return (title, 2)
+            heading = re.match(r'(#+).+[A-Za-z0-9]', line)
+            if heading:
+                return (line.lstrip('#').strip(), len(heading.group(1)))
+            title = line.strip()  # may be underlined by the next line
+
+        title = self.filename.replace('_', ' ')
+
+        return (title, 1)
+
+    @property
+    def htmlfile(self):
+        '''Name of the html file for this page (extension replaced).'''
+        return self.filename.rsplit('.', 1)[0] + '.html'
diff --git a/panfry/templater.py b/panfry/templater.py
new file mode 100755
index 0000000..c420a12
--- /dev/null
+++ b/panfry/templater.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from jinja2 import Environment, FileSystemLoader
+
+
+class Templater:
+    ''' Pandoc Template generator.
+    Creates a template suitable for passing to pandoc for html generation.
+
+    The most prominent feature is adding the custom table of contents
+    navigator that navigates across multiple html pages.
+
+    The generator references a jinja2 template file called html5.template
+    located either in the directory 'templates' under the source path,
+    or in the directory passed as an option.
+    '''
+    def __init__(self, path):
+        self.templates_path = path
+        self.j2 = Environment(loader=FileSystemLoader(self.templates_path),
+                              line_statement_prefix='#')
+
+    def page_template(self, doc, page):
+        '''
+        Returns a new utf-8 encoded page template for use by pandoc.
+
+        Arguments:
+        - doc: Panfry.Document.
+        - page: Panfry.Page from Panfry.Document to generate template for.
+        '''
+        options = dict(toc=doc.toc_links)
+        options['page'] = unicode(page.htmlfile, "utf8")
+        options['pdf'] = unicode(doc.pdf_filename, "utf8")  # may be non-ascii
+        options['prev'] = u''  # stays empty when there is no such page
+        options['next'] = u''
+        prevpage = doc.prev_page(page)
+        nextpage = doc.next_page(page)
+        if prevpage:
+            options['prev'] = unicode(prevpage.htmlfile, "utf8")
+        if nextpage:
+            options['next'] = unicode(nextpage.htmlfile, "utf8")
+
+        template = self.j2.get_template('html5.template')
+
+        return template.render(options=options).encode('utf-8')
diff --git a/panfry/util.py b/panfry/util.py
new file mode 100755
index 0000000..c711599
--- /dev/null
+++ b/panfry/util.py
@@ -0,0 +1,365 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import shutil
+import re
+import argparse
+import pandoc
+import cli
+from tempfile import NamedTemporaryFile
+from jinja2 import Environment, FileSystemLoader
+
+
+class Templater:
+    ''' Pandoc Template generator.
+    Creates a template suitable for passing to pandoc for html generation.
+
+    The most prominent feature is adding the custom table of contents
+    navigator that navigates across multiple html pages.
+
+    The generator references a jinja2 template file called html5.template
+    located either in the directory 'templates' under the source path,
+    or in the directory passed as an option.
+    '''
+    def __init__(self, path):
+        self.templates_path = path
+        self.j2 = Environment(loader=FileSystemLoader(self.templates_path),
+                              line_statement_prefix='#')
+
+    def page_template(self, doc, page):
+        '''
+        Returns a new utf-8 encoded page template for use by pandoc.
+
+        Arguments:
+        - doc: Panfry.Document.
+        - page: Panfry.Page from Panfry.Document to generate template for.
+        '''
+        options = dict(toc=doc.toc_links)
+        options['page'] = unicode(page.htmlfile, "utf8")
+        options['pdf'] = unicode(doc.pdf_filename, "utf8")  # may be non-ascii
+        options['prev'] = u''  # stays empty when there is no such page
+        options['next'] = u''
+        prevpage = doc.prev_page(page)
+        nextpage = doc.next_page(page)
+        if prevpage:
+            options['prev'] = unicode(prevpage.htmlfile, "utf8")
+        if nextpage:
+            options['next'] = unicode(nextpage.htmlfile, "utf8")
+
+        template = self.j2.get_template('html5.template')
+
+        return template.render(options=options).encode('utf-8')
+
+
+class Page:
+    '''Represents a single source file (one page) of a document.'''
+    def __init__(self, filename, source):
+        # filename: name of the source file; source: its text content
+        self.filename = filename
+        self.source = source
+
+    @property
+    def title(self):
+        '''
+        Returns a (title, level) tuple: the page title string and an
+        integer giving its level in the document outline hierarchy.
+        The level lets the table of contents indent the title properly.
+
+        If the page has a pandoc title block, the title is taken from
+        there with a level of 1. A man-page style title block such as
+        "% NAME(1) ..." is reported as "man(1) NAME".
+
+        If there is no title block, the first heading is returned with
+        the corresponding heading level: 1 or 2 for a line underlined
+        with "==" or "--", the number of leading "#" characters
+        otherwise.
+
+        Finally, if no title block or headings are found in the page,
+        the filename is returned as the title with underscores changed
+        to spaces.
+        '''
+        title = ''
+        for line in get_lines(self.source):
+            if line.startswith('% '):
+                title = line.split(' ', 1)[1].strip()
+                man = re.search(r'\((.)[^)]*\)', title)
+                if man:
+                    name = title.split('(')[0].strip()
+                    title = 'man(%s) %s' % (man.group(1), name)
+                return (title, 1)
+            if re.match('[=]{2}', line):
+                return (title, 1)
+            if re.match('[-]{2}', line):
+                return (title, 2)
+            heading = re.match(r'(#+).+[A-Za-z0-9]', line)
+            if heading:
+                return (line.lstrip('#').strip(), len(heading.group(1)))
+            title = line.strip()  # may be underlined by the next line
+
+        title = self.filename.replace('_', ' ')
+
+        return (title, 1)
+
+    @property
+    def htmlfile(self):
+        '''Name of the html file for this page (extension replaced).'''
+        return self.filename.rsplit('.', 1)[0] + '.html'
+
+
+class Document:
+
+    def __init__(self, path):
+        self.src_path = path
+        self.meta = self.get_meta(path)
+        self.pages = self.get_pages(path)
+
+    css_file = 'css/style.css'
+    workdir = 'stdocs-work'
+
+    def get_pages(self, path):
+        pages = []
+        filelist = get_lines(self.meta['TOC'])
+        for filename in filelist:
+            source = read_file(os.path.join(path, filename))
+            if source:
+                pages.append(Page(filename, source))
+            else:
+                print("WARNING: Page: %s not found or is empty." % filename)
+
+        return pages
+
+    def get_meta(self, path):
+        metafiles = ['TOC', 'HEADER']
+        meta = {}
+        for filename in metafiles:
+            source = read_file(os.path.join(path, filename))
+            if source:
+                meta[filename] = source
+            else:
+                print("!E: %s not found or is empty. Aborting..." % filename)
+                exit(1)
+
+        return meta
+
+    def set_templater(self, templater):
+        self.templater = templater
+
+    def next_page(self, page):
+        '''Return the page following page, or '' if there is none.'''
+        try:
+            idx = self.pages.index(page)
+        except ValueError:  # page is not part of this document
+            return ''
+        following = self.pages[idx+1:idx+2]  # empty after the last page
+        return following[0] if following else ''
+
+    def prev_page(self, page):
+        '''Return the page preceding page, or '' if there is none.'''
+        try:
+            idx = self.pages.index(page)
+        except ValueError:  # page is not part of this document
+            return ''
+        # idx 0 has no predecessor; idx-1 would wrap to the last page
+        return self.pages[idx-1] if idx > 0 else ''
+
+    @property
+    def toc_links(self):
+        '''
+        Returns a list of dictionaries. Each dictionary element contains
+        a page title and the html file name.
+        '''
+        links = []
+        for page in self.pages:
+            title, level = page.title
+            links.append(dict(link=unicode(page.htmlfile, "utf8"),
+                              text=unicode(title, "utf8"),
+                              level=level,
+                              ))
+
+        return links
+
+    @property
+    def pdf_filename(self):
+        '''File name of the pdf, built from the title in HEADER.'''
+        # The pandoc title is the first '%' line plus any indented
+        # continuation lines; later '%' lines (author, date) are not used.
+        m = re.match(r'%(.*(?:\n[ \t]+.*)*)', self.meta['HEADER'])
+        title = m.group(1).strip() if m else ''
+        if title:
+            for pattern, repl in ((r'\s+', '_'), (r'[:,/]', '-'),
+                                  (r'_+', '_'), (r'-_', '-'), (r'-+', '-')):
+                title = re.sub(pattern, repl, title)
+        else:
+            # No title block: fall back to the source directory name.
+            title = os.path.basename(os.path.abspath(self.src_path))
+
+        return title + '.pdf'
+
+    def publish_pdf(self, pub_path):
+        pdf_path = os.path.join(pub_path, self.pdf_filename)
+        src = self.meta['HEADER']
+        for page in self.pages:
+            src += '\n%s' % page.source
+
+        doc = pandoc.Document()
+        doc.markdown = src
+
+        pandoc.set_cwd(os.path.abspath(self.src_path))
+        doc.to_file(pdf_path)
+        pandoc.set_cwd(None)
+
+        return self.pdf_filename
+
+    def publish_css(self, pub_path):
+        src = os.path.join(self.src_path, 'css')
+        dst = os.path.join(pub_path, 'css')
+        if os.path.isdir(src):
+            copy(src, dst)
+
+    def publish_images(self, pub_path):
+        src_path = self.src_path
+        if os.path.exists(os.path.join(src_path, 'images/html')):
+            src = os.path.join(src_path, 'images/html')
+        else:
+            src = os.path.join(src_path, 'images')
+
+        dst = os.path.join(pub_path, 'images')
+        if os.path.exists(src):
+            copy(src, dst)
+
+    def publish_html(self, pub_path):
+        '''Write every page as html into pub_path, plus css and images.'''
+        pandoc.set_cwd(None)
+        for page in self.pages:
+            print("generating %s..." % page.htmlfile)
+            template = self.templater.page_template(self, page)
+            with NamedTemporaryFile(mode='w', suffix='pf.template',
+                                    delete=False) as template_file:
+                template_file.write(template)
+            try:
+                doc = pandoc.Document()
+                doc.add_argument('toc')
+                doc.add_argument('template=%s' % template_file.name)
+                doc.add_argument('css=%s' % self.css_file)
+                doc.markdown = page.source
+                content = doc.html
+            finally:
+                os.unlink(template_file.name)  # delete=False: remove it here
+            write_file(os.path.join(pub_path, page.htmlfile),
+                       unicode(content, 'utf-8'))
+
+        # Without an explicit index page, link 'index.html' to the first page.
+        htmlfiles = [page.htmlfile for page in self.pages]
+        if htmlfiles and 'index.html' not in htmlfiles:
+            os.symlink(htmlfiles[0], os.path.join(pub_path, 'index.html'))
+
+        # Copy css and images to the publish directory
+        self.publish_css(pub_path)
+        self.publish_images(pub_path)
+
+
+def copy(src, dst, ignore=None):
+    if os.path.isdir(src):
+        shutil.copytree(src, dst, True, ignore)
+    else:
+        shutil.copy(src, dst)
+
+
+def open_file(path, mode='r'):
+    if mode == 'w' or os.path.isfile(path):
+        return open(path, mode)
+    else:
+        return None
+
+
+def write_file(path, content):
+    fd = open_file(path, mode='w')
+    if fd:
+        fd.write(content.encode('utf-8'))
+        fd.close()
+        return path
+    else:
+        return None
+
+
+def read_file(path):
+    content = ''
+    fd = open_file(path)
+    if fd:
+        content = fd.read()
+        fd.close()
+        return content
+    else:
+        print("Error: could not open %s" % path)
+        return content
+
+
+def read_file_lines(path):
+    content = []
+    fd = open_file(path)
+    if fd:
+        content = fd.readlines()
+        fd.close()
+    return content
+
+
+def get_lines(content):
+    '''
+    Return content split into a list of lines, without line endings
+    (CRLF included). Leading/trailing blank lines are not preserved.
+    '''
+    return content.strip().splitlines()
+
+
+def init_argparser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('src_path', nargs='?',
+                        default='.',
+                        help='Path of document sources')
+    parser.add_argument('pub_path', nargs='?',
+                        default='./pub',
+                        help='Directory to place output')
+    parser.add_argument('-T', '--templates', dest='templates_path',
+                        default='',
+                        help='Directory to find templates')
+    parser.add_argument('-C', '--css', dest='css_file',
+                        default='',
+                        help='css file for html pages',
+                        )
+    return parser
+
+
+def get_env():
+    env = cli.init_argparser().parse_args()
+    if not os.path.isdir(env.src_path):
+        print("%s directory not found. Aborting..." % env.src_path)
+        exit(1)
+
+    env.pub_path = os.path.abspath(env.pub_path)
+
+    if not env.templates_path:
+        env.templates_path = os.path.join(env.src_path, 'templates')
+
+    if not os.path.isdir(env.templates_path):
+        print("No templates path found. Aborting...")
+        exit(1)
+
+    return env
+
+if __name__ == "__main__":
+    env = get_env()
+    if os.path.exists(env.pub_path):
+        shutil.rmtree(env.pub_path)
+    os.mkdir(env.pub_path)
+
+    document = Document(env.src_path)
+    document.set_templater(Templater(env.templates_path))
+
+    ###### Create PDF
+    pdffile = document.publish_pdf(env.pub_path)
+    print("Wrote PDF: %s" % pdffile)
+
+    ###### Create HTML
+    document.publish_html(env.pub_path)
+
+    exit(0)
-- 
cgit v1.2.3