initial commit

author: Scott Bahling <sbahling@mudgum.net> 2013-03-17 21:26:02 +0100
committer: Scott Bahling <sbahling@mudgum.net> 2013-03-17 21:26:02 +0100
commit: bf0b3f2a096832a9063f7816dcc14a0607f67271 (patch)
tree: efff29003b44d8c471587f751740b6f9eac48478 /panfry/util.py
download: panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.gz
panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.xz
panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.zip
1 files changed, 365 insertions, 0 deletions
diff --git a/panfry/util.py b/panfry/util.py
new file mode 100755
index 0000000..c711599
--- /dev/null
+++ b/panfry/util.py
@@ -0,0 +1,365 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import shutil
+import re
+import argparse
+import pandoc
+import cli
+from tempfile import NamedTemporaryFile
+from jinja2 import Environment, FileSystemLoader
+
+
+class Templater:
+    ''' Pandoc Template generator.
+    Creates a template suitable for passing to pandoc for html generation.
+
+    The most prominent feature is adding the custom table of contents
+    navigator that navigates across multiple html pages.
+
+    The generator references a jinja2 template file called html5.template
+    located either in the directory 'templates' under the source path,
+    or in the directory passed as an option.
+    '''
+    def __init__(self, path):
+        self.templates_path = path
+        # Lines starting with '#' in a template are jinja2 line statements.
+        self.j2 = Environment(loader=FileSystemLoader(self.templates_path),
+                              line_statement_prefix='#')
+
+    def page_template(self, doc, page):
+        '''
+        Returns a new page template (utf-8 encoded) for use by pandoc.
+
+        Arguments:
+        - doc: Document providing toc_links, pdf_filename and the
+          prev_page/next_page lookups.
+        - page: Page from doc to generate the template for.
+        '''
+        def html_name(neighbour):
+            # prev_page/next_page return '' when there is no neighbour.
+            return unicode(neighbour.htmlfile, "utf8") if neighbour else u''
+
+        options = dict(toc=doc.toc_links)
+        options['page'] = unicode(page.htmlfile, "utf8")
+        # Decode as utf-8 like every other value; the default ascii codec
+        # fails on a non-ascii document title.
+        options['pdf'] = unicode(doc.pdf_filename, "utf8")
+        options['prev'] = html_name(doc.prev_page(page))
+        options['next'] = html_name(doc.next_page(page))
+        template = self.j2.get_template('html5.template')
+        return template.render(options=options).encode('utf-8')
+
+
+class Page:
+    '''
+    Represents a single source file: its filename and its source text.
+    '''
+    def __init__(self, filename, source):
+        self.filename = filename
+        self.source = source
+
+    @property
+    def title(self):
+        '''
+        Returns a tuple containing a string representing the page title
+        and an integer representing the level in the document outline
+        hierarchy.
+
+        The level is preserved so that page titles can be properly
+        positioned (indented) in the table of contents.
+
+        If the page has a pandoc title block, the title is retrieved
+        from there and returned with a level of 1. A man page style
+        title such as "NAME(1)" is returned as "man(1) NAME".
+
+        If a title block is not found, the first heading is returned
+        with the corresponding heading level.
+
+        Finally, if no title block or headings are found in the page,
+        the filename is returned as the title with underscores changed
+        to spaces.
+        '''
+        title = ''
+        for line in get_lines(self.source):
+            if line.startswith('% '):
+                title = line.split(' ', 1)[1].strip()
+                # "NAME(1) ..." style man page title: needs both parentheses.
+                man = re.match(r'([^(]*)\((.)[^)]*\)', title)
+                if man:
+                    title = 'man(%s) %s' % (man.group(2), man.group(1).strip())
+                return (title, 1)
+            # Setext underlines: the heading text is the previous line.
+            if re.match('[=]{2}', line):
+                return (title, 1)
+            if re.match('[-]{2}', line):
+                return (title, 2)
+            if re.match('#+.+[A-Za-z0-9]', line):
+                level = len(re.match('#+', line).group())
+                return (line.lstrip('#').strip(), level)
+            title = line.strip()
+        return (self.filename.replace('_', ' '), 1)
+
+    @property
+    def htmlfile(self):
+        ''' The filename with its extension (if any) replaced by .html '''
+        return os.path.splitext(self.filename)[0] + '.html'
+
+
+class Document:
+    '''
+    A document source directory: the TOC and HEADER meta files plus the
+    pages listed in TOC, publishable as one pdf or as a set of html pages.
+    '''
+    css_file = 'css/style.css'
+    workdir = 'stdocs-work'
+
+    def __init__(self, path):
+        self.src_path = path
+        self.meta = self.get_meta(path)
+        self.pages = self.get_pages(path)
+
+    def get_pages(self, path):
+        ''' Returns a Page for each non-empty file listed in TOC. '''
+        pages = []
+        for filename in get_lines(self.meta['TOC']):
+            source = read_file(os.path.join(path, filename))
+            if source:
+                pages.append(Page(filename, source))
+            else:
+                print("WARNING: Page: %s not found or is empty." % filename)
+        return pages
+
+    def get_meta(self, path):
+        ''' Returns TOC and HEADER contents; exits if one is missing/empty. '''
+        meta = {}
+        for filename in ['TOC', 'HEADER']:
+            source = read_file(os.path.join(path, filename))
+            if source:
+                meta[filename] = source
+            else:
+                print("!E: %s not found or is empty. Aborting..." % filename)
+                exit(1)
+        return meta
+
+    def set_templater(self, templater):
+        ''' Sets the Templater that publish_html gets page templates from. '''
+        self.templater = templater
+
+    def next_page(self, page):
+        ''' Returns the page following page, or '' if there is none. '''
+        try:
+            idx = self.pages.index(page)
+        except ValueError:
+            return ''
+        return self.pages[idx+1] if idx < len(self.pages) - 1 else ''
+
+    def prev_page(self, page):
+        ''' Returns the page preceding page, or '' if there is none. '''
+        try:
+            idx = self.pages.index(page)
+        except ValueError:
+            return ''
+        return self.pages[idx-1] if idx > 0 else ''
+
+    @property
+    def toc_links(self):
+        '''
+        Returns a list of dictionaries. Each dictionary element contains
+        a page's html file name (link), title (text) and outline level.
+        '''
+        links = []
+        for page in self.pages:
+            title, level = page.title
+            links.append(dict(link=unicode(page.htmlfile, "utf8"),
+                              text=unicode(title, "utf8"),
+                              level=level))
+        return links
+
+    @property
+    def pdf_filename(self):
+        ''' Returns the pdf file name, built from the text following the
+        leading '%' of HEADER or, failing that, the source directory name. '''
+        header = self.meta['HEADER']
+        m = re.match('(^%)(.*\n.*)(%*)', header)
+        if m:
+            title = m.group(2).strip()
+            title = re.sub(r'\s+', ' ', title)
+            title = re.sub(r'[\n ]', '_', title)
+            title = re.sub(r'[:,]', '-', title)
+            title = re.sub(r'_+', '_', title)
+            title = re.sub(r'-_', '-', title)
+            title = re.sub(r'-+', '-', title)
+        else:
+            title = os.path.split(self.src_path)[1]
+
+        return title + '.pdf'
+
+    def publish_pdf(self, pub_path):
+        ''' Writes HEADER plus all pages as one pdf; returns its file name. '''
+        pdf_path = os.path.join(pub_path, self.pdf_filename)
+        doc = pandoc.Document()
+        doc.markdown = '\n'.join([self.meta['HEADER']] +
+                                 [page.source for page in self.pages])
+        pandoc.set_cwd(os.path.abspath(self.src_path))
+        try:
+            doc.to_file(pdf_path)
+        finally:
+            # Restore pandoc's working directory even when conversion fails.
+            pandoc.set_cwd(None)
+        return self.pdf_filename
+
+    def publish_css(self, pub_path):
+        ''' Copies the source css directory, if any, to pub_path. '''
+        src = os.path.join(self.src_path, 'css')
+        if os.path.isdir(src):
+            copy(src, os.path.join(pub_path, 'css'))
+
+    def publish_images(self, pub_path):
+        ''' Copies images/html if it exists, else images, to pub_path. '''
+        src = os.path.join(self.src_path, 'images/html')
+        if not os.path.exists(src):
+            src = os.path.join(self.src_path, 'images')
+        if os.path.exists(src):
+            copy(src, os.path.join(pub_path, 'images'))
+
+    def publish_html(self, pub_path):
+        ''' Writes one html file per page, index.html, css and images. '''
+        pandoc.set_cwd(None)
+        for page in self.pages:
+            print("generating %s..." % page.htmlfile)
+            template_file = NamedTemporaryFile(mode='w',
+                                               suffix='pf.template',
+                                               delete=False)
+            try:
+                template_file.write(self.templater.page_template(self, page))
+                template_file.close()
+                doc = pandoc.Document()
+                doc.add_argument('toc')
+                doc.add_argument('template=%s' % template_file.name)
+                doc.add_argument('css=%s' % self.css_file)
+                doc.markdown = page.source
+                content = doc.html
+            finally:
+                # delete=False keeps the file for pandoc; remove it ourselves.
+                template_file.close()
+                os.remove(template_file.name)
+            write_file(os.path.join(pub_path, page.htmlfile),
+                       unicode(content, 'utf-8'))
+
+        # If no page already produces index.html, link 'index.html' to the
+        # first page. self.pages holds Page objects, so compare file names.
+        htmlfiles = [page.htmlfile for page in self.pages]
+        if htmlfiles and 'index.html' not in htmlfiles:
+            os.symlink(htmlfiles[0], os.path.join(pub_path, 'index.html'))
+
+        ###### Copy css and images to publish directory
+        self.publish_css(pub_path)
+        self.publish_images(pub_path)
+
+
+def copy(src, dst, ignore=None):
+    if not os.path.isdir(src):
+        shutil.copy(src, dst)
+    else:  # a directory: copy the whole tree, keeping symlinks as symlinks
+        shutil.copytree(src, dst, symlinks=True, ignore=ignore)
+
+
+def open_file(path, mode='r'):
+    ''' Open path; returns None if it is not a file and mode is not 'w'. '''
+    if mode != 'w' and not os.path.isfile(path):
+        return None
+    return open(path, mode)
+
+
+def write_file(path, content):
+    ''' Write unicode content to path encoded as utf-8; returns path. '''
+    fd = open_file(path, mode='w')
+    if not fd:
+        return None
+    with fd:  # closes the file even if the write fails
+        fd.write(content.encode('utf-8'))
+    return path
+
+
+def read_file(path):
+    ''' Returns the content of the file at path, or '' (after printing
+    an error message) when path is not an existing file. '''
+    fd = open_file(path)
+    if not fd:
+        print("Error: could not open %s" % path)
+        return ''
+    # Close the file even if reading fails.
+    with fd:
+        return fd.read()
+
+
+def read_file_lines(path):
+    ''' Returns the lines of the file at path; [] if it is not a file. '''
+    fd = open_file(path)
+    if not fd:
+        return []
+    with fd:  # closes the file even if reading fails
+        return fd.readlines()
+
+
+def get_lines(content):
+    ''' Returns content as a list of lines, split on newlines.
+    Surrounding whitespace is stripped first, so leading/trailing
+    blank lines are not preserved. '''
+    trimmed = content.strip()
+    return trimmed.split('\n')
+
+
+def init_argparser():
+    ''' Returns the command line parser: two optional positional paths
+    plus the -T/--templates and -C/--css options. '''
+    parser = argparse.ArgumentParser()
+    # Both positionals are optional and fall back to a default.
+    positionals = (('src_path', '.', 'Path of document sources'),
+                   ('pub_path', './pub', 'Directory to place output'))
+    for name, default, text in positionals:
+        parser.add_argument(name, nargs='?', default=default, help=text)
+    parser.add_argument('-T', '--templates',
+                        dest='templates_path', default='',
+                        help='Directory to find templates')
+    parser.add_argument('-C', '--css',
+                        dest='css_file', default='',
+                        help='css file for html pages')
+    return parser
+
+
+def get_env():
+    ''' Parses the command line; exits with status 1 if the source or
+    templates directory is missing, else returns the parsed settings. '''
+    env = cli.init_argparser().parse_args()
+    if not os.path.isdir(env.src_path):
+        print("%s directory not found. Aborting..." % env.src_path)
+        exit(1)
+
+    env.pub_path = os.path.abspath(env.pub_path)
+    # Templates default to the 'templates' directory under the sources.
+    templates = env.templates_path or os.path.join(env.src_path, 'templates')
+    env.templates_path = templates
+    if not os.path.isdir(templates):
+        print("No templates path found. Aborting...")
+        exit(1)
+    return env
+
+if __name__ == "__main__":
+    # Rebuild the publish directory from scratch, then generate output.
+    env = get_env()
+    pub_path = env.pub_path
+    if os.path.exists(pub_path):
+        shutil.rmtree(pub_path)
+    os.mkdir(pub_path)
+
+    document = Document(env.src_path)
+    document.set_templater(Templater(env.templates_path))
+
+    ###### Create PDF
+    print("Wrote PDF: %s" % document.publish_pdf(pub_path))
+
+    ###### Create HTML
+    document.publish_html(pub_path)
+    exit(0)
author	Scott Bahling <sbahling@mudgum.net>	2013-03-17 21:26:02 +0100
committer	Scott Bahling <sbahling@mudgum.net>	2013-03-17 21:26:02 +0100
commit	bf0b3f2a096832a9063f7816dcc14a0607f67271 (patch)
tree	efff29003b44d8c471587f751740b6f9eac48478 /panfry/util.py
download	panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.gz panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.xz panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.zip