summary refs log tree commit diff
path: root/panfry/util.py
diff options
context:
space:
mode:
author Scott Bahling <sbahling@mudgum.net> 2013-03-17 21:26:02 +0100
committer Scott Bahling <sbahling@mudgum.net> 2013-03-17 21:26:02 +0100
commit bf0b3f2a096832a9063f7816dcc14a0607f67271 (patch)
tree efff29003b44d8c471587f751740b6f9eac48478 /panfry/util.py
download panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.gz
panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.xz
panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.zip
initial commit
Diffstat (limited to 'panfry/util.py')
-rwxr-xr-x panfry/util.py 365
1 files changed, 365 insertions, 0 deletions
diff --git a/panfry/util.py b/panfry/util.py
new file mode 100755
index 0000000..c711599
--- /dev/null
+++ b/panfry/util.py
@@ -0,0 +1,365 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import shutil
+import re
+import argparse
+import pandoc
+import cli
+from tempfile import NamedTemporaryFile
+from jinja2 import Environment, FileSystemLoader
+
+
class Templater:
    '''
    Pandoc template generator.

    Creates a template suitable for passing to pandoc for html generation.

    The most prominent feature is adding the custom table of contents
    navigator that navigates across multiple html pages.

    The generator references a jinja2 template file called html5.template
    which is looked up in the directory given to the constructor.
    '''
    def __init__(self, path):
        '''
        Arguments:
        - path: directory jinja2 searches for template files.
        '''
        self.templates_path = path
        self.j2 = Environment(loader=FileSystemLoader(self.templates_path),
                              line_statement_prefix='#')

    @staticmethod
    def _to_text(value):
        '''
        Returns value as text: utf-8 byte strings are decoded, text that
        is already decoded is passed through unchanged.
        '''
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    def page_template(self, doc, page):
        '''
        Returns a new page template for use by pandoc, as a utf-8
        encoded byte string.

        Arguments:
        - doc: Panfry.Document.
        - page: Panfry.Page from Panfry.Document to generate template for.

        The html5.template file is rendered with a single variable,
        'options', holding the keys 'toc', 'page', 'pdf', 'prev' and
        'next'. 'prev' and 'next' are empty strings when the page has
        no neighbour in that direction.
        '''
        options = {
            'toc': doc.toc_links,
            'page': self._to_text(page.htmlfile),
            # Decoded as utf-8 like every other field; relying on the
            # default codec fails for non-ASCII document titles.
            'pdf': self._to_text(doc.pdf_filename),
            'prev': u'',
            'next': u'',
        }

        prevpage = doc.prev_page(page)
        if prevpage:
            options['prev'] = self._to_text(prevpage.htmlfile)

        nextpage = doc.next_page(page)
        if nextpage:
            options['next'] = self._to_text(nextpage.htmlfile)

        template = self.j2.get_template('html5.template')

        return template.render(options=options).encode('utf-8')
+
+
class Page:
    '''
    Represents a single source file.

    Attributes:
    - filename: name of the source file as listed in the table of contents.
    - source: full text of the source file.
    '''
    def __init__(self, filename, source):
        self.filename = filename
        self.source = source

    @property
    def title(self):
        '''
        Returns a tuple containing a string representing the page title
        and an integer representing the level in the document outline
        hierarchy.

        The level is preserved so that page titles can be properly
        positioned (indented) in the table of contents.

        If the page has a pandoc title block, the title is retrieved
        from there and returned with a level of 1. A title of the form
        'NAME(N) ...' is returned as 'man(N) NAME'.

        If a title block is not found, the first heading is returned
        with the corresponding heading level. Both underlined headings
        ('==' gives level 1, '--' gives level 2) and '#' headings are
        recognised.

        Finally, if no title block or headings are found in the page,
        the filename is returned as the title with underscores changed
        to spaces.
        '''
        previous = ''
        for line in self.source.strip().splitlines():
            if line.startswith('% '):
                title = line.split(' ', 1)[1].strip()
                # Needs an opening AND a closing parenthesis; only the
                # first character of the section is kept.
                man = re.match(r'(.*?)\((\w)[^)]*\)', title)
                if man:
                    name = man.group(1).strip()
                    return ('man(%s) %s' % (man.group(2), name), 1)
                return (title, 1)

            # An underline only makes a heading when there is text above
            # it; a leading rule must not produce an empty title.
            if previous and re.match('[=]{2}', line):
                return (previous, 1)
            if previous and re.match('[-]{2}', line):
                return (previous, 2)

            hashes = re.match(r'(#+)(.*)', line)
            if hashes:
                text = hashes.group(2).strip()
                if re.search(r'[A-Za-z0-9]', text):
                    return (text, len(hashes.group(1)))

            previous = line.strip()

        return (self.filename.replace('_', ' '), 1)

    @property
    def htmlfile(self):
        '''
        Returns the name of the html file generated for this page: the
        source file name with its extension replaced by '.html'. A file
        name without an extension simply gets '.html' appended.
        '''
        return os.path.splitext(self.filename)[0] + '.html'
+
+
class Document:
    '''
    A document built from the files found in a source directory.

    The source directory must contain the files 'TOC' (one page file
    name per line) and 'HEADER' (text prepended to the pages when the
    PDF is generated; a leading pandoc title block in it provides the
    PDF file name).

    Attributes:
    - src_path: the source directory.
    - meta: dictionary with the contents of 'TOC' and 'HEADER'.
    - pages: list of Page objects in table of contents order.
    '''

    # css file handed to pandoc for every html page
    css_file = 'css/style.css'
    workdir = 'stdocs-work'

    def __init__(self, path):
        self.src_path = path
        self.meta = self.get_meta(path)
        self.pages = self.get_pages(path)

    @staticmethod
    def _to_text(value):
        '''
        Returns value as text: utf-8 byte strings are decoded, text that
        is already decoded is passed through unchanged.
        '''
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    def get_pages(self, path):
        '''
        Returns the list of Page objects for the files named in the TOC.

        Blank TOC lines are skipped. Files that are missing or empty are
        reported with a warning and left out.
        '''
        pages = []
        for entry in get_lines(self.meta['TOC']):
            filename = entry.strip()
            if not filename:
                continue
            source = read_file(os.path.join(path, filename))
            if source:
                pages.append(Page(filename, source))
            else:
                print("WARNING: Page: %s not found or is empty." % filename)

        return pages

    def get_meta(self, path):
        '''
        Returns a dictionary mapping 'TOC' and 'HEADER' to the contents
        of the files with those names found in path.

        Raises SystemExit(1) when one of them is missing or empty.
        '''
        meta = {}
        for filename in ('TOC', 'HEADER'):
            source = read_file(os.path.join(path, filename))
            if not source:
                print("!E: %s not found or is empty. Aborting..." % filename)
                raise SystemExit(1)
            meta[filename] = source

        return meta

    def set_templater(self, templater):
        '''
        Sets the Templater used by publish_html.
        '''
        self.templater = templater

    def next_page(self, page):
        '''
        Returns the page following page, or '' when page is the last
        page or is not part of this document.
        '''
        try:
            idx = self.pages.index(page)
        except ValueError:
            return ''
        if idx >= len(self.pages) - 1:
            return ''
        return self.pages[idx + 1]

    def prev_page(self, page):
        '''
        Returns the page preceding page, or '' when page is the first
        page or is not part of this document.
        '''
        try:
            idx = self.pages.index(page)
        except ValueError:
            return ''
        if idx == 0:
            return ''
        return self.pages[idx - 1]

    @property
    def toc_links(self):
        '''
        Returns a list of dictionaries. Each dictionary element contains
        a page title ('text'), the html file name ('link') and the
        outline level of the title ('level').
        '''
        links = []
        for page in self.pages:
            title, level = page.title
            links.append(dict(link=self._to_text(page.htmlfile),
                              text=self._to_text(title),
                              level=level,
                              ))

        return links

    @property
    def pdf_filename(self):
        '''
        Returns the file name of the PDF, derived from the title in the
        title block at the start of HEADER.

        Whitespace becomes '_', the characters ':', ',', '/' and '\\'
        become '-', and runs of separators are collapsed. When HEADER
        has no title, the name of the source directory is used.
        '''
        title = ''
        # The title is the first '%' line plus any continuation lines,
        # which start with whitespace. The following '%' lines (author,
        # date) are not part of it.
        m = re.match(r'%[ \t]*(.*(?:\n[ \t]+.*)*)', self.meta['HEADER'])
        if m:
            title = m.group(1).strip()
            title = re.sub(r'\s+', ' ', title)
            title = re.sub(r'[\n ]', '_', title)
            title = re.sub(r'[:,/\\]', '-', title)
            title = re.sub(r'_+', '_', title)
            title = re.sub(r'-_', '-', title)
            title = re.sub(r'-+', '-', title)
        if not title:
            # abspath() so that '.' or a trailing separator still yields
            # a directory name.
            title = os.path.basename(os.path.abspath(self.src_path))

        return title + '.pdf'

    def publish_pdf(self, pub_path):
        '''
        Writes HEADER followed by all pages as one PDF into pub_path.

        Returns the file name (not the path) of the PDF.
        '''
        pdf_path = os.path.join(pub_path, self.pdf_filename)
        src = self.meta['HEADER']
        for page in self.pages:
            src += '\n%s' % page.source

        doc = pandoc.Document()
        doc.markdown = src

        pandoc.set_cwd(os.path.abspath(self.src_path))
        try:
            doc.to_file(pdf_path)
        finally:
            # Reset even when the conversion fails, so later pandoc
            # calls are not run with this document's cwd setting.
            pandoc.set_cwd(None)

        return self.pdf_filename

    def publish_css(self, pub_path):
        '''
        Copies the 'css' directory of the sources, if any, to pub_path.
        '''
        src = os.path.join(self.src_path, 'css')
        dst = os.path.join(pub_path, 'css')
        if os.path.isdir(src):
            copy(src, dst)

    def publish_images(self, pub_path):
        '''
        Copies the images of the sources, if any, to pub_path/images.

        'images/html' is preferred over 'images' when it exists.
        '''
        src = os.path.join(self.src_path, 'images/html')
        if not os.path.exists(src):
            src = os.path.join(self.src_path, 'images')

        dst = os.path.join(pub_path, 'images')
        if os.path.exists(src):
            copy(src, dst)

    def publish_html(self, pub_path):
        '''
        Writes one html file per page into pub_path, followed by the
        css and image files.

        A Templater must have been set with set_templater.
        '''
        pandoc.set_cwd(None)
        for page in self.pages:
            print("generating %s..." % page.htmlfile)
            template = self.templater.page_template(self, page)
            if not isinstance(template, bytes):
                template = template.encode('utf-8')

            template_file = NamedTemporaryFile(mode='wb',
                                               suffix='pf.template',
                                               delete=False)
            try:
                template_file.write(template)
                # Closed before use so the content is flushed to disk.
                template_file.close()
                doc = pandoc.Document()
                doc.add_argument('toc')
                doc.add_argument('template=%s' % template_file.name)
                doc.add_argument('css=%s' % self.css_file)
                doc.markdown = page.source
                content = doc.html
            finally:
                # delete=False leaves the cleanup to us. Assumes the
                # template is no longer needed once doc.html has
                # returned -- TODO confirm against the pandoc wrapper.
                template_file.close()
                os.remove(template_file.name)

            write_file(os.path.join(pub_path, page.htmlfile),
                       self._to_text(content))

        # If no page produces index.html, then link 'index.html'
        # to the toplevel page.
        has_index = any(page.htmlfile == 'index.html' for page in self.pages)
        if self.pages and not has_index:
            ref = os.path.join(pub_path, 'index.html')
            os.symlink(self.pages[0].htmlfile, ref)

        # Copy stylesheets and images to the publish directory
        self.publish_css(pub_path)
        self.publish_images(pub_path)
+
+
def copy(src, dst, ignore=None):
    '''
    Copies src to dst.

    A directory is copied recursively, keeping symbolic links as links;
    ignore is handed to shutil.copytree. Anything else is copied as a
    single file, in which case ignore is not used.
    '''
    if not os.path.isdir(src):
        shutil.copy(src, dst)
        return
    shutil.copytree(src, dst, symlinks=True, ignore=ignore)
+
+
def open_file(path, mode='r'):
    '''
    Opens path and returns the file object, or None when path is to be
    read but is not an existing regular file.

    Modes that create the file ('w', 'a' and 'x', in any combination
    such as 'wb' or 'a+') do not require path to exist. The caller is
    responsible for closing the returned file.
    '''
    creates = any(flag in mode for flag in 'wax')
    if creates or os.path.isfile(path):
        return open(path, mode)
    return None
+
+
def write_file(path, content):
    '''
    Writes content to path, replacing any existing file, and returns
    path.

    Text is encoded as utf-8. Byte strings are written unchanged.
    '''
    if isinstance(content, bytes):
        data = content
    else:
        data = content.encode('utf-8')

    # Binary mode: the bytes written are exactly the encoded content.
    # The with block closes the file even when the write fails.
    with open(path, 'wb') as fd:
        fd.write(data)
    return path
+
+
def read_file(path):
    '''
    Returns the content of the file at path.

    When the file cannot be opened (missing, a directory, not
    readable), an error is printed and an empty string is returned.
    '''
    # Only the open is guarded: errors while reading are not
    # "could not open" errors and are left to propagate.
    try:
        fd = open(path)
    except IOError:
        print("Error: could not open %s" % path)
        return ''

    with fd:
        return fd.read()
+
+
def read_file_lines(path):
    '''
    Returns the content of the file at path as a list of lines, line
    endings included.

    When the file cannot be opened, an empty list is returned.
    '''
    try:
        fd = open(path)
    except IOError:
        return []

    with fd:
        return fd.readlines()
+
+
def get_lines(content):
    '''
    Returns list of content split by line.

    Leading/trailing whitespace of the content as a whole, blank lines
    included, is not preserved. '\\n', '\\r\\n' and '\\r' are all accepted
    as line endings. Empty content gives a list holding one empty
    string.
    '''
    return re.split(r'\r\n|\r|\n', content.strip())
+
+
def init_argparser():
    '''
    Returns the argparse parser for the command line.

    Positional arguments (both optional):
    - src_path: path of document sources, '.' by default.
    - pub_path: directory to place output, './pub' by default.

    Options (both empty by default):
    - -T/--templates: stored as templates_path.
    - -C/--css: stored as css_file.
    '''
    positionals = (
        ('src_path', '.', 'Path of document sources'),
        ('pub_path', './pub', 'Directory to place output'),
    )
    options = (
        (('-T', '--templates'), 'templates_path',
         'Directory to find templates'),
        (('-C', '--css'), 'css_file', 'css file for html pages'),
    )

    parser = argparse.ArgumentParser()
    for name, default, help_text in positionals:
        parser.add_argument(name, nargs='?', default=default, help=help_text)
    for flags, dest, help_text in options:
        parser.add_argument(*flags, dest=dest, default='', help=help_text)
    return parser
+
+
def get_env():
    '''
    Parses the command line and returns the resulting namespace.

    pub_path is made absolute. templates_path defaults to the directory
    'templates' under src_path when it is not given.

    Raises SystemExit(1) when src_path or the templates directory does
    not exist.
    '''
    env = cli.init_argparser().parse_args()
    if not os.path.isdir(env.src_path):
        print("%s directory not found. Aborting..." % env.src_path)
        # SystemExit rather than exit(): the latter is only injected by
        # the site module for interactive use.
        raise SystemExit(1)

    env.pub_path = os.path.abspath(env.pub_path)

    if not env.templates_path:
        env.templates_path = os.path.join(env.src_path, 'templates')

    if not os.path.isdir(env.templates_path):
        print("No templates path found. Aborting...")
        raise SystemExit(1)

    return env
+
def main():
    '''
    Publishes the document named on the command line as PDF and html.

    The output directory is removed first if it exists, so it only ever
    holds the result of the current run.
    '''
    env = get_env()
    if os.path.exists(env.pub_path):
        shutil.rmtree(env.pub_path)
    # makedirs: the output directory may be nested in directories that
    # do not exist yet.
    os.makedirs(env.pub_path)

    document = Document(env.src_path)
    document.set_templater(Templater(env.templates_path))

    # Apply the css file given on the command line; without this the
    # option is parsed but has no effect. getattr() because the parser
    # is built outside this function.
    css_file = getattr(env, 'css_file', '')
    if css_file:
        document.css_file = css_file

    # Create PDF
    pdffile = document.publish_pdf(env.pub_path)
    print("Wrote PDF: %s" % pdffile)

    # Create HTML
    document.publish_html(env.pub_path)


if __name__ == "__main__":
    main()