diff options
| author | Scott Bahling <sbahling@mudgum.net> | 2013-03-17 21:26:02 +0100 |
|---|---|---|
| committer | Scott Bahling <sbahling@mudgum.net> | 2013-03-17 21:26:02 +0100 |
| commit | bf0b3f2a096832a9063f7816dcc14a0607f67271 (patch) | |
| tree | efff29003b44d8c471587f751740b6f9eac48478 /panfry/util.py | |
| download | panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.gz panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.tar.xz panfry-bf0b3f2a096832a9063f7816dcc14a0607f67271.zip | |
initial commit
Diffstat (limited to 'panfry/util.py')
| -rwxr-xr-x | panfry/util.py | 365 |
1 files changed, 365 insertions, 0 deletions
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Utility classes and helpers for panfry (panfry/util.py).

Models a document as an ordered list of source pages, renders a per-page
pandoc template from a jinja2 template, and publishes the document as a
single PDF plus one HTML file per page.

NOTE: this module targets Python 2; it relies on the ``unicode`` builtin.
'''
import argparse
import os
import re
import shutil
import sys
from tempfile import NamedTemporaryFile

import pandoc
import cli
from jinja2 import Environment, FileSystemLoader


class Templater(object):
    '''
    Pandoc template generator.

    Creates a template suitable for passing to pandoc for html generation.

    The most prominent feature is adding the custom table of contents
    navigator that navigates across multiple html pages.

    The generator references a jinja2 template file called html5.template
    located in the directory given to the constructor.
    '''
    def __init__(self, path):
        '''
        Arguments:
         - path: directory that contains html5.template.
        '''
        self.templates_path = path
        self.j2 = Environment(loader=FileSystemLoader(self.templates_path),
                              line_statement_prefix='#')

    def page_template(self, doc, page):
        '''
        Returns a new page template (utf-8 encoded) for use by pandoc.

        Arguments:
         - doc: Document the page belongs to.
         - page: Page from the Document to generate the template for.
        '''
        options = dict(toc=doc.toc_links)
        options['page'] = unicode(page.htmlfile, "utf8")
        options['pdf'] = unicode(doc.pdf_filename)
        # prev/next stay empty strings at either end of the document.
        options['prev'] = u''
        options['next'] = u''
        prevpage = doc.prev_page(page)
        nextpage = doc.next_page(page)
        if prevpage:
            options['prev'] = unicode(prevpage.htmlfile, "utf8")
        if nextpage:
            options['next'] = unicode(nextpage.htmlfile, "utf8")

        template = self.j2.get_template('html5.template')

        return template.render(options=options).encode('utf-8')


class Page(object):
    '''
    Represents a single source file.
    '''
    def __init__(self, filename, source):
        '''
        Arguments:
         - filename: name of the source file as listed in the TOC.
         - source: text content of the source file.
        '''
        self.filename = filename
        self.source = source

    @property
    def title(self):
        '''
        Returns a tuple containing a string representing the page title
        and an integer representing the level in the document outline
        hierarchy.

        The level is preserved so that page titles can be properly
        positioned (indented) in the table of contents.

        If the page has a pandoc title block, the title is retrieved
        from there and returned with a level of 1. A title of the form
        "name(N) ..." is rendered as "man(N) name".

        If a title block is not found, the first heading is returned
        with the corresponding heading level.

        Finally, if no title block or headings are found in the page,
        the filename is returned as the title with underscores changed
        to spaces.
        '''
        title = ''
        for line in get_lines(self.source):
            if line.startswith('% '):
                title = line.split(' ', 1)[1].strip()
                # Require a real "(N...)" group; the previous test
                # ("'(' and ')' in line") only looked for ')'.
                section = re.search(r'\(([^()])[^()]*\)', line)
                if section:
                    name = title.split('(')[0].strip()
                    # Always return (title, level): callers unpack it.
                    return ('man(%s) %s' % (section.group(1), name), 1)
                return (title, 1)
            # Setext headings: the title is the previously seen line.
            if re.match('[=]{2}', line):
                return (title, 1)
            if re.match('[-]{2}', line):
                return (title, 2)
            # NOTE(review): [A-z] also matches the punctuation between
            # 'Z' and 'a', and '|' is literal inside a class. Pattern kept
            # as-is to avoid changing which lines count as headings.
            if re.match('#+.+[A-z|0-9]', line):
                level = len(re.match('#+', line).group())
                # Slice off the '#' run rather than splitting on the first
                # space, so "#Heading" (no space) does not raise.
                return (line[level:].strip(), level)
            title = line.strip()

        title = self.filename.replace('_', ' ')

        return (title, 1)

    @property
    def htmlfile(self):
        '''
        Returns the source filename with its extension replaced by .html.
        '''
        # splitext keeps the whole name when there is no extension; the
        # previous split('.') logic produced just '.html' in that case.
        return os.path.splitext(self.filename)[0] + '.html'


class Document(object):
    '''
    A document made of the pages listed in the TOC file of a source
    directory, plus the contents of its HEADER file.
    '''

    css_file = 'css/style.css'
    workdir = 'stdocs-work'

    def __init__(self, path):
        '''
        Arguments:
         - path: directory containing TOC, HEADER and the page sources.
        '''
        self.src_path = path
        self.meta = self.get_meta(path)
        self.pages = self.get_pages(path)

    def get_pages(self, path):
        '''
        Returns a list of Page objects, one per readable, non-empty file
        named in the TOC. Missing or empty files are reported and skipped.
        '''
        pages = []
        for filename in get_lines(self.meta['TOC']):
            filename = filename.strip()
            if not filename:
                # Blank TOC lines are not page names.
                continue
            source = read_file(os.path.join(path, filename))
            if source:
                pages.append(Page(filename, source))
            else:
                print("WARNING: Page: %s not found or is empty." % filename)

        return pages

    def get_meta(self, path):
        '''
        Returns a dict with the contents of the TOC and HEADER files.
        Exits the program with status 1 if either is missing or empty.
        '''
        metafiles = ['TOC', 'HEADER']
        meta = {}
        for filename in metafiles:
            source = read_file(os.path.join(path, filename))
            if source:
                meta[filename] = source
            else:
                print("!E: %s not found or is empty. Aborting..." % filename)
                sys.exit(1)

        return meta

    def set_templater(self, templater):
        '''
        Sets the Templater used by publish_html.
        '''
        self.templater = templater

    def next_page(self, page):
        '''
        Returns the page following the given page, or '' if the page is
        the last one or is not part of the document.
        '''
        try:
            idx = self.pages.index(page)
        except ValueError:
            return ''
        if idx >= len(self.pages) - 1:
            return ''
        return self.pages[idx + 1]

    def prev_page(self, page):
        '''
        Returns the page preceding the given page, or '' if the page is
        the first one or is not part of the document.
        '''
        try:
            idx = self.pages.index(page)
        except ValueError:
            return ''
        if idx == 0:
            return ''
        return self.pages[idx - 1]

    @property
    def toc_links(self):
        '''
        Returns a list of dictionaries. Each dictionary element contains
        the html file name (link), the page title (text) and the outline
        level of the title (level).
        '''
        links = []
        for page in self.pages:
            title, level = page.title
            links.append(dict(link=unicode(page.htmlfile, "utf8"),
                              text=unicode(title, "utf8"),
                              level=level,
                              ))

        return links

    @property
    def pdf_filename(self):
        '''
        Returns the PDF file name, derived from the '%' title line at the
        start of HEADER, or from the source directory name if HEADER does
        not start with '%'.
        '''
        header = self.meta['HEADER']
        # NOTE(review): group 2 spans the title line AND the following
        # line, so a second '%' line directly after the title ends up in
        # the file name. Confirm whether that is intended.
        m = re.match('(^%)(.*\n.*)(%*)', header)
        if m:
            title = m.group(2).strip()
            title = re.sub(r'\s+', ' ', title)
            title = re.sub(r'[\n ]', '_', title)
            title = re.sub(r'[:,]', '-', title)
            title = re.sub(r'_+', '_', title)
            title = re.sub(r'-_', '-', title)
            title = re.sub(r'-+', '-', title)
        else:
            title = os.path.split(self.src_path)[1]

        return title + '.pdf'

    def publish_pdf(self, pub_path):
        '''
        Writes HEADER plus all page sources as one PDF into pub_path.
        Returns the PDF file name (without directory).
        '''
        pdf_path = os.path.join(pub_path, self.pdf_filename)
        parts = [self.meta['HEADER']]
        parts.extend(page.source for page in self.pages)

        doc = pandoc.Document()
        doc.markdown = '\n'.join(parts)

        pandoc.set_cwd(os.path.abspath(self.src_path))
        try:
            doc.to_file(pdf_path)
        finally:
            # Always undo the cwd override, even if the conversion fails.
            pandoc.set_cwd(None)

        return self.pdf_filename

    def publish_css(self, pub_path):
        '''
        Copies the 'css' directory of the sources, if any, to pub_path.
        '''
        src = os.path.join(self.src_path, 'css')
        dst = os.path.join(pub_path, 'css')
        if os.path.isdir(src):
            copy(src, dst)

    def publish_images(self, pub_path):
        '''
        Copies 'images/html' if it exists, otherwise 'images', from the
        sources to 'images' under pub_path.
        '''
        src = os.path.join(self.src_path, 'images/html')
        if not os.path.exists(src):
            src = os.path.join(self.src_path, 'images')

        dst = os.path.join(pub_path, 'images')
        if os.path.exists(src):
            copy(src, dst)

    def publish_html(self, pub_path):
        '''
        Generates one html file per page in pub_path, links index.html to
        the first page when no page produces index.html itself, and
        copies css and images.
        '''
        pandoc.set_cwd(None)
        for page in self.pages:
            print("generating %s..." % page.htmlfile)
            template = self.templater.page_template(self, page)
            template_file = NamedTemporaryFile(mode='w',
                                               suffix='pf.template',
                                               delete=False)
            try:
                template_file.write(template)
                # Closed before pandoc is pointed at the file by name.
                template_file.close()
                doc = pandoc.Document()
                doc.add_argument('toc')
                doc.add_argument('template=%s' % template_file.name)
                doc.add_argument('css=%s' % self.css_file)
                doc.markdown = page.source
                content = doc.html
            finally:
                # delete=False means nothing else removes the file.
                template_file.close()
                os.unlink(template_file.name)
            write_file(os.path.join(pub_path, page.htmlfile),
                       unicode(content, 'utf-8'))

        # If there is no explicit index.html, then link 'index.html'
        # to the toplevel page. self.pages holds Page objects, so the
        # generated file names are compared rather than a bare string.
        has_index = any(page.htmlfile == 'index.html'
                        for page in self.pages)
        if self.pages and not has_index:
            src = self.pages[0].htmlfile
            ref = os.path.join(pub_path, 'index.html')
            os.symlink(src, ref)

        # Copy css and images to the publish directory.
        self.publish_css(pub_path)
        self.publish_images(pub_path)


def copy(src, dst, ignore=None):
    '''
    Copies src to dst. Directories are copied recursively, keeping
    symlinks as symlinks; ignore is passed through to shutil.copytree.
    '''
    if os.path.isdir(src):
        shutil.copytree(src, dst, symlinks=True, ignore=ignore)
    else:
        shutil.copy(src, dst)


def open_file(path, mode='r'):
    '''
    Returns an open file object, or None when the mode is not 'w' and
    path is not an existing regular file.
    '''
    if mode == 'w' or os.path.isfile(path):
        return open(path, mode)
    return None


def write_file(path, content):
    '''
    Writes content, encoded as utf-8, to path. Returns path on success
    or None if the file could not be opened.
    '''
    fd = open_file(path, mode='w')
    if fd is None:
        return None
    with fd:
        fd.write(content.encode('utf-8'))
    return path


def read_file(path):
    '''
    Returns the content of the file at path, or an empty string (after
    printing an error) if path is not an existing regular file.
    '''
    fd = open_file(path)
    if fd is None:
        print("Error: could not open %s" % path)
        return ''
    with fd:
        return fd.read()


def read_file_lines(path):
    '''
    Returns the lines of the file at path as a list, or an empty list
    if path is not an existing regular file.
    '''
    fd = open_file(path)
    if fd is None:
        return []
    with fd:
        return fd.readlines()


def get_lines(content):
    '''
    Returns a list of content split by line.
    Leading/trailing blank lines are not preserved.
    '''
    return content.strip().split('\n')


def init_argparser():
    '''
    Returns the argparse parser for the command line interface.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('src_path', nargs='?',
                        default='.',
                        help='Path of document sources')
    parser.add_argument('pub_path', nargs='?',
                        default='./pub',
                        help='Directory to place output')
    parser.add_argument('-T', '--templates', dest='templates_path',
                        default='',
                        help='Directory to find templates')
    parser.add_argument('-C', '--css', dest='css_file',
                        default='',
                        help='css file for html pages',
                        )
    return parser


def get_env():
    '''
    Parses the command line and returns the resulting namespace with
    pub_path made absolute and templates_path defaulted to 'templates'
    under src_path. Exits with status 1 if the source or templates
    directory does not exist.
    '''
    # NOTE(review): this uses cli.init_argparser(), not the
    # init_argparser() defined in this module; confirm which is intended.
    env = cli.init_argparser().parse_args()
    if not os.path.isdir(env.src_path):
        print("%s directory not found. Aborting..." % env.src_path)
        sys.exit(1)

    env.pub_path = os.path.abspath(env.pub_path)

    if not env.templates_path:
        env.templates_path = os.path.join(env.src_path, 'templates')

    if not os.path.isdir(env.templates_path):
        print("No templates path found. Aborting...")
        sys.exit(1)

    return env


def main():
    '''
    Recreates the publish directory from scratch, then writes the PDF
    and the HTML pages into it.
    '''
    env = get_env()
    if os.path.exists(env.pub_path):
        shutil.rmtree(env.pub_path)
    # makedirs so that a nested publish path can be created as well.
    os.makedirs(env.pub_path)

    document = Document(env.src_path)
    document.set_templater(Templater(env.templates_path))

    # Create PDF
    pdffile = document.publish_pdf(env.pub_path)
    print("Wrote PDF: %s" % pdffile)

    # Create HTML
    document.publish_html(env.pub_path)

    sys.exit(0)


if __name__ == "__main__":
    main()
