diff options
Diffstat (limited to 'panfry')
| -rwxr-xr-x | panfry/util.py | 308 |
1 files changed, 0 insertions, 308 deletions
diff --git a/panfry/util.py b/panfry/util.py index c711599..5909eef 100755 --- a/panfry/util.py +++ b/panfry/util.py @@ -2,260 +2,6 @@ # -*- coding: utf-8 -*- import os import shutil -import re -import argparse -import pandoc -import cli -from tempfile import NamedTemporaryFile -from jinja2 import Environment, FileSystemLoader - - -class Templater: - ''' Pandoc Template generator. - Creates a template suitible for passing to pandoc for html generation. - - The most prominent feature is adding the custom table of contents - navigator that navigates accross muliple html pages. - - The generator references a jinja2 template file called html5.template - located either in the directory 'templates' under the source path, - or in the directory passed as an option. - ''' - def __init__(self, path): - self.templates_path = path - self.j2 = Environment(loader=FileSystemLoader(self.templates_path), - line_statement_prefix='#') - - def page_template(self, doc, page): - ''' - Returns a new page template for use by pandoc. - - Arguments: - - doc: Panfry.Document. - - page: Panfry.Page from Panfry.Document to generate template for. - ''' - options = dict(toc=doc.toc_links) - options['page'] = unicode(page.htmlfile, "utf8") - options['pdf'] = unicode(doc.pdf_filename) - options['prev'] = u'' - options['next'] = u'' - prevpage = doc.prev_page(page) - nextpage = doc.next_page(page) - if prevpage: - options['prev'] = unicode(prevpage.htmlfile, "utf8") - if nextpage: - options['next'] = unicode(nextpage.htmlfile, "utf8") - - template = self.j2.get_template('html5.template') - - return template.render(options=options).encode('utf-8') - - -class Page: - ''' - Represents single source file. - ''' - def __init__(self, filename, source): - self.filename = filename - self.source = source - - @property - def title(self): - ''' - Returns a tuple containing a string representing the page title - and an integer representing the level in the document outline - hierarchy. - - The level is preserved so that page titles can be properly - positioned (indented) in the table of contents. - - If the page has a pandoc title block, the title is retreived - from there and returned with a level of 1 - - If a title block is not found, the first heading is returned - with the corrisponding heading level. - - Finally, if no title block or headings are found in the page, - the filename is returned as the title with underscores changed - to spaces. - ''' - title = '' - for line in get_lines(self.source): - if line.startswith('% '): - title = line.split(' ', 1)[1].strip() - if '(' and ')' in line: - num = line.split('(')[1][0] - return 'man(%s) %s' % (num, title.split('(')[0].strip()) - else: - return (title, 1) - if re.match('[=]{2}', line): - return (title, 1) - if re.match('[-]{2}', line): - return (title, 2) - if re.match('#+.+[A-z|0-9]', line): - level = len(re.match('#+', line).group()) - return (line.split(' ', 1)[1].strip(), level) - title = line.strip() - - title = self.filename.replace('_', ' ') - - return (title, 1) - - @property - def htmlfile(self): - return '.'.join(self.filename.split('.')[:-1]) + '.html' - - -class Document: - - def __init__(self, path): - self.src_path = path - self.meta = self.get_meta(path) - self.pages = self.get_pages(path) - - css_file = 'css/style.css' - workdir = 'stdocs-work' - - def get_pages(self, path): - pages = [] - filelist = get_lines(self.meta['TOC']) - for filename in filelist: - source = read_file(os.path.join(path, filename)) - if source: - pages.append(Page(filename, source)) - else: - print("WARNING: Page: %s not found or is empty." % filename) - - return pages - - def get_meta(self, path): - metafiles = ['TOC', 'HEADER'] - meta = {} - for filename in metafiles: - source = read_file(os.path.join(path, filename)) - if source: - meta[filename] = source - else: - print("!E: %s not found or is empty. Aborting..." % filename) - exit(1) - - return meta - - def set_templater(self, templater): - self.templater = templater - - def next_page(self, page): - try: - idx = self.pages.index(page) - except: - return '' - if idx >= len(self.pages) - 1: - return '' - return self.pages[idx+1] - - def prev_page(self, page): - try: - idx = self.pages.index(page) - except: - return '' - if idx == 0: - return '' - return self.pages[idx-1] - - @property - def toc_links(self): - ''' - Returns a list of dictionaries. Each dictionary element contains - a page title and the html file name. - ''' - links = [] - for page in self.pages: - title, level = page.title - links.append(dict(link=unicode(page.htmlfile, "utf8"), - text=unicode(title, "utf8"), - level=level, - )) - - return links - - @property - def pdf_filename(self): - header = self.meta['HEADER'] - m = re.match('(^%)(.*\n.*)(%*)', header) - if m: - title = m.group(2).strip() - title = re.sub(r'\s+', ' ', title) - title = re.sub(r'[\n ]', '_', title) - title = re.sub(r'[:,]', '-', title) - title = re.sub(r'_+', '_', title) - title = re.sub(r'-_', '-', title) - title = re.sub(r'-+', '-', title) - else: - title = os.path.split(self.src_path)[1] - - return title + '.pdf' - - def publish_pdf(self, pub_path): - pdf_path = os.path.join(pub_path, self.pdf_filename) - src = self.meta['HEADER'] - for page in self.pages: - src += '\n%s' % page.source - - doc = pandoc.Document() - doc.markdown = src - - pandoc.set_cwd(os.path.abspath(self.src_path)) - doc.to_file(pdf_path) - pandoc.set_cwd(None) - - return self.pdf_filename - - def publish_css(self, pub_path): - src = os.path.join(self.src_path, 'css') - dst = os.path.join(pub_path, 'css') - if os.path.isdir(src): - copy(src, dst) - - def publish_images(self, pub_path): - src_path = self.src_path - if os.path.exists(os.path.join(src_path, 'images/html')): - src = os.path.join(src_path, 'images/html') - else: - src = os.path.join(src_path, 'images') - - dst = os.path.join(pub_path, 'images') - if os.path.exists(src): - copy(src, dst) - - def publish_html(self, pub_path): - pandoc.set_cwd(None) - for page in self.pages: - print("generating %s..." % page.htmlfile) - template_file = NamedTemporaryFile(mode='w', - suffix='pf.template', - delete=False) - template = self.templater.page_template(self, page) - template_file.write(template) - template_file.close() - doc = pandoc.Document() - doc.add_argument('toc') - doc.add_argument('template=%s' % template_file.name) - doc.add_argument('css=%s' % self.css_file) - doc.markdown = page.source - content = doc.html - write_file(os.path.join(pub_path, page.htmlfile), - unicode(content, 'utf-8')) - - # If there is not explicit index.html, then link 'index.html' - # to the toplevel page. - if not 'index.md' in self.pages: - src = os.path.join(self.pages[0].htmlfile) - ref = os.path.join(pub_path, 'index.html') - os.symlink(src, ref) - - ###### Copy any images to publish directory - self.publish_css(pub_path) - self.publish_images(pub_path) def copy(src, dst, ignore=None): @@ -309,57 +55,3 @@ def get_lines(content): Leading/trailing blank lines are not reserved. ''' return content.strip().split('\n') - - -def init_argparser(): - parser = argparse.ArgumentParser() - parser.add_argument('src_path', nargs='?', - default='.', - help='Path of document sources') - parser.add_argument('pub_path', nargs='?', - default='./pub', - help='Directory to place output') - parser.add_argument('-T', '--templates', dest='templates_path', - default='', - help='Directory to find templates') - parser.add_argument('-C', '--css', dest='css_file', - default='', - help='css file for html pages', - ) - return parser - - -def get_env(): - env = cli.init_argparser().parse_args() - if not os.path.isdir(env.src_path): - print("%s directory not found. Aborting..." % env.src_path) - exit(1) - - env.pub_path = os.path.abspath(env.pub_path) - - if not env.templates_path: - env.templates_path = os.path.join(env.src_path, 'templates') - - if not os.path.isdir(env.templates_path): - print("No templates path found. Aborting...") - exit(1) - - return env - -if __name__ == "__main__": - env = get_env() - if os.path.exists(env.pub_path): - shutil.rmtree(env.pub_path) - os.mkdir(env.pub_path) - - document = Document(env.src_path) - document.set_templater(Templater(env.templates_path)) - - ###### Create PDF - pdffile = document.publish_pdf(env.pub_path) - print("Wrote PDF: %s" % pdffile) - - ###### Create HTML - document.publish_html(env.pub_path) - - exit(0) |
