#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Publish a set of pandoc markdown sources as a multi-page HTML site plus a
single PDF.

The source directory must contain a TOC file (one page filename per line)
and a HEADER file (prepended to the combined PDF source).
'''

import argparse
import io
import os
import re
import shutil
import sys
from tempfile import NamedTemporaryFile

import pandoc
from jinja2 import Environment, FileSystemLoader

import cli

try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3


def _to_text(value, encoding='utf-8'):
    '''
    Return value as a text (unicode) string.

    Byte strings are decoded with encoding; anything else is passed
    through the interpreter's text type unchanged.
    '''
    if isinstance(value, bytes):
        return value.decode(encoding)
    return _text_type(value)


class Templater:
    '''
    Pandoc Template generator.

    Creates a template suitable for passing to pandoc for html generation.
    The most prominent feature is adding the custom table of contents
    navigator that navigates across multiple html pages.

    The generator references a jinja2 template file called html5.template
    located either in the directory 'templates' under the source path, or
    in the directory passed as an option.
    '''

    def __init__(self, path):
        self.templates_path = path
        self.j2 = Environment(loader=FileSystemLoader(self.templates_path),
                              line_statement_prefix='#')

    def page_template(self, doc, page):
        '''
        Returns a new page template for use by pandoc, as UTF-8 encoded
        bytes.

        Arguments:

        - doc: Panfry.Document.
        - page: Panfry.Page from Panfry.Document to generate template for.
        '''
        options = {
            'toc': doc.toc_links,
            'page': _to_text(page.htmlfile),
            'pdf': _to_text(doc.pdf_filename),
            'prev': u'',
            'next': u'',
        }

        # prev_page/next_page return '' when there is no neighbour.
        prevpage = doc.prev_page(page)
        nextpage = doc.next_page(page)
        if prevpage:
            options['prev'] = _to_text(prevpage.htmlfile)
        if nextpage:
            options['next'] = _to_text(nextpage.htmlfile)

        template = self.j2.get_template('html5.template')
        return template.render(options=options).encode('utf-8')


class Page:
    '''
    Represents single source file.
    '''

    def __init__(self, filename, source):
        self.filename = filename
        self.source = source

    @property
    def title(self):
        '''
        Returns a tuple containing a string representing the page title
        and an integer representing the level in the document outline
        hierarchy. The level is preserved so that page titles can be
        properly positioned (indented) in the table of contents.

        If the page has a pandoc title block, the title is retrieved from
        there and returned with a level of 1. A title block of the form
        "% NAME(N) ..." is reported as "man(N) NAME".

        If a title block is not found, the first heading is returned with
        the corresponding heading level.

        Finally, if no title block or headings are found in the page, the
        filename is returned as the title with underscores changed to
        spaces.
        '''
        # Text of the previous line; it is the title when the current line
        # turns out to be a setext underline ('===' or '---').
        previous = ''
        for line in get_lines(self.source):
            if line.startswith('% '):
                title = line.split(' ', 1)[1].strip()
                open_at = line.find('(')
                # Require '(' followed by at least one character and a
                # later ')' so the section character always exists.
                if open_at != -1 and line.find(')', open_at + 2) != -1:
                    section = line[open_at + 1]
                    name = title.split('(')[0].strip()
                    return ('man(%s) %s' % (section, name), 1)
                return (title, 1)
            if re.match('[=]{2}', line):
                return (previous, 1)
            if re.match('[-]{2}', line):
                return (previous, 2)
            # NOTE(review): '[A-z|0-9]' also accepts the punctuation that
            # sits between 'Z' and 'a' and a literal '|'; left unchanged so
            # the same lines are recognised as headings.
            if re.match('#+.+[A-z|0-9]', line):
                hashes = re.match('#+', line).group()
                return (line[len(hashes):].strip(), len(hashes))
            previous = line.strip()
        return (self.filename.replace('_', ' '), 1)

    @property
    def htmlfile(self):
        '''
        Returns the page filename with its extension replaced by '.html'.
        '''
        return os.path.splitext(self.filename)[0] + '.html'


class Document:
    '''
    A collection of pages read from a source directory, publishable as
    HTML pages and as a single PDF.
    '''

    # Stylesheet path handed to pandoc for every generated html page.
    css_file = 'css/style.css'
    workdir = 'stdocs-work'

    def __init__(self, path):
        self.src_path = path
        self.meta = self.get_meta(path)
        self.pages = self.get_pages(path)

    def get_pages(self, path):
        '''
        Returns a list of Page objects, one per file named in the TOC.
        Files that are missing or empty are skipped with a warning.
        '''
        pages = []
        for entry in get_lines(self.meta['TOC']):
            filename = entry.strip()
            if not filename:
                # Blank line in the TOC; nothing to load.
                continue
            source = read_file(os.path.join(path, filename))
            if source:
                pages.append(Page(filename, source))
            else:
                print("WARNING: Page: %s not found or is empty." % filename)
        return pages

    def get_meta(self, path):
        '''
        Returns a dictionary holding the contents of the TOC and HEADER
        files found in path. Exits with status 1 if either one is missing
        or empty.
        '''
        metafiles = ['TOC', 'HEADER']
        meta = {}
        for filename in metafiles:
            source = read_file(os.path.join(path, filename))
            if not source:
                print("!E: %s not found or is empty. Aborting..." % filename)
                sys.exit(1)
            meta[filename] = source
        return meta

    def set_templater(self, templater):
        self.templater = templater

    def next_page(self, page):
        '''
        Returns the page following page, or '' if page is the last page
        or is not part of this document.
        '''
        try:
            idx = self.pages.index(page)
        except ValueError:
            return ''
        if idx >= len(self.pages) - 1:
            return ''
        return self.pages[idx + 1]

    def prev_page(self, page):
        '''
        Returns the page preceding page, or '' if page is the first page
        or is not part of this document.
        '''
        try:
            idx = self.pages.index(page)
        except ValueError:
            return ''
        if idx == 0:
            return ''
        return self.pages[idx - 1]

    @property
    def toc_links(self):
        '''
        Returns a list of dictionaries. Each dictionary element contains
        a page title ('text'), the html file name ('link') and the outline
        level of the title ('level').
        '''
        links = []
        for page in self.pages:
            title, level = page.title
            links.append(dict(link=_to_text(page.htmlfile),
                              text=_to_text(title),
                              level=level))
        return links

    @property
    def pdf_filename(self):
        '''
        Returns the PDF file name. It is derived from the first two lines
        of the HEADER title block when HEADER starts with '%', otherwise
        from the last component of the source path.
        '''
        header = self.meta['HEADER']
        m = re.match(r'(^%)(.*\n.*)(%*)', header)
        if m:
            title = m.group(2).strip()
            # Collapse whitespace to '_' and ':' / ',' to '-', then squeeze
            # the repeats those replacements can leave behind.
            title = re.sub(r'\s+', ' ', title)
            title = re.sub(r'[\n ]', '_', title)
            title = re.sub(r'[:,]', '-', title)
            title = re.sub(r'_+', '_', title)
            title = re.sub(r'-_', '-', title)
            title = re.sub(r'-+', '-', title)
        else:
            title = os.path.split(self.src_path)[1]
        return title + '.pdf'

    def publish_pdf(self, pub_path):
        '''
        Writes HEADER followed by every page source to a single PDF in
        pub_path and returns the PDF file name.
        '''
        pdf_path = os.path.join(pub_path, self.pdf_filename)
        parts = [self.meta['HEADER']]
        parts.extend(page.source for page in self.pages)

        doc = pandoc.Document()
        doc.markdown = '\n'.join(parts)
        # Presumably pandoc is run from the source directory so relative
        # references in the sources resolve -- confirm against pandoc module.
        pandoc.set_cwd(os.path.abspath(self.src_path))
        try:
            doc.to_file(pdf_path)
        finally:
            # Reset even when the conversion fails.
            pandoc.set_cwd(None)
        return self.pdf_filename

    def publish_css(self, pub_path):
        '''
        Copies the 'css' directory from the source path to pub_path, if it
        exists.
        '''
        src = os.path.join(self.src_path, 'css')
        dst = os.path.join(pub_path, 'css')
        if os.path.isdir(src):
            copy(src, dst)

    def publish_images(self, pub_path):
        '''
        Copies images to pub_path/images, taking them from 'images/html'
        under the source path when that exists and from 'images' otherwise.
        '''
        src = os.path.join(self.src_path, 'images/html')
        if not os.path.exists(src):
            src = os.path.join(self.src_path, 'images')
        dst = os.path.join(pub_path, 'images')
        if os.path.exists(src):
            copy(src, dst)

    def publish_html(self, pub_path):
        '''
        Writes one html file per page into pub_path, links index.html to
        the first page when the document has no index.md, and copies the
        css and image directories.
        '''
        pandoc.set_cwd(None)
        for page in self.pages:
            print("generating %s..." % page.htmlfile)
            template = self.templater.page_template(self, page)
            # page_template returns encoded bytes, so write in binary mode.
            template_file = NamedTemporaryFile(mode='wb',
                                               suffix='pf.template',
                                               delete=False)
            try:
                with template_file:
                    template_file.write(template)
                doc = pandoc.Document()
                doc.add_argument('toc')
                doc.add_argument('template=%s' % template_file.name)
                doc.add_argument('css=%s' % self.css_file)
                doc.markdown = page.source
                content = doc.html
            finally:
                # delete=False leaves cleanup to us; the html has already
                # been produced (or has failed) by this point.
                os.remove(template_file.name)
            write_file(os.path.join(pub_path, page.htmlfile),
                       _to_text(content))

        # If there is not explicit index.html, then link 'index.html'
        # to the toplevel page.
        has_index = any(page.filename == 'index.md' for page in self.pages)
        ref = os.path.join(pub_path, 'index.html')
        if self.pages and not has_index and not os.path.lexists(ref):
            os.symlink(self.pages[0].htmlfile, ref)

        # Copy css and any images to publish directory
        self.publish_css(pub_path)
        self.publish_images(pub_path)


def copy(src, dst, ignore=None):
    '''
    Copy src to dst. Directories are copied recursively (symlinks are
    kept as symlinks); ignore is passed through to shutil.copytree.
    '''
    if os.path.isdir(src):
        shutil.copytree(src, dst, symlinks=True, ignore=ignore)
    else:
        shutil.copy(src, dst)


def open_file(path, mode='r'):
    '''
    Return an open file object for path, or None when reading a path that
    is not an existing regular file.
    '''
    if mode == 'w' or os.path.isfile(path):
        return open(path, mode)
    return None


def write_file(path, content):
    '''
    Write content to path encoded as UTF-8 and return path.
    '''
    with io.open(path, 'w', encoding='utf-8') as fd:
        fd.write(_to_text(content))
    return path


def read_file(path):
    '''
    Return the contents of path, or '' (after printing an error) when the
    file could not be opened.
    '''
    fd = open_file(path)
    if fd is None:
        print("Error: could not open %s" % path)
        return ''
    with fd:
        return fd.read()


def read_file_lines(path):
    '''
    Return the lines of path as a list, or an empty list when the file
    could not be opened.
    '''
    fd = open_file(path)
    if fd is None:
        return []
    with fd:
        return fd.readlines()


def get_lines(content):
    '''
    return list of content split by line. Leading/trailing blank lines
    are not preserved.
    '''
    # Drop the '\r' that CRLF line endings would otherwise leave behind.
    return [line.rstrip('\r') for line in content.strip().split('\n')]


def init_argparser():
    '''
    Return the argparse parser describing the command line options.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('src_path', nargs='?', default='.',
                        help='Path of document sources')
    parser.add_argument('pub_path', nargs='?', default='./pub',
                        help='Directory to place output')
    parser.add_argument('-T', '--templates', dest='templates_path',
                        default='', help='Directory to find templates')
    parser.add_argument('-C', '--css', dest='css_file', default='',
                        help='css file for html pages')
    return parser


def get_env():
    '''
    Parse the command line and return the resulting namespace with
    pub_path made absolute and templates_path defaulted to 'templates'
    under the source path. Exits with status 1 when the source or
    templates directory does not exist.
    '''
    # NOTE(review): the parser comes from the imported cli module, not the
    # init_argparser defined in this file -- confirm that is intended.
    env = cli.init_argparser().parse_args()
    if not os.path.isdir(env.src_path):
        print("%s directory not found. Aborting..." % env.src_path)
        sys.exit(1)
    env.pub_path = os.path.abspath(env.pub_path)
    if not env.templates_path:
        env.templates_path = os.path.join(env.src_path, 'templates')
    if not os.path.isdir(env.templates_path):
        print("No templates path found. Aborting...")
        sys.exit(1)
    return env


def main():
    '''
    Rebuild the publish directory from scratch: PDF first, then HTML.
    '''
    env = get_env()
    # Any existing publish directory is removed before publishing.
    if os.path.exists(env.pub_path):
        shutil.rmtree(env.pub_path)
    os.mkdir(env.pub_path)

    document = Document(env.src_path)
    document.set_templater(Templater(env.templates_path))

    # Create PDF
    pdffile = document.publish_pdf(env.pub_path)
    print("Wrote PDF: %s" % pdffile)

    # Create HTML
    document.publish_html(env.pub_path)
    sys.exit(0)


if __name__ == "__main__":
    main()