#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import print_function import os import re import pandoc from tempfile import NamedTemporaryFile from panfry.util import * from panfry.page import TextFile, Page from panfry.toc import TOCItem class Document: def __init__(self, path, subdoc=''): self.src_path = os.path.join(path, 'src', subdoc) self.meta = self.get_meta(self.src_path) self.pages = self.get_pages(self.src_path) self.full_toc = True self.standalone = True self.json_toc = False self.css_file = 'css/style.css' self.assets_dir = 'assets' self.pandoc_options = [] @property def set_simple_toc(self): self.full_toc = False @property def set_json_toc(self): self.json_toc = True self.standalone = False def set_pandoc_options(self, options): self.pandoc_options = options @property def set_full_toc(self): self.full_toc = True def set_templater(self, templater): self.templater = templater def expand_int_links(self, source): ''' Converts all internal links to full url links so that links reference accross the split up html pages Arguments: source = original markdown source returns: markdown source with expanded links ''' re_link1 = re.compile(r'(\[(?P[^\]^]+)\])\((?P#.*)\)', re.S) re_link2 = re.compile(r'(\[(?P[^\]^]+)\][^(])', re.S) re_link3 = re.compile(r'(\[(?P[^\]^]+)\])\[(?P[^\]].*)\]', re.S) source = re_link1.sub(self._get_cross_link, source) source = re_link2.sub(self._get_cross_link, source) source = re_link3.sub(self._get_cross_link, source) return source def _get_cross_link(self, m): if 'h' in m.groupdict(): heading = m.group('h') else: heading = m.group('s') for page in self.pages: for tocitem in page.toc: if tocitem.heading == heading.replace('\n', ' '): return '[%s](%s%s)' % (heading, page.htmlfile, tocitem.header_id, ) return '[%s]' % ref_txt @property def title(self): if self.title_block.firstline.startswith('%'): title = self.title_block.firstline.split('%')[1].strip() for line in self.title_block.lines[1:]: if line.startswith('%'): break title = '%s %s' % (title, line.strip()) return title else: return pages[0].title def get_pages(self, path): pages = [] filelist = self.toc.lines for filename in filelist: print("Reading in %s..." % filename) if not filename: # blank line in toc print("WARNING: %s contains blank lines" % self.toc.filename) continue source = read_file(os.path.join(path, filename)) if source: pages.append(Page(filename, source)) else: print("WARNING: Page: %s not found or is empty." % filename) return pages def get_meta(self, path): metafiles = ['TOC', 'TITLE_BLOCK'] meta = {} for filename in metafiles: source = read_file(os.path.join(path, filename)) if source: metafile = TextFile(filename, source) setattr(self, filename.lower(), metafile) else: print("!E: %s not found or is empty. Aborting..." % filename) exit(1) return meta def next_page(self, page): try: idx = self.pages.index(page) except: return '' if idx >= len(self.pages) - 1: return '' return self.pages[idx+1] def prev_page(self, page): try: idx = self.pages.index(page) except: return '' if idx == 0: return '' return self.pages[idx-1] @property def toc_links(self): ''' Returns a list of dictionaries. Each dictionary element contains a page title and the html file name. ''' links = [] for page in self.pages: if self.full_toc: toc = page.toc else: toc = [page.toc[0]] for tocitem in toc: link = '%s%s' % (page.htmlfile, tocitem.header_id) links.append(dict(link=unicode(link, "utf8"), text=unicode(tocitem.heading, "utf8"), level=tocitem.level, )) return links @property def toc_json(self): lastchild = top = toc = TOCItem(self.title, 1, 'title_block.html') curlevel = 0 for page in self.pages: heading = '' for line in page.markdown.split('\n'): line = line.strip() if heading and re.match('[=]{2}', line): level = 1 elif heading and re.match('[-]{2}', line): level = 2 elif re.match('#+.+[A-z|0-9]', line): level = len(re.match('#+', line).group()) heading = line.split(' ', 1)[1].strip() else: heading = line.strip() continue if curlevel < level: toc = lastchild curlevel = toc.level while toc.parent and curlevel >= level: toc = toc.parent curlevel = toc.level lastchild = toc.add_child(heading, level, page.htmlfile) return top.json @property def filename_base(self): filename = self.title filename = re.sub(r'\s+', ' ', filename) filename = re.sub(r'[\n ]', '_', filename) filename = re.sub(r'[:,]', '-', filename) filename = re.sub(r'_+', '_', filename) filename = re.sub(r'-_', '-', filename) filename = re.sub(r'-+', '-', filename) return filename @property def pdf_filename(self): return self.filename_base + '.pdf' @property def epub_filename(self): return self.filename_base + '.epub' def publish_pdf(self, pub_path): pdf_path = os.path.join(pub_path, self.pdf_filename) src = self.title_block.source for page in self.pages: src += '\n%s' % page.source doc = pandoc.Document() doc.markdown = src pandoc.set_cwd(os.path.abspath(self.src_path)) doc.add_argument('latex-engine=xelatex') for option in self.pandoc_options: doc.add_argument(option) doc.to_file(pdf_path) pandoc.set_cwd(None) return self.pdf_filename def publish_epub(self, pub_path): epub_path = os.path.join(pub_path, self.epub_filename) src = self.title_block.source for page in self.pages: src += '\n%s' % page.source doc = pandoc.Document() doc.markdown = src print("epub_path: %s" % epub_path) pandoc.set_cwd(os.path.abspath(self.src_path)) for option in self.pandoc_options: doc.add_argument(option) doc.to_file(epub_path) pandoc.set_cwd(None) return self.epub_filename def publish_assets(self, pub_path): if not os.path.isdir(self.assets_dir): return for asset in os.listdir(self.assets_dir): src = os.path.join(self.assets_dir, asset) dst = os.path.join(pub_path, asset) if os.path.isdir(src): copy(src, dst) def publish_html(self, pub_path): pandoc.set_cwd(None) for page in self.pages: print("generating %s..." % page.htmlfile) template_file = NamedTemporaryFile(mode='w', suffix='pf.template', delete=False) template = self.templater.page_template(self, page) template_file.write(template) template_file.close() doc = pandoc.Document() doc.add_argument('toc') doc.add_argument('template=%s' % template_file.name) doc.add_argument('css=%s' % self.css_file) for option in self.pandoc_options: if option == 'number-sections': continue doc.add_argument(option) if self.standalone: markdown = '%s\n' % self.title_block.source else: markdown = '' markdown += self.expand_int_links(page.markdown) doc.markdown = markdown content = doc.html5 write_file(os.path.join(pub_path, page.htmlfile), unicode(content, 'utf-8')) # If there is not explicit index.html, then link 'index.html' # to the toplevel page. htmlfiles = [page.htmlfile for page in self.pages] if self.standalone and not 'index.html' in htmlfiles: src = os.path.join(self.pages[0].htmlfile) ref = os.path.join(pub_path, 'index.html') print("Linking %s to %s" % (ref, src)) if os.path.exists(ref): os.remove(ref) os.symlink(src, ref) if self.json_toc: write_file(os.path.join(pub_path, 'toc.json'), unicode(self.toc_json, 'utf-8')) ###### Copy any assets to publish directory self.publish_assets(pub_path)