#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Document model for panfry: reads the TOC and HEADER files plus the pages
they list from a source directory and publishes them as HTML, PDF and
EPUB through pandoc.
'''

import os
import re
import sys
from tempfile import NamedTemporaryFile

import pandoc

from panfry.util import *
from panfry.page import TextFile, Page


# Inline link whose target is a bare in-document anchor: [text](#anchor).
# The anchor part stops at the first ")" so that one link cannot swallow
# everything up to the last ")" of the document.
_RE_ANCHOR_LINK = re.compile(r'(\[(?P<s>[^\]^]+)\])\((?P<anchor>#[^)]*)\)')

# Bracketed text that is not followed by "(", i.e. not already an inline
# link.  A lookahead is used so the following character is not consumed
# (and therefore not lost) by the substitution.
_RE_IMPLICIT_LINK = re.compile(r'(\[(?P<s>[^\]^]+)\])(?!\()')


def _to_text(value, encoding='utf-8'):
    '''
    Returns value as a text string.

    Byte strings are decoded with the given encoding, anything else is
    returned unchanged.  Works on both Python 2 and Python 3.
    '''
    if isinstance(value, bytes):
        return value.decode(encoding)
    return value


class Document:
    '''
    A document made of several markdown pages.

    Arguments:
    path = directory holding the TOC and HEADER files and the pages
           listed in TOC
    '''

    def __init__(self, path):
        self.src_path = path
        # get_meta() attaches self.toc and self.header, which get_pages()
        # and the title property rely on, so it has to run first.
        self.meta = self.get_meta(path)
        self.pages = self.get_pages(path)
        self.full_toc = True
        self.css_file = 'css/style.css'

    @property
    def set_simple_toc(self):
        '''
        Switches toc_links to one entry per page.

        NOTE: this is a property, so merely reading the attribute
        performs the switch.  Kept that way for existing callers.
        '''
        self.full_toc = False

    @property
    def set_full_toc(self):
        '''
        Switches toc_links to one entry per heading.

        NOTE: this is a property, so merely reading the attribute
        performs the switch.  Kept that way for existing callers.
        '''
        self.full_toc = True

    def set_templater(self, templater):
        '''
        Sets the object whose page_template(document, page) method
        supplies the pandoc template used by publish_html.
        '''
        self.templater = templater

    def expand_int_links(self, source):
        '''
        Converts all internal links to full url links so that links
        reference across the split up html pages

        Arguments:
        source = original markdown source

        returns: markdown source with expanded links
        '''
        source = _RE_ANCHOR_LINK.sub(self._get_cross_link, source)
        source = _RE_IMPLICIT_LINK.sub(self._get_cross_link, source)
        return source

    def _get_cross_link(self, m):
        '''
        Substitution callback for expand_int_links.

        Arguments:
        m = regex match whose group 's' is the link text

        returns: '[text](htmlfile#header_id)' for the first heading whose
        text equals the link text (newlines in the link text count as
        spaces), or '[text]' when no heading matches.
        '''
        ref_txt = m.group('s')
        wanted = ref_txt.replace('\n', ' ')
        for page in self.pages:
            for tocitem in page.toc:
                if tocitem.heading == wanted:
                    return '[%s](%s#%s)' % (ref_txt,
                                            page.htmlfile,
                                            tocitem.header_id,
                                            )
        return '[%s]' % ref_txt

    @property
    def title(self):
        '''
        Title of the document.

        Taken from the HEADER file when its first line starts with '%':
        the rest of that line plus every following line up to the next
        line starting with '%', joined by single spaces.  Otherwise the
        title of the first page is used.
        '''
        header = self.header
        if not header.firstline.startswith('%'):
            return self.pages[0].title
        # Drop only the leading '%' so a '%' inside the title survives.
        parts = [header.firstline[1:].strip()]
        for line in header.lines[1:]:
            if line.startswith('%'):
                break
            parts.append(line.strip())
        return ' '.join(parts)

    def get_pages(self, path):
        '''
        Reads every page listed in the TOC file.

        Arguments:
        path = directory containing the page files

        returns: list of Page objects in TOC order; blank TOC lines and
        missing or empty files are skipped with a warning.
        '''
        pages = []
        filelist = self.toc.lines
        print(filelist)
        for filename in filelist:
            if not filename:
                # blank line in toc
                print("WARNING: %s contains blank lines" % self.toc.filename)
                continue
            print("Reading in %s..." % filename)
            source = read_file(os.path.join(path, filename))
            if source:
                pages.append(Page(filename, source))
            else:
                print("WARNING: Page: %s not found or is empty." % filename)
        return pages

    def get_meta(self, path):
        '''
        Loads the TOC and HEADER files and attaches them to the document
        as self.toc and self.header.

        Arguments:
        path = directory containing the TOC and HEADER files

        returns: an empty dictionary (nothing is stored in it yet).
        Exits the program with status 1 when either file is missing or
        empty.
        '''
        metafiles = ['TOC', 'HEADER']
        meta = {}
        for filename in metafiles:
            source = read_file(os.path.join(path, filename))
            if not source:
                print("!E: %s not found or is empty. Aborting..." % filename)
                sys.exit(1)
            setattr(self, filename.lower(), TextFile(filename, source))
        return meta

    def _page_at_offset(self, page, offset):
        '''
        Returns the page `offset` positions away from `page`, or '' when
        `page` is unknown or the position falls outside the document.
        '''
        try:
            idx = self.pages.index(page) + offset
        except ValueError:
            return ''
        if 0 <= idx < len(self.pages):
            return self.pages[idx]
        return ''

    def next_page(self, page):
        '''
        Returns the page following `page`, or '' if there is none.
        '''
        return self._page_at_offset(page, 1)

    def prev_page(self, page):
        '''
        Returns the page preceding `page`, or '' if there is none.
        '''
        return self._page_at_offset(page, -1)

    @property
    def toc_links(self):
        '''
        Returns a list of dictionaries. Each dictionary element contains
        the link ('htmlfile#header_id'), the heading text and the heading
        level.  With full_toc every heading of every page is listed,
        otherwise only the first heading of each page.
        '''
        links = []
        for page in self.pages:
            # Slicing instead of page.toc[0] keeps pages without any
            # heading from raising IndexError.
            toc = page.toc if self.full_toc else page.toc[:1]
            for tocitem in toc:
                link = '%s#%s' % (page.htmlfile, tocitem.header_id)
                links.append(dict(link=_to_text(link),
                                  text=_to_text(tocitem.heading),
                                  level=tocitem.level,
                                  ))
        return links

    @property
    def filename_base(self):
        '''
        Document title turned into a file name stem: whitespace becomes
        '_', ':' and ',' become '-', and runs of separators are
        collapsed.
        '''
        substitutions = (
            (r'\s+', ' '),
            (r'[\n ]', '_'),
            (r'[:,]', '-'),
            (r'_+', '_'),
            (r'-_', '-'),
            (r'-+', '-'),
        )
        filename = self.title
        # Order matters: each step works on the result of the previous.
        for pattern, replacement in substitutions:
            filename = re.sub(pattern, replacement, filename)
        return filename

    @property
    def pdf_filename(self):
        '''File name of the published PDF.'''
        return self.filename_base + '.pdf'

    @property
    def epub_filename(self):
        '''File name of the published EPUB.'''
        return self.filename_base + '.epub'

    def _combined_source(self):
        '''
        Returns the header source followed by the source of every page,
        separated by newlines.
        '''
        return '\n'.join([self.header.source] +
                         [page.source for page in self.pages])

    def _publish_combined(self, out_path, arguments=()):
        '''
        Converts the whole document to out_path with pandoc, running
        pandoc from the source directory.

        Arguments:
        out_path  = file to write
        arguments = extra pandoc arguments
        '''
        doc = pandoc.Document()
        doc.markdown = self._combined_source()
        pandoc.set_cwd(os.path.abspath(self.src_path))
        try:
            for argument in arguments:
                doc.add_argument(argument)
            doc.to_file(out_path)
        finally:
            # Always restore the working directory, even when pandoc
            # fails, so later conversions are not affected.
            pandoc.set_cwd(None)

    def publish_pdf(self, pub_path):
        '''
        Writes the document as a single PDF into pub_path.

        returns: the PDF file name (without directory)
        '''
        pdf_path = os.path.join(pub_path, self.pdf_filename)
        self._publish_combined(pdf_path, ['latex-engine=xelatex'])
        return self.pdf_filename

    def publish_epub(self, pub_path):
        '''
        Writes the document as a single EPUB into pub_path.

        returns: the EPUB file name (without directory)
        '''
        epub_path = os.path.join(pub_path, self.epub_filename)
        self._publish_combined(epub_path)
        return self.epub_filename

    def publish_css(self, pub_path):
        '''
        Copies the 'css' directory of the source tree, if there is one,
        to pub_path.
        '''
        src = os.path.join(self.src_path, 'css')
        dst = os.path.join(pub_path, 'css')
        if os.path.isdir(src):
            copy(src, dst)

    def publish_images(self, pub_path):
        '''
        Copies the images to pub_path/images.  'images/html' is used
        when it exists, otherwise 'images'; nothing is copied when
        neither exists.
        '''
        src = os.path.join(self.src_path, 'images/html')
        if not os.path.exists(src):
            src = os.path.join(self.src_path, 'images')
        dst = os.path.join(pub_path, 'images')
        if os.path.exists(src):
            copy(src, dst)

    def _render_page(self, page):
        '''
        Returns the html pandoc generates for a single page, using the
        template supplied by the templater.
        '''
        template = self.templater.page_template(self, page)
        template_file = NamedTemporaryFile(mode='w',
                                           suffix='pf.template',
                                           delete=False)
        try:
            try:
                template_file.write(template)
            finally:
                # pandoc reads the template by name, so it has to be
                # closed (flushed) before the conversion.
                template_file.close()
            doc = pandoc.Document()
            doc.add_argument('toc')
            doc.add_argument('template=%s' % template_file.name)
            doc.add_argument('css=%s' % self.css_file)
            doc.markdown = '%s\n%s' % (self.header.source,
                                       self.expand_int_links(page.markdown))
            return doc.html
        finally:
            # The file was created with delete=False; remove it here so
            # templates do not pile up in the temp directory.
            os.remove(template_file.name)

    def _link_index(self, pub_path):
        '''
        Links pub_path/index.html to the first page unless the document
        has an index page of its own.
        '''
        if not self.pages:
            return
        # The 'index.md' test is the original check and only matches if
        # Page compares equal to its file name (not visible here), so
        # the generated file names are checked as well.
        if 'index.md' in self.pages:
            return
        if any(page.htmlfile == 'index.html' for page in self.pages):
            return
        ref = os.path.join(pub_path, 'index.html')
        if os.path.islink(ref):
            # Stale link from an earlier run; os.symlink would fail.
            os.remove(ref)
        os.symlink(self.pages[0].htmlfile, ref)

    def publish_html(self, pub_path):
        '''
        Writes one html file per page into pub_path, links index.html to
        the first page when needed and copies css and images.
        '''
        pandoc.set_cwd(None)
        for page in self.pages:
            print("generating %s..." % page.htmlfile)
            content = self._render_page(page)
            write_file(os.path.join(pub_path, page.htmlfile),
                       _to_text(content))

        # If there is not explicit index.html, then link 'index.html'
        # to the toplevel page.
        self._link_index(pub_path)

        # Copy style sheets and images to the publish directory
        self.publish_css(pub_path)
        self.publish_images(pub_path)