Merge branch 'master' of mudgum.net:panfry

author: Scott Bahling <sbahling@mudgum.net> 2013-07-28 12:54:44 +0200
committer: Scott Bahling <sbahling@mudgum.net> 2013-07-28 12:54:44 +0200
commit: 8ba0a5fe5708569b4cc948dc3460d4eb34755b23 (patch)
tree: bd7674d34138a53dfcc14ef9cce004d0919cd0bb /panfry
parent: 921dd24342f9b940751ddf3f5ca73132131a4062 (diff)
parent: ab77b7fcc1416c1acb8e77572db7b30c72cc5292 (diff)
download: panfry-8ba0a5fe5708569b4cc948dc3460d4eb34755b23.tar.gz panfry-8ba0a5fe5708569b4cc948dc3460d4eb34755b23.tar.xz panfry-8ba0a5fe5708569b4cc948dc3460d4eb34755b23.zip
4 files changed, 134 insertions, 75 deletions
diff --git a/panfry/cli.py b/panfry/cli.py
index 6949474..650b724 100755
--- a/panfry/cli.py
+++ b/panfry/cli.py
@@ -107,6 +107,14 @@ Args:
         
         Example --pandoc-options="smart base-header-level=2"
 
+  json_toc:
+    flags: [--json-toc]
+    action: store_true
+    default: False
+    help: |
+        Generate toc.json file instead of toc integrated in
+        the html
+
   port:
     flags: [-P, --port]
     default: 8080
@@ -129,6 +137,7 @@ Subparsers:
       - pub_path
       - templates_path
       - simple_toc
+      - json_toc
       - assets
       - clean
       - css
diff --git a/panfry/document.py b/panfry/document.py
index 2b1cf01..e03aca1 100755
--- a/panfry/document.py
+++ b/panfry/document.py
@@ -1,20 +1,24 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+from __future__ import print_function
 import os
 import re
 import pandoc
 from tempfile import NamedTemporaryFile
 from panfry.util import *
 from panfry.page import TextFile, Page
+from panfry.toc import TOCItem
 
 
 class Document:
 
-    def __init__(self, path):
-        self.src_path = os.path.join(path, 'src')
+    def __init__(self, path, subdoc=''):
+        self.src_path = os.path.join(path, 'src', subdoc)
         self.meta = self.get_meta(self.src_path)
         self.pages = self.get_pages(self.src_path)
         self.full_toc = True
+        self.standalone = True
+        self.json_toc = False
 
         self.css_file = 'css/style.css'
         self.assets_dir = 'assets'
@@ -24,6 +28,11 @@ class Document:
     def set_simple_toc(self):
             self.full_toc = False
 
+    @property
+    def set_json_toc(self):
+            self.json_toc = True
+            self.standalone = False
+
     def set_pandoc_options(self, options):
             self.pandoc_options = options
 
@@ -62,18 +71,18 @@ class Document:
         for page in self.pages:
             for tocitem in page.toc:
                 if tocitem.heading == heading.replace('\n', ' '):
-                    return '[%s](%s#%s)' % (heading,
-                                            page.htmlfile,
-                                            tocitem.header_id,
-                                            )
+                    return '[%s](%s%s)' % (heading,
+                                           page.htmlfile,
+                                           tocitem.header_id,
+                                           )
 
         return '[%s]' % ref_txt
 
     @property
     def title(self):
-        if self.header.firstline.startswith('%'):
-            title = self.header.firstline.split('%')[1].strip()
-            for line in self.header.lines[1:]:
+        if self.title_block.firstline.startswith('%'):
+            title = self.title_block.firstline.split('%')[1].strip()
+            for line in self.title_block.lines[1:]:
                 if line.startswith('%'):
                     break
                 title = '%s %s' % (title, line.strip())
@@ -98,7 +107,7 @@ class Document:
         return pages
 
     def get_meta(self, path):
-        metafiles = ['TOC', 'HEADER']
+        metafiles = ['TOC', 'TITLE_BLOCK']
         meta = {}
         for filename in metafiles:
             source = read_file(os.path.join(path, filename))
@@ -143,7 +152,7 @@ class Document:
                 toc = [page.toc[0]]
 
             for tocitem in toc:
-                link = '%s#%s' % (page.htmlfile, tocitem.header_id)
+                link = '%s%s' % (page.htmlfile, tocitem.header_id)
                 links.append(dict(link=unicode(link, "utf8"),
                                   text=unicode(tocitem.heading, "utf8"),
                                   level=tocitem.level,
@@ -152,6 +161,37 @@ class Document:
         return links
 
     @property
+    def toc_json(self):
+        lastchild = top = toc = TOCItem(self.title, 1, 'title_block.html')
+
+        curlevel = 0
+        for page in self.pages:
+            heading = ''
+            for line in page.markdown.split('\n'):
+                line = line.strip()
+                if heading and re.match('[=]{2}', line):
+                    level = 1
+                elif heading and re.match('[-]{2}', line):
+                    level = 2
+                elif re.match('#+.+[A-z|0-9]', line):
+                    level = len(re.match('#+', line).group())
+                    heading = line.split(' ', 1)[1].strip()
+                else:
+                    heading = line.strip()
+                    continue
+
+                if curlevel < level:
+                    toc = lastchild
+                    curlevel = toc.level
+                while toc.parent and curlevel >= level:
+                    toc = toc.parent
+                    curlevel = toc.level
+
+                lastchild = toc.add_child(heading, level, page.htmlfile)
+
+        return top.json
+
+    @property
     def filename_base(self):
         filename = self.title
         filename = re.sub(r'\s+', ' ', filename)
@@ -173,7 +213,7 @@ class Document:
 
     def publish_pdf(self, pub_path):
         pdf_path = os.path.join(pub_path, self.pdf_filename)
-        src = self.header.source
+        src = self.title_block.source
         for page in self.pages:
             src += '\n%s' % page.source
 
@@ -191,7 +231,7 @@ class Document:
 
     def publish_epub(self, pub_path):
         epub_path = os.path.join(pub_path, self.epub_filename)
-        src = self.header.source
+        src = self.title_block.source
         for page in self.pages:
             src += '\n%s' % page.source
 
@@ -231,24 +271,37 @@ class Document:
             doc.add_argument('toc')
             doc.add_argument('template=%s' % template_file.name)
             doc.add_argument('css=%s' % self.css_file)
+
             for option in self.pandoc_options:
                 if option == 'number-sections':
                     continue
                 doc.add_argument(option)
-            doc.markdown = '%s\n%s' % (self.header.source,
-                                       self.expand_int_links(page.markdown))
+
+            if self.standalone:
+                markdown = '%s\n' % self.title_block.source
+            else:
+                markdown = ''
+
+            markdown += self.expand_int_links(page.markdown)
+            doc.markdown = markdown
             content = doc.html5
             write_file(os.path.join(pub_path, page.htmlfile),
                        unicode(content, 'utf-8'))
 
         # If there is not explicit index.html, then link 'index.html'
         # to the toplevel page.
-        if not 'index.md' in self.pages:
+        htmlfiles = [page.htmlfile for page in self.pages]
+        if self.standalone and not 'index.html' in htmlfiles:
             src = os.path.join(self.pages[0].htmlfile)
             ref = os.path.join(pub_path, 'index.html')
+            print("Linking %s to %s" % (ref, src))
             if os.path.exists(ref):
                 os.remove(ref)
             os.symlink(src, ref)
 
+        if self.json_toc:
+            write_file(os.path.join(pub_path, 'toc.json'),
+                       unicode(self.toc_json, 'utf-8'))
+
         ###### Copy any assets to publish directory
         self.publish_assets(pub_path)
diff --git a/panfry/main.py b/panfry/main.py
index 40c2b15..1b90181 100755
--- a/panfry/main.py
+++ b/panfry/main.py
@@ -36,8 +36,10 @@ class StoppableHttpServer(BaseHTTPServer.HTTPServer):
 def get_env():
     env = panfry.cli.init_argparser().parse_args()
     env.pub_path = os.path.abspath(env.pub_path)
+    env.index = ['']
 
     if env.cmd in ['gen']:
+        INDEX = os.path.join(env.doc_path, 'src', 'INDEX')
         if not os.path.isdir(env.doc_path):
             print("%s directory not found. Aborting..." % env.doc_path)
             exit(1)
@@ -52,44 +54,53 @@ def get_env():
         if env.pandoc_options:
             env.pandoc_options = list(shlex.split(env.pandoc_options))
 
+        if os.path.isfile(INDEX):
+            env.index += open(INDEX, 'r').readlines()
+
     return env
 
 
 def gen(env):
-    document = Document(env.doc_path)
-    document.set_templater(Templater(env.templates_path))
-    document.set_pandoc_options(env.pandoc_options)
-    if env.simple_toc:
-        document.set_simple_toc
-
-    if env.clean and os.path.exists(env.pub_path):
-        print("Cleanup, removing %s..." % env.pub_path)
-        shutil.rmtree(env.pub_path)
-    if not os.path.exists(env.pub_path):
-        os.mkdir(env.pub_path)
-
-    if env.assets:
-        print("Publishing assets only...", end=' ')
-        document.publish_assets(env.pub_path)
-        print("done.")
-        exit(0)
-
-    print("\nUsing pandoc options:")
-    for opt in env.pandoc_options:
-        print("  --%s" % opt)
-
-    print()
-
-    ###### Create PDF
-    pdffile = document.publish_pdf(env.pub_path)
-    print("Wrote PDF: %s" % pdffile)
-
-    ###### Create epub
-    epubfile = document.publish_epub(env.pub_path)
-    print("Wrote epub: %s" % epubfile)
-
-    ###### Create HTML
-    document.publish_html(env.pub_path)
+    for item in env.index:
+        item = item.strip()
+        docpubpath = os.path.join(env.pub_path, item)
+        document = Document(env.doc_path, item)
+        document.set_templater(Templater(env.templates_path))
+        document.set_pandoc_options(env.pandoc_options)
+        if env.simple_toc:
+            document.set_simple_toc
+
+        if env.json_toc:
+            document.set_json_toc
+
+        if env.clean and os.path.exists(env.pub_path):
+            print("Cleanup, removing %s..." % env.pub_path)
+            shutil.rmtree(env.pub_path)
+        if not os.path.exists(docpubpath):
+            os.makedirs(docpubpath)
+
+        if env.assets:
+            print("Publishing assets only...", end=' ')
+            document.publish_assets(env.pub_path)
+            print("done.")
+            exit(0)
+
+        print("\nUsing pandoc options:")
+        for opt in env.pandoc_options:
+            print("  --%s" % opt)
+
+        print()
+
+        ###### Create PDF
+        pdffile = document.publish_pdf(docpubpath)
+        print("Wrote PDF: %s" % pdffile)
+
+        ###### Create epub
+        epubfile = document.publish_epub(docpubpath)
+        print("Wrote epub: %s" % epubfile)
+
+        ###### Create HTML
+        document.publish_html(docpubpath)
 
     exit(0)
 
diff --git a/panfry/page.py b/panfry/page.py
index 722241c..efd3561 100755
--- a/panfry/page.py
+++ b/panfry/page.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import re
 import pandoc
+from panfry.toc import TOCItem
 
 
 class TextFile:
@@ -25,28 +26,6 @@ class TextFile:
         return self.lines[0]
 
 
-class TOCItem:
-    '''
-    Table of Contents Line Item
-    '''
-    def __init__(self, heading, level):
-        self.heading = heading
-        self.level = level
-
-    @property
-    def header_id(self):
-        # - Remove all formatting, links, etc.
-        # - Remove all punctuation, except underscores, hyphens, and periods.
-        # - Replace all spaces and newlines with hyphens.
-        # - Convert all alphabetic characters to lowercase.
-        # - Remove everything up to the first letter (identifiers may not begin
-        #   with a number or punctuation mark).
-
-        remove = re.compile('^[^a-zA-Z]|[+~!@#$%^&*\(\){}\[\];:"\',<>?/\`]')
-        header_id = self.heading.lower().replace('\n', ' ').replace(' ', '-')
-        return remove.sub('', header_id)
-
-
 class Page(TextFile):
     '''
     Represents single page source file.
@@ -59,6 +38,9 @@ class Page(TextFile):
         self.format = format
         self._markdown = ''
 
+    def __repr__(self):
+        return self.markdown
+
     @property
     def markdown(self):
         if self.format == 'markdown':
@@ -106,14 +88,18 @@ class Page(TextFile):
         for line in self.markdown.split('\n'):
             line = line.strip()
             if heading and re.match('[=]{2}', line):
-                toc.append(TOCItem(heading, 1))
+                toc.append(TOCItem(heading, 1, self.htmlfile))
                 continue
             if heading and re.match('[-]{2}', line):
-                toc.append(TOCItem(heading, 2))
+                toc.append(TOCItem(heading, 2, self.htmlfile))
                 continue
             if re.match('#+.+[A-z|0-9]', line):
                 level = len(re.match('#+', line).group())
-                toc.append(TOCItem(line.split(' ', 1)[1].strip(), level))
+                toc.append(TOCItem(line.split(' ', 1)[1].strip(),
+                                   level,
+                                   self.htmlfile,
+                                   )
+                          )
                 continue
             heading = line.strip()
author	Scott Bahling <sbahling@mudgum.net>	2013-07-28 12:54:44 +0200
committer	Scott Bahling <sbahling@mudgum.net>	2013-07-28 12:54:44 +0200
commit	8ba0a5fe5708569b4cc948dc3460d4eb34755b23 (patch)
tree	bd7674d34138a53dfcc14ef9cce004d0919cd0bb /panfry
parent	921dd24342f9b940751ddf3f5ca73132131a4062 (diff)
parent	ab77b7fcc1416c1acb8e77572db7b30c72cc5292 (diff)
download	panfry-8ba0a5fe5708569b4cc948dc3460d4eb34755b23.tar.gz panfry-8ba0a5fe5708569b4cc948dc3460d4eb34755b23.tar.xz panfry-8ba0a5fe5708569b4cc948dc3460d4eb34755b23.zip