panfry/document.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import pandoc
from tempfile import NamedTemporaryFile
from panfry.util import *


class Document:
    '''
    A multi-page document read from a source directory.

    The directory must contain two meta files: ``TOC`` (passed to
    ``get_lines`` to obtain the page file names) and ``HEADER`` (text that
    is prepended to the markdown handed to pandoc). Each file named in
    ``TOC`` is wrapped in a ``Page`` object.
    '''

    # Stylesheet path handed to pandoc through its ``css`` argument.
    css_file = 'css/style.css'
    workdir = 'stdocs-work'

    def __init__(self, path):
        '''
        Load the meta files and the pages found under ``path``.

        Exits the process (``SystemExit``) when a meta file is missing or
        empty; see ``get_meta``.
        '''
        self.src_path = path
        # The meta files must be loaded first: get_pages() reads
        # self.meta['TOC'].
        self.meta = self.get_meta(path)
        self.pages = self.get_pages(path)

    @staticmethod
    def _as_text(value):
        '''
        Return ``value`` as text, decoding byte strings as UTF-8.

        Values that are not byte strings are returned unchanged.
        '''
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def _page_index(self, page):
        '''Return the position of ``page`` in ``self.pages``, or None.'''
        try:
            return self.pages.index(page)
        except ValueError:
            return None

    @property
    def title(self):
        '''
        The document title.

        When HEADER starts with ``%``, the title is the text between the
        first and the second ``%``. Otherwise the title of the first page
        is used; that lookup raises ``IndexError`` when there are no pages.
        '''
        header = self.meta['HEADER'].strip()
        if header.startswith('%'):
            return header.split('%')[1].strip()

        # toc_links unpacks Page.title as a (title, level) pair, so take
        # the text element here to keep both branches returning a string.
        page_title, _level = self.pages[0].title
        return page_title

    def get_pages(self, path):
        '''
        Build the list of ``Page`` objects for the files listed in TOC.

        Files for which ``read_file`` returns nothing are skipped with a
        warning on stdout.
        '''
        pages = []
        for filename in get_lines(self.meta['TOC']):
            source = read_file(os.path.join(path, filename))
            if source:
                pages.append(Page(filename, source))
            else:
                print("WARNING: Page: %s not found or is empty." % filename)

        return pages

    def get_meta(self, path):
        '''
        Read the ``TOC`` and ``HEADER`` files found under ``path``.

        Returns a dictionary mapping each meta file name to its content.
        Raises ``SystemExit(1)`` after printing an error when ``read_file``
        returns nothing for one of them.
        '''
        meta = {}
        for filename in ('TOC', 'HEADER'):
            source = read_file(os.path.join(path, filename))
            if not source:
                print("!E: %s not found or is empty. Aborting..." % filename)
                # Same exception the ``exit`` builtin raises, but it does
                # not depend on the ``site`` module being loaded.
                raise SystemExit(1)
            meta[filename] = source

        return meta

    def set_templater(self, templater):
        '''
        Set the object whose ``page_template(document, page)`` method
        supplies the pandoc template used by ``publish_html``.
        '''
        self.templater = templater

    def next_page(self, page):
        '''
        Return the page following ``page``, or ``''`` when ``page`` is the
        last page or is not part of this document.
        '''
        idx = self._page_index(page)
        if idx is None or idx >= len(self.pages) - 1:
            return ''
        return self.pages[idx + 1]

    def prev_page(self, page):
        '''
        Return the page preceding ``page``, or ``''`` when ``page`` is the
        first page or is not part of this document.
        '''
        idx = self._page_index(page)
        if idx is None or idx == 0:
            return ''
        return self.pages[idx - 1]

    @property
    def toc_links(self):
        '''
        Returns a list of dictionaries, one per page, with the keys
        ``link`` (the html file name), ``text`` (the page title) and
        ``level`` (the second element of ``Page.title``).
        '''
        links = []
        for page in self.pages:
            title, level = page.title
            links.append(dict(link=self._as_text(page.htmlfile),
                              text=self._as_text(title),
                              level=level,
                              ))

        return links

    @property
    def pdf_filename(self):
        '''
        File name for the PDF, derived from the document title.

        Whitespace becomes ``_``, colons and commas become ``-``, and
        repeated separators are collapsed before ``.pdf`` is appended.
        '''
        filename = self.title
        filename = re.sub(r'\s+', ' ', filename)
        filename = re.sub(r'[\n ]', '_', filename)
        filename = re.sub(r'[:,]', '-', filename)
        filename = re.sub(r'_+', '_', filename)
        filename = re.sub(r'-_', '-', filename)
        filename = re.sub(r'-+', '-', filename)

        return filename + '.pdf'

    def publish_pdf(self, pub_path):
        '''
        Render HEADER plus every page as one PDF inside ``pub_path``.

        Returns the PDF file name (without the directory).
        '''
        pdf_path = os.path.join(pub_path, self.pdf_filename)
        src = self.meta['HEADER']
        for page in self.pages:
            src += '\n%s' % page.source

        doc = pandoc.Document()
        doc.markdown = src

        # pandoc runs from the source directory; always restore the
        # working directory setting, even when the conversion fails.
        pandoc.set_cwd(os.path.abspath(self.src_path))
        try:
            doc.to_file(pdf_path)
        finally:
            pandoc.set_cwd(None)

        return self.pdf_filename

    def publish_css(self, pub_path):
        '''Copy the source ``css`` directory, if any, into ``pub_path``.'''
        src = os.path.join(self.src_path, 'css')
        dst = os.path.join(pub_path, 'css')
        if os.path.isdir(src):
            copy(src, dst)

    def publish_images(self, pub_path):
        '''
        Copy the images into ``pub_path/images``.

        ``images/html`` is used when it exists, otherwise ``images``;
        nothing is copied when neither exists.
        '''
        src = os.path.join(self.src_path, 'images/html')
        if not os.path.exists(src):
            src = os.path.join(self.src_path, 'images')

        dst = os.path.join(pub_path, 'images')
        if os.path.exists(src):
            copy(src, dst)

    def publish_html(self, pub_path):
        '''
        Render every page to its html file inside ``pub_path``, then copy
        the stylesheets and images.

        When no page produces ``index.html``, ``index.html`` is created as
        a symbolic link to the first page. ``set_templater`` must have been
        called beforehand.
        '''
        pandoc.set_cwd(None)
        for page in self.pages:
            print("generating %s..." % page.htmlfile)
            template = self.templater.page_template(self, page)
            # The template is closed before pandoc reads it, hence
            # delete=False; it is removed explicitly once rendering is done.
            template_file = NamedTemporaryFile(mode='w',
                                               suffix='pf.template',
                                               delete=False)
            try:
                try:
                    template_file.write(template)
                finally:
                    template_file.close()
                doc = pandoc.Document()
                doc.add_argument('toc')
                doc.add_argument('template=%s' % template_file.name)
                doc.add_argument('css=%s' % self.css_file)
                doc.markdown = '%s\n%s' % (self.meta['HEADER'], page.source)
                content = doc.html
            finally:
                os.remove(template_file.name)
            write_file(os.path.join(pub_path, page.htmlfile),
                       self._as_text(content))

        # If there is no explicit index.html, then link 'index.html'
        # to the toplevel page.
        has_index = any(page.htmlfile == 'index.html' for page in self.pages)
        if self.pages and not has_index:
            ref = os.path.join(pub_path, 'index.html')
            # A link left behind by a previous run would make symlink fail.
            if os.path.lexists(ref):
                os.remove(ref)
            os.symlink(self.pages[0].htmlfile, ref)

        # Copy the stylesheets and images to the publish directory.
        self.publish_css(pub_path)
        self.publish_images(pub_path)