panfry/page.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import pandoc
from panfry.toc import TOCItem


class TextFile:
    '''
    Pairs a file name with the text that file contains.
    '''
    def __init__(self, filename, source):
        # Both values are stored verbatim; derived views are computed on
        # every access by the properties below.
        self.source = source
        self.filename = filename

    @property
    def lines(self):
        '''
        Returns the source as a list of lines.

        Whitespace (blank lines included) is stripped from both ends of
        the whole text before it is split on newline characters, so the
        result always holds at least one (possibly empty) string.
        '''
        trimmed = self.source.strip()
        return trimmed.split('\n')

    @property
    def firstline(self):
        '''
        Returns the first entry of ``lines``.
        '''
        all_lines = self.lines
        return all_lines[0]


class Page(TextFile):
    '''
    Represents single page source file.

    The source text is loaded into a ``pandoc.Document`` on construction so
    that input in a format other than markdown can be converted to markdown
    when the table of contents is built.
    '''

    # A setext underline is a line made up solely of '=' (level 1) or
    # '-' (level 2).  Lines are stripped before matching, so '$' anchors the
    # real end of the line.
    _SETEXT_RE = re.compile(r'(={2,}|-{2,})$')

    # An ATX heading is one or more '#', whitespace, then text containing at
    # least one ASCII letter or digit (so '####' or '# ---' are skipped).
    _ATX_RE = re.compile(r'(#+)\s+(.*[A-Za-z0-9].*)$')

    def __init__(self, filename, source, format='markdown'):
        '''
        filename -- name of the page source file
        source   -- full text of the page
        format   -- input format name passed to pandoc for ``source``
        '''
        TextFile.__init__(self, filename, source)
        self.doc = pandoc.Document()
        self.doc._input(self.source, format=format)
        self.format = format
        # Cache for the converted markdown; filled lazily by ``markdown``.
        self._markdown = ''

    @property
    def markdown(self):
        '''
        Returns the page content as markdown.

        Markdown sources are returned untouched.  Any other format is
        converted through the pandoc document once and the result cached.
        '''
        if self.format == 'markdown':
            return self.source
        if not self._markdown:
            self._markdown = self.doc.markdown
        return self._markdown

    @property
    def title(self):
        '''
        Returns the page title together with its level in the document
        outline hierarchy.

        The level is preserved so that page titles can be properly
        positioned (indented) in the table of contents.

        If the page has a pandoc title block with a non-empty title, a
        ``(title, 1)`` tuple is returned.

        If no such title is found, the first entry of ``toc`` (a TOCItem
        for the first heading, carrying the corresponding heading level)
        is returned.

        Finally, if no title block or headings are found in the page,
        the filename is returned as the title with underscores changed
        to spaces, again as a ``(title, 1)`` tuple.
        '''
        first = self.lines[0]
        if first.startswith('%'):
            # Remove the marker itself instead of splitting on the first
            # space: '%Title' and a bare '%' used to raise IndexError.
            text = first.lstrip('%').strip()
            if text:
                return (text, 1)

        # Build the table of contents once; every access re-parses the page.
        toc = self.toc
        if toc:
            return toc[0]

        return (self.filename.replace('_', ' '), 1)

    @property
    def toc(self):
        '''
        Returns a list of TOCItems of the page.

        Both setext headings (a text line underlined with '=' or '-') and
        ATX headings ('#' prefixed) are recognised.  Each item is created
        from the heading text, its level and ``htmlfile``.
        '''
        toc = []
        target = self.htmlfile
        # Last plain text line seen; candidate text for a setext heading.
        heading = ''
        for line in self.markdown.split('\n'):
            line = line.strip()

            if self._SETEXT_RE.match(line):
                # Only an underline directly below a text line is a heading;
                # otherwise it is a rule and is ignored.
                if heading:
                    level = 1 if line.startswith('=') else 2
                    toc.append(TOCItem(heading, level, target))
                # Neither an underline nor a consumed heading may be reused.
                heading = ''
                continue

            atx = self._ATX_RE.match(line)
            if atx:
                hashes, text = atx.groups()
                toc.append(TOCItem(text.strip(), len(hashes), target))
                heading = ''
                continue

            heading = line

        return toc

    @property
    def htmlfile(self):
        '''
        Returns the filename with its last extension replaced by '.html'.

        A filename without any '.' simply gets '.html' appended.
        '''
        stem, dot, _ext = self.filename.rpartition('.')
        if not dot:
            # No extension to replace; keep the whole name.
            stem = self.filename
        return stem + '.html'