1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re

import pandoc

from panfry.toc import TOCItem
class TextFile:
    '''
    Minimal container pairing a file name with the text it holds.
    '''

    def __init__(self, filename, source):
        # Both values are stored as given; nothing is read from disk here.
        self.source = source
        self.filename = filename

    @property
    def lines(self):
        '''
        The source split on newline characters, as a list of strings.

        Whitespace (including blank lines) at the very start and very end
        of the source is stripped before splitting, so the list always has
        at least one element (an empty string for blank source).
        '''
        trimmed = self.source.strip()
        return trimmed.split('\n')

    @property
    def firstline(self):
        '''
        The first entry of ``lines``.
        '''
        all_lines = self.lines
        return all_lines[0]
class Page(TextFile):
    '''
    Represents single page source file.

    On construction the source is also loaded into a ``pandoc.Document``
    (``self.doc``); its ``markdown`` attribute is read when the page was
    not written in markdown.
    '''

    # A run of three or more tildes or backticks at the very start of a
    # line, i.e. the delimiter of a fenced code block.
    _FENCE_RE = re.compile(r'~{3,}|`{3,}')

    # ATX heading: leading '#'s, mandatory whitespace, then text holding
    # at least one letter or digit ([^\W_] is "word character except _").
    # Requiring the whitespace keeps '#hashtag' and '#.' list markers out.
    _ATX_RE = re.compile(r'(#+)\s+(.*[^\W_].*)')

    def __init__(self, filename, source, format='markdown'):
        '''
        filename -- name of the page source file
        source   -- full text of the page
        format   -- input format name handed to pandoc (default 'markdown')
        '''
        TextFile.__init__(self, filename, source)
        self.doc = pandoc.Document()
        self.doc._input(self.source, format=format)
        self.format = format
        # Lazily filled cache for the markdown obtained from self.doc.
        self._markdown = ''

    def __repr__(self):
        return self.markdown

    @property
    def markdown(self):
        '''
        Returns the page as markdown text.

        Markdown pages return their source untouched.  For any other
        format the text is read from ``self.doc.markdown`` on first use
        and cached (an empty result is not cached and is re-read).
        '''
        if self.format == 'markdown':
            return self.source
        if not self._markdown:
            self._markdown = self.doc.markdown
        return self._markdown

    @property
    def title(self):
        '''
        Returns the page title together with its level in the document
        outline hierarchy.

        The level is preserved so that page titles can be properly
        positioned (indented) in the table of contents.

        If the page has a pandoc title block (first line starting with
        '%') with a non-empty title, a ``(title, 1)`` tuple is returned.

        If a title block is not found, the first entry of ``toc`` is
        returned, i.e. the first heading with its corresponding level.
        NOTE(review): that entry is a TOCItem, presumably tuple-like --
        confirm against panfry.toc.

        Finally, if no title block or headings are found in the page,
        the filename is returned as the title with underscores changed
        to spaces, at level 1.
        '''
        first = self.firstline
        if first.startswith('%'):
            # Take everything after the marker; a space after '%' is not
            # required, and an empty title falls through to the headings.
            text = first[1:].strip()
            if text:
                return (text, 1)
        # Build the table of contents once; the property re-parses the
        # page on every access.
        toc = self.toc
        if toc:
            return toc[0]
        return (self.filename.replace('_', ' '), 1)

    @property
    def toc(self):
        '''
        Returns a list of TOCItems of the page.

        The markdown is scanned line by line.  Setext headings (a text
        line followed by a line starting with '==' or '--') yield level
        1 and 2 items, ATX headings ('#' style) yield an item whose level
        is the number of leading '#'.  Lines inside fenced code blocks
        are ignored.  Each item is created as
        ``TOCItem(title, level, self.htmlfile)``.
        '''
        toc = []
        target = self.htmlfile
        heading = ''  # previous text line: candidate setext heading
        fence = ''    # fence run that opened the current code block
        for line in self.markdown.split('\n'):
            fence_match = self._FENCE_RE.match(line)
            if fence_match:
                run = fence_match.group()
                if not fence:
                    # Opening fence.  Remember only the fence characters
                    # so an info string ("```python") does not inflate
                    # the length a closing fence has to reach.
                    fence = run
                elif (run[0] == fence[0] and len(run) >= len(fence)
                        and not line[len(run):].strip()):
                    # Closing fence: same character, at least as long,
                    # nothing after it.
                    fence = ''
                # A fence line is never setext heading text.
                heading = ''
                continue
            if fence:
                continue
            line = line.strip()
            if heading and line.startswith('=='):
                toc.append(TOCItem(heading, 1, target))
                heading = ''  # the underline consumes the heading text
                continue
            if heading and line.startswith('--'):
                toc.append(TOCItem(heading, 2, target))
                heading = ''
                continue
            atx = self._ATX_RE.match(line)
            if atx:
                toc.append(TOCItem(atx.group(2), len(atx.group(1)), target))
                heading = ''
                continue
            heading = line
        return toc

    @property
    def htmlfile(self):
        '''
        Returns the filename with its extension replaced by '.html'.

        A filename without an extension simply gets '.html' appended;
        dots in directory names are left alone.
        '''
        return os.path.splitext(self.filename)[0] + '.html'
|