summary refs log tree commit diff
diff options
context:
space:
mode:
author Scott Bahling <sbahling@mudgum.net> 2013-03-23 17:28:08 +0100
committer Scott Bahling <sbahling@mudgum.net> 2013-03-23 17:28:08 +0100
commit 0426fce3b39ebedf6b92bfe6b18aa08987b11ea2 (patch)
tree 5fb1e4f1a8a117d9e7acbbbf289f9e46336cba64
parent e80e4d436248d7129e3c9d58dfa61c28565a3e92 (diff)
download panfry-0426fce3b39ebedf6b92bfe6b18aa08987b11ea2.tar.gz
panfry-0426fce3b39ebedf6b92bfe6b18aa08987b11ea2.tar.xz
panfry-0426fce3b39ebedf6b92bfe6b18aa08987b11ea2.zip
Improve header id generation
-rwxr-xr-x panfry/document.py 11
-rwxr-xr-x panfry/page.py 9
2 files changed, 16 insertions, 4 deletions
diff --git a/panfry/document.py b/panfry/document.py
index 7bc65c2..edf1cf8 100755
--- a/panfry/document.py
+++ b/panfry/document.py
@@ -42,18 +42,23 @@ class Document:
'''
re_link1 = re.compile(r'(\[(?P<s>[^\]^]+)\])\((?P<l>#.*)\)', re.S)
re_link2 = re.compile(r'(\[(?P<s>[^\]^]+)\][^(])', re.S)
+ re_link3 = re.compile(r'(\[(?P<s>[^\]^]+)\])\[(?P<h>[^\]].*)\]', re.S)
source = re_link1.sub(self._get_cross_link, source)
source = re_link2.sub(self._get_cross_link, source)
+ source = re_link3.sub(self._get_cross_link, source)
return source
def _get_cross_link(self, m):
- ref_txt = m.group('s')
+ if 'h' in m.groupdict():
+ heading = m.group('h')
+ else:
+ heading = m.group('s')
for page in self.pages:
for tocitem in page.toc:
- if tocitem.heading == ref_txt.replace('\n', ' '):
- return '[%s](%s#%s)' % (ref_txt,
+ if tocitem.heading == heading.replace('\n', ' '):
+ return '[%s](%s#%s)' % (heading,
page.htmlfile,
tocitem.header_id,
)
diff --git a/panfry/page.py b/panfry/page.py
index 01d9fcb..42eda1e 100755
--- a/panfry/page.py
+++ b/panfry/page.py
@@ -35,7 +35,14 @@ class TOCItem:
@property
def header_id(self):
- remove = re.compile('[+~!@#$%^&*\(\){}\[\];:"\',.<>?/\`]')
+ # - Remove all formatting, links, etc.
+ # - Remove all punctuation, except underscores, hyphens, and periods.
+ # - Replace all spaces and newlines with hyphens.
+ # - Convert all alphabetic characters to lowercase.
+ # - Remove everything up to the first letter (identifiers may not begin
+ # with a number or punctuation mark).
+
+ remove = re.compile('^[^a-zA-Z]|[+~!@#$%^&*\(\){}\[\];:"\',<>?/\`]')
header_id = self.heading.lower().replace('\n', ' ').replace(' ', '-')
return remove.sub('', header_id)