From 0426fce3b39ebedf6b92bfe6b18aa08987b11ea2 Mon Sep 17 00:00:00 2001 From: Scott Bahling Date: Sat, 23 Mar 2013 17:28:08 +0100 Subject: Improve header id generation --- panfry/document.py | 11 ++++++++--- panfry/page.py | 9 ++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'panfry') diff --git a/panfry/document.py b/panfry/document.py index 7bc65c2..edf1cf8 100755 --- a/panfry/document.py +++ b/panfry/document.py @@ -42,18 +42,23 @@ class Document: ''' re_link1 = re.compile(r'(\[(?P[^\]^]+)\])\((?P#.*)\)', re.S) re_link2 = re.compile(r'(\[(?P[^\]^]+)\][^(])', re.S) + re_link3 = re.compile(r'(\[(?P[^\]^]+)\])\[(?P[^\]].*)\]', re.S) source = re_link1.sub(self._get_cross_link, source) source = re_link2.sub(self._get_cross_link, source) + source = re_link3.sub(self._get_cross_link, source) return source def _get_cross_link(self, m): - ref_txt = m.group('s') + if 'h' in m.groupdict(): + heading = m.group('h') + else: + heading = m.group('s') for page in self.pages: for tocitem in page.toc: - if tocitem.heading == ref_txt.replace('\n', ' '): - return '[%s](%s#%s)' % (ref_txt, + if tocitem.heading == heading.replace('\n', ' '): + return '[%s](%s#%s)' % (heading, page.htmlfile, tocitem.header_id, ) diff --git a/panfry/page.py b/panfry/page.py index 01d9fcb..42eda1e 100755 --- a/panfry/page.py +++ b/panfry/page.py @@ -35,7 +35,14 @@ class TOCItem: @property def header_id(self): - remove = re.compile('[+~!@#$%^&*\(\){}\[\];:"\',.<>?/\`]') + # - Remove all formatting, links, etc. + # - Remove all punctuation, except underscores, hyphens, and periods. + # - Replace all spaces and newlines with hyphens. + # - Convert all alphabetic characters to lowercase. + # - Remove everything up to the first letter (identifiers may not begin + # with a number or punctuation mark). + + remove = re.compile('^[^a-zA-Z]|[+~!@#$%^&*\(\){}\[\];:"\',<>?/\`]') header_id = self.heading.lower().replace('\n', ' ').replace(' ', '-') return remove.sub('', header_id) -- cgit v1.2.3