Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added basic tag/category case sanitizing. Offers only two choices (first or lower).
  • Loading branch information
felixfontein committed Sep 7, 2015
1 parent 621f2ee commit 6361ec0
Showing 1 changed file with 36 additions and 1 deletion.
37 changes: 36 additions & 1 deletion nikola/plugins/command/import_wordpress.py
Expand Up @@ -191,6 +191,12 @@ class CommandImportWordpress(Command, ImportMixin):
'type': bool,
'help': "Automatically installs the WordPress page compiler (either locally or in the new site) if required by other options.\nWarning: the compiler is GPL software!",
},
{
'name': 'tag_saniziting_strategy',
'long': 'tag-saniziting-strategy',
'default': 'first',
'help': 'lower: Convert all tag and category names to lower case\nfirst: Keep first spelling of tag or category name',
},
]
all_tags = set([])

Expand Down Expand Up @@ -239,6 +245,8 @@ def _read_options(self, options, args):
self.install_wordpress_compiler = options.get('install_wordpress_compiler', False)
self.wordpress_page_compiler = None

self.tag_saniziting_strategy = options.get('tag_saniziting_strategy', 'first')

self.auth = None
if options.get('download_auth') is not None:
username_password = options.get('download_auth')
Expand Down Expand Up @@ -750,6 +758,24 @@ def _create_metadata(self, status, excerpt, tags, categories, post_name=None):
tags_cats = tags + categories
return tags_cats, other_meta

# Spellings recorded by _sanitize, kept separately for categories (key True)
# and tags (key False). Each inner dict maps a lower-cased name to a list
# holding the first spelling that was encountered for it.
_tag_sanitize_map = {True: {}, False: {}}

def _sanitize(self, tag, is_category):
    """Return the sanitized spelling of a tag or category name.

    With the 'lower' strategy the name is simply lower-cased. Otherwise the
    first spelling seen for each case-insensitive name is recorded
    (separately for tags and categories). With the 'first' strategy every
    later variant is replaced by that first spelling, and a warning is
    logged whenever the spelling actually changes.

    :param tag: the tag or category name to sanitize.
    :param is_category: True if the name is a category, False if it is a tag.
    :return: the name to use in place of ``tag``.

    An unknown strategy logs an error and exits with status 1. This is only
    detected once a name is seen for the second time; the first occurrence
    of every name is returned unchanged.
    """
    if self.tag_saniziting_strategy == 'lower':
        return tag.lower()
    key = tag.lower()
    # The map has to be reached through self: a bare ``_tag_sanitize_map``
    # inside a method is looked up as a local/global name and raises
    # NameError when the map lives on the class (presumably the case here,
    # since it is defined between the methods).
    seen = self._tag_sanitize_map[is_category]
    if key not in seen:
        seen[key] = [tag]
        return tag
    previous = seen[key]
    if self.tag_saniziting_strategy == 'first':
        if tag != previous[0]:
            LOGGER.warn("Changing spelling of {0} name '{1}' to {2}.".format('category' if is_category else 'tag', tag, previous[0]))
        return previous[0]
    LOGGER.error("Unknown tag sanitizing strategy '{0}'!".format(self.tag_saniziting_strategy))
    sys.exit(1)

def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attachments=None):
"""Take an item from the feed and creates a post file."""
if out_folder is None:
Expand Down Expand Up @@ -837,7 +863,6 @@ def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attac
type = tag.attrib['domain']
if text == 'Uncategorized' and type == 'category':
continue
self.all_tags.add(text)
if type == 'category':
categories.append(text)
else:
Expand All @@ -846,6 +871,16 @@ def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attac
if '$latex' in content:
tags.append('mathjax')

for i, cat in enumerate(categories[:]):
cat = self._sanitize(cat, True)
categories[i] = cat
self.all_tags.add(cat)

for i, tag in enumerate(tags[:]):
tag = self._sanitize(tag, False)
tags[i] = tag
self.all_tags.add(tag)

# Find post format if it's there
post_format = 'wp'
format_tag = [x for x in item.findall('*//{%s}meta_key' % wordpress_namespace) if x.text == '_tc_post_format']
Expand Down

0 comments on commit 6361ec0

Please sign in to comment.