Skip to content

Commit 6361ec0

Browse files
committed Sep 7, 2015
Added basic tag/category case sanitizing. Offers only two choices (first or lower).
1 parent 621f2ee commit 6361ec0

File tree

1 file changed

+36
-1
lines changed

1 file changed

+36
-1
lines changed
 

‎nikola/plugins/command/import_wordpress.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,12 @@ class CommandImportWordpress(Command, ImportMixin):
191191
'type': bool,
192192
'help': "Automatically installs the WordPress page compiler (either locally or in the new site) if required by other options.\nWarning: the compiler is GPL software!",
193193
},
194+
{
195+
'name': 'tag_saniziting_strategy',
196+
'long': 'tag-saniziting-strategy',
197+
'default': 'first',
198+
'help': 'lower: Convert all tag and category names to lower case\nfirst: Keep first spelling of tag or category name',
199+
},
194200
]
195201
all_tags = set([])
196202

@@ -239,6 +245,8 @@ def _read_options(self, options, args):
239245
self.install_wordpress_compiler = options.get('install_wordpress_compiler', False)
240246
self.wordpress_page_compiler = None
241247

248+
self.tag_saniziting_strategy = options.get('tag_saniziting_strategy', 'first')
249+
242250
self.auth = None
243251
if options.get('download_auth') is not None:
244252
username_password = options.get('download_auth')
@@ -750,6 +758,24 @@ def _create_metadata(self, status, excerpt, tags, categories, post_name=None):
750758
tags_cats = tags + categories
751759
return tags_cats, other_meta
752760

761+
_tag_sanitize_map = {True: {}, False: {}}
762+
763+
def _sanitize(self, tag, is_category):
    """Normalize the letter case of a tag or category name.

    The behavior is selected by ``self.tag_saniziting_strategy``:

    * ``'lower'``: return the name converted to lower case.
    * ``'first'``: return the first spelling seen for this name
      (names are compared case-insensitively). Tags and categories
      are tracked separately. A warning is logged whenever a later
      spelling is replaced by the first one.

    :param tag: the tag or category name as read from the import.
    :param is_category: True if ``tag`` is a category name, False if
        it is a tag name; selects which spelling table is used.
    :return: the sanitized name.

    Any other strategy value is logged as an error and terminates the
    process via ``sys.exit(1)``.
    """
    strategy = self.tag_saniziting_strategy

    # Validate before doing anything else. Previously the check only ran
    # once a name was seen a second time, so a mistyped strategy silently
    # passed every first-seen name through unchanged.
    if strategy not in ('lower', 'first'):
        LOGGER.error("Unknown tag sanitizing strategy '{0}'!".format(strategy))
        sys.exit(1)

    if strategy == 'lower':
        return tag.lower()

    # strategy == 'first'
    # The spelling table is assigned in the class body next to this method,
    # so it has to be looked up through ``self``: names bound in a class
    # body are not visible as bare names inside the class's methods.
    seen = self._tag_sanitize_map[is_category]
    key = tag.lower()
    if key not in seen:
        # First occurrence: remember this spelling and keep it as is.
        seen[key] = [tag]
        return tag

    first_spelling = seen[key][0]
    if tag != first_spelling:
        LOGGER.warn("Changing spelling of {0} name '{1}' to {2}.".format('category' if is_category else 'tag', tag, first_spelling))
    return first_spelling
778+
753779
def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attachments=None):
754780
"""Take an item from the feed and creates a post file."""
755781
if out_folder is None:
@@ -837,7 +863,6 @@ def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attac
837863
type = tag.attrib['domain']
838864
if text == 'Uncategorized' and type == 'category':
839865
continue
840-
self.all_tags.add(text)
841866
if type == 'category':
842867
categories.append(text)
843868
else:
@@ -846,6 +871,16 @@ def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attac
846871
if '$latex' in content:
847872
tags.append('mathjax')
848873

874+
for i, cat in enumerate(categories[:]):
875+
cat = self._sanitize(cat, True)
876+
categories[i] = cat
877+
self.all_tags.add(cat)
878+
879+
for i, tag in enumerate(tags[:]):
880+
tag = self._sanitize(tag, False)
881+
tags[i] = tag
882+
self.all_tags.add(tag)
883+
849884
# Find post format if it's there
850885
post_format = 'wp'
851886
format_tag = [x for x in item.findall('*//{%s}meta_key' % wordpress_namespace) if x.text == '_tc_post_format']

0 commit comments

Comments
 (0)
Please sign in to comment.