Skip to content

Commit

Permalink
Merge pull request #2448 from getnikola/wordpress-markdown-conversion
Browse files Browse the repository at this point in the history
Wordpress MarkDown conversion
  • Loading branch information
ralsina committed Aug 18, 2016
2 parents 79fee56 + c3dd425 commit c9e8406
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Expand Up @@ -7,6 +7,8 @@ Features
* New NO_DOCUTILS_TITLE_TRANSFORM (Issue #2382)
* Update options of chart directive to Pygal 2.2.3
* Pass global context to template shortcodes (Issue #2424)
* Added new options --html2text and --transform-to-markdown
to WordPress importer (Issue #2261)

Bugfixes
--------
Expand Down
62 changes: 56 additions & 6 deletions nikola/plugins/command/import_wordpress.py
Expand Up @@ -37,6 +37,11 @@
from lxml import etree
from collections import defaultdict

# html2text is an optional dependency: it is only needed for the
# --html2text and --transform-to-markdown importer options.  When it is
# not installed the name is bound to None so callers can test for it.
try:
    import html2text
except ImportError:
    # Catch only a missing package; a bare ``except:`` would also hide
    # KeyboardInterrupt/SystemExit and genuine errors raised at import time.
    html2text = None

try:
from urlparse import urlparse
from urllib import unquote
Expand Down Expand Up @@ -169,6 +174,20 @@ class CommandImportWordpress(Command, ImportMixin):
'type': bool,
'help': "Export comments as .wpcomment files",
},
{
'name': 'html2text',
'long': 'html2text',
'default': False,
'type': bool,
'help': "Uses html2text (needs to be installed with pip) to transform WordPress posts to MarkDown during import",
},
{
'name': 'transform_to_markdown',
'long': 'transform-to-markdown',
'default': False,
'type': bool,
'help': "Uses WordPress page compiler to transform WordPress posts to HTML and then use html2text to transform them to MarkDown during import",
},
{
'name': 'transform_to_html',
'long': 'transform-to-html',
Expand Down Expand Up @@ -262,6 +281,9 @@ def _read_options(self, options, args):
self.export_categories_as_categories = options.get('export_categories_as_categories', False)
self.export_comments = options.get('export_comments', False)

self.html2text = options.get('html2text', False)
self.transform_to_markdown = options.get('transform_to_markdown', False)

self.transform_to_html = options.get('transform_to_html', False)
self.use_wordpress_compiler = options.get('use_wordpress_compiler', False)
self.install_wordpress_compiler = options.get('install_wordpress_compiler', False)
Expand All @@ -280,10 +302,18 @@ def _read_options(self, options, args):
self.separate_qtranslate_content = options.get('separate_qtranslate_content')
self.translations_pattern = options.get('translations_pattern')

if self.transform_to_html and self.use_wordpress_compiler:
LOGGER.warn("It does not make sense to combine --transform-to-html with --use-wordpress-compiler, as the first converts all posts to HTML and the latter option affects zero posts.")
count = (1 if self.html2text else 0) + (1 if self.transform_to_html else 0) + (1 if self.transform_to_markdown else 0)
if count > 1:
LOGGER.error("You can use at most one of the options --html2text, --transform-to-html and --transform-to-markdown.")
return False
if (self.html2text or self.transform_to_html or self.transform_to_markdown) and self.use_wordpress_compiler:
LOGGER.warn("It does not make sense to combine --use-wordpress-compiler with any of --html2text, --transform-to-html and --transform-to-markdown, as the latter convert all posts to HTML and the first option then affects zero posts.")

if (self.html2text or self.transform_to_markdown) and not html2text:
LOGGER.error("You need to install html2text via 'pip install html2text' before you can use the --html2text and --transform-to-markdown options.")
return False

if self.transform_to_html:
if self.transform_to_html or self.transform_to_markdown:
self._find_wordpress_compiler()
if not self.wordpress_page_compiler and self.install_wordpress_compiler:
if not install_plugin(self.site, 'wordpress_compiler', output_dir='plugins'): # local install
Expand Down Expand Up @@ -667,10 +697,10 @@ def replacement(m, c=content):
return content

@staticmethod
def transform_caption(content, use_html=False):
    """Transform WordPress ``[caption ...]...[/caption]`` shortcodes.

    By default both the opening and the closing shortcode tag are removed
    and only the enclosed text is kept.  With ``use_html=True`` the tags
    are replaced by ``<h1>`` / ``</h1>`` instead.

    :param content: text that may contain caption shortcodes.
    :param use_html: replace the shortcode tags with ``<h1>``/``</h1>``
        rather than dropping them.
    :return: the text with every caption shortcode tag substituted.
    """
    opening = '<h1>' if use_html else ''
    closing = '</h1>' if use_html else ''

    new_caption = re.sub(r'\[/caption\]', closing, content)
    # Match up to the FIRST closing bracket only.  A greedy ``.*`` would run
    # to the last ``]`` on the line and swallow any text (or further
    # captions/shortcodes) that follows the opening tag on the same line.
    new_caption = re.sub(r'\[caption[^\]]*\]', opening, new_caption)

    return new_caption

Expand All @@ -693,6 +723,26 @@ def transform_content(self, content, post_format, attachments):
except TypeError: # old versions of the plugin don't support the additional argument
content = self.wordpress_page_compiler.compile_to_string(content)
return content, 'html', True
elif self.transform_to_markdown:
# First convert to HTML with WordPress plugin
additional_data = {}
if attachments is not None:
additional_data['attachments'] = attachments
try:
content = self.wordpress_page_compiler.compile_to_string(content, additional_data=additional_data)
except TypeError: # old versions of the plugin don't support the additional argument
content = self.wordpress_page_compiler.compile_to_string(content)
# Now convert to MarkDown with html2text
h = html2text.HTML2Text()
content = h.handle(content)
return content, 'md', False
elif self.html2text:
# TODO: what to do with [code] blocks?
# content = self.transform_code(content)
content = self.transform_caption(content, use_html=True)
h = html2text.HTML2Text()
content = h.handle(content)
return content, 'md', False
elif self.use_wordpress_compiler:
return content, 'wp', False
else:
Expand Down
6 changes: 6 additions & 0 deletions tests/test_command_import_wordpress.py
Expand Up @@ -171,6 +171,8 @@ def test_create_import_work_without_argument(self):
def test_populate_context(self):
channel = self.import_command.get_channel_from_file(
self.import_filename)
self.import_command.html2text = False
self.import_command.transform_to_markdown = False
self.import_command.transform_to_html = False
self.import_command.use_wordpress_compiler = False
context = self.import_command.populate_context(channel)
Expand All @@ -195,6 +197,8 @@ def test_importing_posts_and_attachments(self):
self.import_command.no_downloads = False
self.import_command.export_categories_as_categories = False
self.import_command.export_comments = False
self.import_command.html2text = False
self.import_command.transform_to_markdown = False
self.import_command.transform_to_html = False
self.import_command.use_wordpress_compiler = False
self.import_command.tag_saniziting_strategy = 'first'
Expand Down Expand Up @@ -315,6 +319,8 @@ def test_transforming_content(self):
transform_caption = mock.MagicMock()
transform_newlines = mock.MagicMock()

self.import_command.html2text = False
self.import_command.transform_to_markdown = False
self.import_command.transform_to_html = False
self.import_command.use_wordpress_compiler = False

Expand Down

0 comments on commit c9e8406

Please sign in to comment.