Initial (broken) implementation of pelican post reading

Kwpolska · Kwpolska · commit 332c8c79f69b · 2015-08-23T18:45:41.000+02:00
Signed-off-by: Chris Warrick <kwpolska@gmail.com>
diff --git a/nikola/plugin_categories.py b/nikola/plugin_categories.py
@@ -242,6 +242,7 @@ class PageCompiler(BasePlugin):
     friendly_name = ''
     demote_headers = False
     supports_onefile = True
+    metadata_can_be_overridden = False
     default_metadata = {
         'title': '',
         'slug': '',
@@ -299,7 +300,7 @@ def get_compiler_extensions(self):
         return plugins
 
 
-class CompilerExtension(BasePlugin):
+class CompilerExtension(BasePlugin):
 
     """An extension for a Nikola compiler.
 
diff --git a/nikola/plugins/compile/rest/__init__.py b/nikola/plugins/compile/rest/__init__.py
@@ -41,13 +41,20 @@
 from nikola.utils import unicode_str, get_logger, makedirs, write_metadata, STDERR_HANDLER
 
 
+DOCINFO_PELICAN_NIKOLA_MAPPING = {
+    'modified': 'updated',
+    'authors': 'author',
+    'summary': 'description'
+}
+
 class CompileRest(PageCompiler):
 
     """Compile reStructuredText into HTML."""
 
     name = "rest"
     friendly_name = "reStructuredText"
     demote_headers = True
+    metadata_can_be_overridden = True
     logger = None
 
     def _read_extra_deps(self, post):
@@ -63,7 +70,7 @@ def register_extra_dependencies(self, post):
         """Add dependency to post object to check .dep file."""
         post.add_dependency(lambda: self._read_extra_deps(post), 'fragment')
 
-    def compile_html_string(self, data, source_path=None, is_two_file=True):
+    def compile_html_string(self, data, source_path=None, is_two_file=True, return_publisher=False):
         """Compile reST into HTML strings."""
         # If errors occur, this will be added to the line number reported by
         # docutils so the line number matches the actual line number (off by
@@ -74,7 +81,10 @@ def compile_html_string(self, data, source_path=None, is_two_file=True):
             add_ln = len(m_data.splitlines()) + 1
 
         default_template_path = os.path.join(os.path.dirname(__file__), 'template.txt')
-        output, error_level, deps = rst2html(
+        # TODO cache publisher in post object (which requires a ton of
+        # refactoring, and might even need v8 and breaking tons of APIs) for
+        # speed -- right now, we are publishing each post twice
+        publisher = rst2html(
             data, settings_overrides={
                 'initial_header_level': 1,
                 'record_dependencies': True,
@@ -83,12 +93,16 @@ def compile_html_string(self, data, source_path=None, is_two_file=True):
                 'syntax_highlight': 'short',
                 'math_output': 'mathjax',
                 'template': default_template_path,
-            }, logger=self.logger, source_path=source_path, l_add_ln=add_ln, transforms=self.site.rst_transforms)
-        if not isinstance(output, unicode_str):
-            # To prevent some weird bugs here or there.
-            # Original issue: empty files.  `output` became a bytestring.
-            output = output.decode('utf-8')
-        return output, error_level, deps
+            }, logger=self.logger, source_path=source_path, l_add_ln=add_ln, transforms=self.site.rst_transforms, return_publisher=True)
+        if return_publisher:
+            return publisher
+        else:
+            output, error_level, deps = rst_document_tuple(publisher)
+            if not isinstance(output, unicode_str):
+                # To prevent some weird bugs here or there.
+                # Original issue: empty files.  `output` became a bytestring.
+                output = output.decode('utf-8')
+            return output, error_level, deps
 
     def compile_html(self, source, dest, is_two_file=True):
         """Compile source file into HTML and save as dest."""
@@ -112,6 +126,41 @@ def compile_html(self, source, dest, is_two_file=True):
         else:
             return False
 
+    def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
+        """Read the metadata from a post, and return a metadata dict."""
+        metadata = {}
+        source = post.translated_source_path(lang)
+        with io.open(source, 'r', encoding='utf-8') as in_file:
+            data = in_file.read()
+        # This is a bit of a cheat.  The method is now abused to create a
+        # publisher and not the full document tuple.
+        publisher = self.compile_html_string(data, source, post.is_two_file, True)
+
+        # Get title.
+        title_id = publisher.document.first_child_matching_class(docutils.nodes.title)
+        if title_id is not None:
+            metadata['title'] = publisher.document.children[title_id].astext()
+
+        # Get any other metadata that is part of the reST standard docinfo
+        # (which is a special field list)
+        docinfo_id = publisher.document.first_child_matching_class(docutils.nodes.docinfo)
+        if docinfo_id is not None:
+            docinfo = publisher.document.children[docinfo_id]
+            for field in docinfo.children:
+                fieldname = field.tagname
+                if fieldname == 'authors':
+                    field.child_text_separator = ', '
+                    fieldvalue = field.astext()
+                elif fieldname == 'field':
+                    fieldname = field.children[0].astext()
+                    fieldvalue = field.children[1].astext()
+                else:
+                    fieldvalue = field.astext()
+                fieldname = fieldname.lower()
+                fieldname = DOCINFO_PELICAN_NIKOLA_MAPPING.get(fieldname, fieldname)
+                metadata[fieldname] = fieldvalue
+        return metadata
+
     def create_post(self, path, **kw):
         """Create a new post."""
         content = kw.pop('content', None)
@@ -237,11 +286,8 @@ def rst2html(source, source_path=None, source_class=docutils.io.StringInput,
              parser=None, parser_name='restructuredtext', writer=None,
              writer_name='html', settings=None, settings_spec=None,
              settings_overrides=None, config_section=None,
-             enable_exit_status=None, logger=None, l_add_ln=0, transforms=None):
-    """Set up & run a ``Publisher``, and return a dictionary of document parts.
-
-    Dictionary keys are the names of parts, and values are Unicode strings;
-    encoding is up to the client.  For programmatic use with string I/O.
+             enable_exit_status=None, logger=None, l_add_ln=0, transforms=None, return_publisher=False):
+    """Set up & run a ``Publisher``, and return the publisher or the document.
 
     For encoded string input, be sure to set the 'input_encoding' setting to
     the desired encoding.  Set it to 'unicode' for unencoded Unicode string
@@ -275,4 +321,15 @@ def rst2html(source, source_path=None, source_class=docutils.io.StringInput,
     pub.set_destination(None, destination_path)
     pub.publish(enable_exit_status=enable_exit_status)
 
+    if return_publisher:
+        return pub
+    else:
+        return rst_document_tuple(pub)
+
+
+def rst_document_tuple(pub):
+    """Return the document tuple (output, error level, dependencies) for a publisher.
+
+    Previously, the only output of rst2html.
+    """
     return pub.writer.parts['docinfo'] + pub.writer.parts['fragment'], pub.document.reporter.max_level, pub.settings.record_dependencies
diff --git a/nikola/post.py b/nikola/post.py
@@ -486,7 +486,7 @@ def wrap_encrypt(path, password):
         self.compile_html(
             self.translated_source_path(lang),
             dest,
-            self.is_two_file),
+            self.is_two_file)
         if self.meta('password'):
             # TODO: get rid of this feature one day (v8?; warning added in v7.3.0.)
             LOGGER.warn("The post {0} is using the `password` attribute, which may stop working in the future.")
@@ -989,8 +989,13 @@ def get_meta(post, file_metadata_regexp=None, unslugify_titles=False, lang=None)
     if getattr(post, 'compiler', None):
         compiler_meta = post.compiler.read_metadata(post, file_metadata_regexp, unslugify_titles, lang)
         meta.update(compiler_meta)
+        if compiler_meta:
+            compiler_meta_override = post.compiler.metadata_can_be_overridden
+        else:
+            compiler_meta_override = True
+
 
-    if not post.is_two_file and not compiler_meta:
+    if not post.is_two_file and compiler_meta_override:
         # Meta file has precedence over file, which can contain garbage.
         # Moreover, we should not to talk to the file if we have compiler meta.
         meta.update(get_metadata_from_file(post.source_path, config, lang))