Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* Rename UNSLUGIFY_TITLES → FILE_METADATA_UNSLUGIFY_TITLES Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Initial scaffolding for metadata extractor plugins Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Use correct environment marker syntax Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Style fixes Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Use metadata extractor APIs and plugins Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Fix galleries; add basic split support (temporary) Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Fix compatibility with .meta files Signed-off-by: Chris Warrick <kwpolska@gmail.com> * #2856 was effectively undone Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Make metadata splitting work Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Code cleanups Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Fix tests, add new config_present condition Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Allow whitespace before Nikola-style metadata Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Fix RSS tests Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Add MetadataExtractors documentation and `override` MetaPriority Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Style fixes Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Add tests for metadata extractors Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Turns out we run flake8 on tests, too Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Make write_metadata work with metadata extractors Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Fix #2830 — add MetadataExtractor plugins Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Minor documentation fixes [ci skip] Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Address review by @felixfontein Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Remove useless comment [ci skip] Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Slightly smarter return values for split_metadata_for_text Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Pass site when writing metadata in importers Note this is a minor API change (removes @staticmethod) that needs to be reflected in some importers. cc @felixfontein Signed-off-by: Chris Warrick <kwpolska@gmail.com> * Document potentially confusing split_metadata_from_text behavior Signed-off-by: Chris Warrick <kwpolska@gmail.com>
- Loading branch information
Showing
39 changed files
with
832 additions
and
308 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,258 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# Copyright © 2012-2017 Chris Warrick, Roberto Alsina and others. | ||
|
||
# Permission is hereby granted, free of charge, to any | ||
# person obtaining a copy of this software and associated | ||
# documentation files (the "Software"), to deal in the | ||
# Software without restriction, including without limitation | ||
# the rights to use, copy, modify, merge, publish, | ||
# distribute, sublicense, and/or sell copies of the | ||
# Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice | ||
# shall be included in all copies or substantial portions of | ||
# the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | ||
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | ||
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | ||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS | ||
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
|
||
"""Default metadata extractors and helper functions.""" | ||
|
||
import re | ||
import natsort | ||
|
||
from enum import Enum | ||
from nikola.plugin_categories import MetadataExtractor | ||
from nikola.utils import unslugify | ||
|
||
__all__ = ('MetaCondition', 'MetaPriority', 'MetaSource', 'check_conditions') | ||
_default_extractors = [] | ||
DEFAULT_EXTRACTOR_NAME = 'nikola' | ||
DEFAULT_EXTRACTOR = None | ||
|
||
|
||
class MetaCondition(Enum): | ||
"""Conditions for extracting metadata.""" | ||
|
||
config_bool = 1 | ||
config_present = 2 | ||
extension = 3 | ||
compiler = 4 | ||
first_line = 5 | ||
never = -1 | ||
|
||
|
||
class MetaPriority(Enum): | ||
"""Priority of metadata. | ||
An extractor is used if and only if the higher-priority extractors returned nothing. | ||
""" | ||
|
||
override = 1 | ||
specialized = 2 | ||
normal = 3 | ||
fallback = 4 | ||
|
||
|
||
class MetaSource(Enum): | ||
"""Source of metadata.""" | ||
|
||
text = 1 | ||
filename = 2 | ||
|
||
|
||
def check_conditions(post, filename: str, conditions: list, config: dict, source_text: str) -> bool: | ||
"""Check the conditions for a metadata extractor.""" | ||
for ct, arg in conditions: | ||
if any(( | ||
ct == MetaCondition.config_bool and not config.get(arg, False), | ||
ct == MetaCondition.config_present and arg not in config, | ||
ct == MetaCondition.extension and not filename.endswith(arg), | ||
ct == MetaCondition.compiler and post.compiler.name != arg, | ||
ct == MetaCondition.never | ||
)): | ||
return False | ||
elif ct == MetaCondition.first_line: | ||
if not source_text or not source_text.startswith(arg + '\n'): | ||
return False | ||
return True | ||
|
||
|
||
def classify_extractor(extractor: MetadataExtractor, metadata_extractors_by: dict): | ||
"""Classify an extractor and add it to the metadata_extractors_by dict.""" | ||
global DEFAULT_EXTRACTOR | ||
if extractor.name == DEFAULT_EXTRACTOR_NAME: | ||
DEFAULT_EXTRACTOR = extractor | ||
metadata_extractors_by['priority'][extractor.priority].append(extractor) | ||
metadata_extractors_by['source'][extractor.source].append(extractor) | ||
metadata_extractors_by['name'][extractor.name] = extractor | ||
metadata_extractors_by['all'].append(extractor) | ||
|
||
|
||
def load_defaults(site: 'nikola.nikola.Nikola', metadata_extractors_by: dict): | ||
"""Load default metadata extractors.""" | ||
for extractor in _default_extractors: | ||
extractor.site = site | ||
classify_extractor(extractor, metadata_extractors_by) | ||
|
||
|
||
def is_extractor(extractor) -> bool: | ||
"""Check if a given class is an extractor.""" | ||
return isinstance(extractor, MetadataExtractor) | ||
|
||
|
||
def default_metadata_extractors_by() -> dict: | ||
"""Return the default metadata_extractors_by dictionary.""" | ||
d = { | ||
'priority': {}, | ||
'source': {}, | ||
'name': {}, | ||
'all': [] | ||
} | ||
|
||
for i in MetaPriority: | ||
d['priority'][i] = [] | ||
for i in MetaSource: | ||
d['source'][i] = [] | ||
|
||
return d | ||
|
||
|
||
def _register_default(extractor: MetadataExtractor) -> MetadataExtractor: | ||
"""Register a default extractor.""" | ||
_default_extractors.append(extractor()) | ||
return extractor | ||
|
||
|
||
@_register_default | ||
class NikolaMetadata(MetadataExtractor): | ||
"""Extractor for Nikola-style metadata.""" | ||
|
||
name = 'nikola' | ||
source = MetaSource.text | ||
priority = MetaPriority.normal | ||
supports_write = True | ||
split_metadata_re = re.compile('\n\n') | ||
nikola_re = re.compile(r'^\s*\.\. (.*?): (.*)') | ||
|
||
def _extract_metadata_from_text(self, source_text: str) -> dict: | ||
"""Extract metadata from text.""" | ||
outdict = {} | ||
for line in source_text.split('\n'): | ||
match = self.nikola_re.match(line) | ||
if match: | ||
outdict[match.group(1)] = match.group(2) | ||
return outdict | ||
|
||
def write_metadata(self, metadata: dict, comment_wrap=False) -> str: | ||
"""Write metadata in this extractor’s format.""" | ||
metadata = metadata.copy() | ||
order = ('title', 'slug', 'date', 'tags', 'category', 'link', 'description', 'type') | ||
f = '.. {0}: {1}' | ||
meta = [] | ||
for k in order: | ||
try: | ||
meta.append(f.format(k, metadata.pop(k))) | ||
except KeyError: | ||
pass | ||
# Leftover metadata (user-specified/non-default). | ||
for k in natsort.natsorted(list(metadata.keys()), alg=natsort.ns.F | natsort.ns.IC): | ||
meta.append(f.format(k, metadata[k])) | ||
data = '\n'.join(meta) | ||
if comment_wrap is True: | ||
comment_wrap = ('<!--', '-->') | ||
if comment_wrap: | ||
return '\n'.join((comment_wrap[0], data, comment_wrap[1], '', '')) | ||
else: | ||
return data + '\n\n' | ||
|
||
|
||
@_register_default | ||
class YAMLMetadata(MetadataExtractor): | ||
"""Extractor for YAML metadata.""" | ||
|
||
name = 'yaml' | ||
source = MetaSource.text | ||
conditions = ((MetaCondition.first_line, '---'),) | ||
requirements = [('yaml', 'PyYAML', 'YAML')] | ||
supports_write = True | ||
split_metadata_re = re.compile('\n---\n') | ||
map_from = 'yaml' | ||
priority = MetaPriority.specialized | ||
|
||
def _extract_metadata_from_text(self, source_text: str) -> dict: | ||
"""Extract metadata from text.""" | ||
import yaml | ||
meta = yaml.safe_load(source_text[4:]) | ||
# We expect empty metadata to be '', not None | ||
for k in meta: | ||
if meta[k] is None: | ||
meta[k] = '' | ||
return meta | ||
|
||
def write_metadata(self, metadata: dict, comment_wrap=False) -> str: | ||
"""Write metadata in this extractor’s format.""" | ||
import yaml | ||
return '\n'.join(('---', yaml.safe_dump(metadata, default_flow_style=False).strip(), '---', '')) | ||
|
||
|
||
@_register_default | ||
class TOMLMetadata(MetadataExtractor): | ||
"""Extractor for TOML metadata.""" | ||
|
||
name = 'toml' | ||
source = MetaSource.text | ||
conditions = ((MetaCondition.first_line, '+++'),) | ||
requirements = [('toml', 'toml', 'TOML')] | ||
supports_write = True | ||
split_metadata_re = re.compile('\n\\+\\+\\+\n') | ||
map_from = 'toml' | ||
priority = MetaPriority.specialized | ||
|
||
def _extract_metadata_from_text(self, source_text: str) -> dict: | ||
"""Extract metadata from text.""" | ||
import toml | ||
return toml.loads(source_text[4:]) | ||
|
||
def write_metadata(self, metadata: dict, comment_wrap=False) -> str: | ||
"""Write metadata in this extractor’s format.""" | ||
import toml | ||
return '\n'.join(('+++', toml.dumps(metadata).strip(), '+++', '')) | ||
|
||
|
||
@_register_default | ||
class FilenameRegexMetadata(MetadataExtractor): | ||
"""Extractor for filename metadata.""" | ||
|
||
name = 'filename_regex' | ||
source = MetaSource.filename | ||
priority = MetaPriority.fallback | ||
conditions = [(MetaCondition.config_bool, 'FILE_METADATA_REGEXP')] | ||
|
||
def extract_filename(self, filename: str, lang: str) -> dict: | ||
"""Try to read the metadata from the filename based on the given re. | ||
This requires to use symbolic group names in the pattern. | ||
The part to read the metadata from the filename based on a regular | ||
expression is taken from Pelican - pelican/readers.py | ||
""" | ||
match = re.match(self.site.config['FILE_METADATA_REGEXP'], filename) | ||
meta = {} | ||
|
||
if match: | ||
for key, value in match.groupdict().items(): | ||
k = key.lower().strip() # metadata must be lowercase | ||
if k == 'title' and self.site.config['FILE_METADATA_UNSLUGIFY_TITLES']: | ||
meta[k] = unslugify(value, lang, discard_numbers=False) | ||
else: | ||
meta[k] = value | ||
|
||
return meta |
Oops, something went wrong.