Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add custom analyzer for nixos option names
  • Loading branch information
adisbladis committed Jun 3, 2020
1 parent 42ae384 commit 8e9772b
Showing 1 changed file with 50 additions and 2 deletions.
52 changes: 50 additions & 2 deletions scripts/import-channel
Expand Up @@ -37,6 +37,16 @@ ANALYSIS = {
"tokenizer": "nix_attrname",
"filter": ["lowercase", "nix_stopwords"],
},
"nixOptionName": {
"type": "custom",
"tokenizer": "nix_option_name",
"filter": ["lowercase"],
},
"nixOptionNameGranular": {
"type": "custom",
"tokenizer": "nix_option_name_granular",
"filter": ["lowercase"],
},
},
"tokenizer": {
"nix_attrname": {
Expand All @@ -60,6 +70,31 @@ ANALYSIS = {
]
),
},
"nix_option_name": {
"type": "pattern",
"pattern": "[.]",
},
# Finer-grained tokenizer for lower-priority matches: also splits camelCase (virtualHost -> [virtual, host])
"nix_option_name_granular": {
"type": "pattern",
# Split on attrname separators (_, ., -) and on camelCase boundaries
"pattern": "|".join(
[
"[_.-]", # Common separators like underscores, dots and dashes
# Camelcase tokenizer adapted from
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
"".join(
[
"(?<=[\\p{L}&&[^\\p{Lu}]])" # lower case
"(?=\\p{Lu})", # followed by upper case
"|",
"(?<=\\p{Lu})" # or upper case
"(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", # followed by lower case
]
),
]
),
},
},
"filter": {
"nix_stopwords": {
Expand Down Expand Up @@ -106,7 +141,20 @@ MAPPING = {
"package_position": {"type": "text"},
"package_homepage": {"type": "keyword"},
# Options fields
"option_name": {"type": "keyword"},
"option_name": {
"type": "text",
"analyzer": "nixOptionName",
"fielddata": True,
"fields": {
"raw": {
"type": "keyword"
},
"granular": {
"type": "text",
"analyzer": "nixOptionNameGranular",
},
},
},
"option_description": {"type": "text"},
"option_type": {"type": "keyword"},
"option_default": {"type": "text"},
Expand Down Expand Up @@ -291,7 +339,7 @@ def ensure_index(es, index, mapping):
},
)
logger.debug(f"ensure_index: index '{index}' was created")

return True


Expand Down

0 comments on commit 8e9772b

Please sign in to comment.