Skip to content

Commit

Permalink
Reworked search query
Browse files Browse the repository at this point in the history
- still use multi_match but now with cross_fields type
- add edge n-gram index field since cross_fields works only with match
  and this way we can have prefix support
- suffix matching is still supported, as before, by reversing the words in
  data and query
- for the query we now create variations of all multi_match queries: from 2
  words you get 4 queries, from 3 words you get 8 queries, and so on.
  • Loading branch information
garbas committed Sep 23, 2020
1 parent 5a65c6f commit 743751f
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 79 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
12
13
118 changes: 101 additions & 17 deletions import-scripts/import_scripts/channel.py
Expand Up @@ -34,7 +34,22 @@
"normalizer": {
"lowercase": {"type": "custom", "char_filter": [], "filter": ["lowercase"]}
},
"tokenizer": {
"edge": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 50,
"token_chars": [
"letter",
"digit",
# Either we use them or we would need to strip them before that.
"punctuation",
"symbol",
],
},
},
"analyzer": {
"edge": {"tokenizer": "edge"},
"lowercase": {
"type": "custom",
"tokenizer": "keyword",
Expand Down Expand Up @@ -65,22 +80,67 @@
"drv_path": {"type": "keyword"},
},
},
"package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_attr_name_reverse": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_attr_name_query": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_attr_name_query_reverse": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_attr_set": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_attr_set_reverse": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_pname": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_pname_reverse": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_pname": {"type": "keyword", "normalizer": "lowercase"},
"package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_pversion": {"type": "keyword"},
"package_description": {"type": "text", "analyzer": "english"},
"package_description_reverse": {"type": "text", "analyzer": "english"},
"package_longDescription": {"type": "text", "analyzer": "english"},
"package_longDescription_reverse": {"type": "text", "analyzer": "english"},
"package_description": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_description_reverse": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_longDescription": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_longDescription_reverse": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"package_license": {
"type": "nested",
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
Expand All @@ -98,12 +158,36 @@
"package_homepage": {"type": "keyword"},
"package_system": {"type": "keyword"},
# Options fields
"option_name": {"type": "keyword", "normalizer": "lowercase"},
"option_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
"option_name_query": {"type": "keyword", "normalizer": "lowercase"},
"option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"},
"option_description": {"type": "text", "analyzer": "english"},
"option_description_reverse": {"type": "text", "analyzer": "english"},
"option_name": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"option_name_reverse": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"option_name_query": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"option_name_query_reverse": {
"type": "keyword",
"normalizer": "lowercase",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"option_description": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"option_description_reverse": {
"type": "text",
"analyzer": "english",
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
},
"option_type": {"type": "keyword"},
"option_default": {"type": "text"},
"option_example": {"type": "text"},
Expand Down
4 changes: 2 additions & 2 deletions src/Page/Options.elm
Expand Up @@ -288,8 +288,8 @@ makeRequest options channel query from size sort =
sort
"option"
"option_name"
[ ( "option_name", 2.2 )
, ( "option_name_query", 2.0 )
[ ( "option_name", 6.0 )
, ( "option_name_query", 3.0 )
, ( "option_description", 1.0 )
]
)
Expand Down
8 changes: 4 additions & 4 deletions src/Page/Packages.elm
Expand Up @@ -414,10 +414,10 @@ makeRequest options channel query from size sort =
sort
"package"
"package_attr_name"
[ ( "package_attr_name", 2.4 )
, ( "package_pname", 2.2 )
, ( "package_attr_name_query", 2.0 )
, ( "package_description", 1.2 )
[ ( "package_attr_name", 9.0 )
, ( "package_pname", 6.0 )
, ( "package_attr_name_query", 4.0 )
, ( "package_description", 1.3 )
, ( "package_longDescription", 1.0 )
]
)
Expand Down
127 changes: 72 additions & 55 deletions src/Search.elm
Expand Up @@ -62,6 +62,7 @@ import Http
import Json.Decode
import Json.Encode
import RemoteData
import Set
import Task
import Url.Builder

Expand Down Expand Up @@ -753,38 +754,56 @@ filter_by_type type_ =
]


search_fields :
Float
-> List String
searchFields :
String
-> List ( String, Float )
-> List (List ( String, Json.Encode.Value ))
search_fields baseScore queryWords fields =
queryWords
|> List.reverse
|> List.indexedMap
(\queryIndex queryWord ->
[ ( "multi_match"
, Json.Encode.object
[ ( "type", Json.Encode.string "bool_prefix" )
, ( "query", Json.Encode.string queryWord )
, ( "analyzer", Json.Encode.string "lowercase" )
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
, ( "prefix_length", Json.Encode.int 3 )
, ( "operator", Json.Encode.string "or" )
, ( "_name"
, Json.Encode.string <| "multi_match_" ++ queryWord ++ "_" ++ (queryIndex + 1 |> String.fromInt)
)
, ( "fields"
, Json.Encode.list Json.Encode.string
(List.map
(\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat))
fields
)
)
]
)
]
)
searchFields query fields =
    -- Builds the list of `multi_match` clauses for a search string.
    --
    -- query  : the raw search string; it is split into words with String.words.
    -- fields : ( field name, boost ) pairs to search in.
    --
    -- Returns one `multi_match` object (type "cross_fields", operator "and")
    -- per query variation. A variation keeps each word either as typed or
    -- reversed, so n words give up to 2^n clauses.
    let
        -- All keep-or-reverse combinations of the given words.
        -- The base case is a single empty variation, so every word doubles
        -- the number of variations. The Set round-trip removes duplicates
        -- (a word equal to its own reverse yields the same variation twice)
        -- and returns the variations in sorted order.
        queryVariations : List String -> List (List String)
        queryVariations words =
            case words of
                [] ->
                    [ [] ]

                word :: rest ->
                    let
                        restVariations : List (List String)
                        restVariations =
                            queryVariations rest
                    in
                    List.append
                        (List.map ((::) word) restVariations)
                        (List.map ((::) (String.reverse word)) restVariations)
                        |> Set.fromList
                        |> Set.toList

        -- Each field is paired with its "_reverse" twin at 0.8 of the boost.
        fieldsWithReverse =
            fields
                ++ List.map
                    (\( field, score ) -> ( field ++ "_reverse", score * 0.8 ))
                    fields

        -- Every field is listed twice with the same boost: the field itself
        -- and its ".edge" sub-field.
        allFields =
            List.concatMap
                (\( field, score ) ->
                    let
                        boost =
                            "^" ++ String.fromFloat score
                    in
                    [ field ++ boost
                    , field ++ ".edge" ++ boost
                    ]
                )
                fieldsWithReverse

        -- One `multi_match` clause for a single variation of the query words.
        multiMatch queryWords =
            [ ( "multi_match"
              , Json.Encode.object
                    [ ( "type", Json.Encode.string "cross_fields" )
                    , ( "query", Json.Encode.string <| String.join " " queryWords )
                    , ( "analyzer", Json.Encode.string "whitespace" )
                    , ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
                    , ( "operator", Json.Encode.string "and" )
                    , ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
                    , ( "fields", Json.Encode.list Json.Encode.string allFields )
                    ]
              )
            ]
    in
    String.words query
        |> queryVariations
        |> List.map multiMatch


makeRequestBody :
Expand Down Expand Up @@ -830,31 +849,29 @@ makeRequestBody query from sizeRaw sort type_ sortField fields =
[ ( "tie_breaker", Json.Encode.float 0.7 )
, ( "queries"
, Json.Encode.list Json.Encode.object
[ [ ( "bool"
, Json.Encode.object
[ ( "must"
, Json.Encode.list Json.Encode.object <|
search_fields
1.0
(String.words query)
fields
)
]
)
]
, [ ( "bool"
, Json.Encode.object
[ ( "must"
, Json.Encode.list Json.Encode.object <|
search_fields
0.8
(String.words query |> List.map String.reverse)
(List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields)
)
]
)
]
]
(searchFields query fields)
-- [ [ ( "bool"
-- , Json.Encode.object
-- [ ( "must"
-- , Json.Encode.list Json.Encode.object <|
-- searchFields query fields
-- )
-- ]
-- )
-- ]
-- ]
-- , [ ( "bool"
-- , Json.Encode.object
-- [ ( "must"
-- , Json.Encode.list Json.Encode.object <|
-- searchFields
-- 0.8
-- (String.words query |> List.map String.reverse)
-- )
-- ]
-- )
-- ]
--]
)
]
)
Expand Down

0 comments on commit 743751f

Please sign in to comment.