Skip to content

Commit

Permalink
Item13741: Reverse URL encoding in search
Browse files Browse the repository at this point in the history
And by using entity encoding, it's possible now to search on literal
strings that include entities.
  • Loading branch information
gac410 committed Sep 24, 2015
1 parent f046d4d commit 90dad31
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 10 deletions.
75 changes: 75 additions & 0 deletions UnitTestContrib/test/unit/Fn_SEARCH.pm
Expand Up @@ -6889,4 +6889,79 @@ HERE
return;
}

# Verify that the decode= parameter of %SEARCH reverses the encoding that
# was applied to the search term, so that the literal search (and the
# "Searched:" header of the result) uses the original, unencoded text.
#
# Every case sends an %ENCODE-protected term through %SEARCH with the
# matching decode= value and checks two things in the expanded result:
#   * the search ran and reported a topic count, and
#   * the "Searched:" header shows the decoded term.
sub verify_decode_options {
    my $this = shift;

    # term     - encoded search term, exactly as written in the topic text
    # decode   - decode= value expected to reverse that encoding
    # searched - pattern the "Searched:" header must match after decoding
    my @cases = (
        {
            term     => q(%ENCODE{"'" type="entities"}%),
            decode   => 'entity',
            searched => qr{Searched: <b><noautolink>'</noautolink></b>},
        },
        {
            term   => q(%ENCODE{"' \" < > and %" type="safe"}%),
            decode => 'safe',

            # < and > are shown as entities in the rendered header.
            searched =>
qr{Searched: <b><noautolink>' " &lt; &gt; and %</noautolink></b>},
        },
        {
            term     => q(%ENCODE{"'" type="url"}%),
            decode   => 'url',
            searched => qr{Searched: <b><noautolink>'</noautolink></b>},
        },
        {
            # Encoded as url first, then entities; decode= lists the
            # encodings in the reverse order.
            term => q(%ENCODE{"%ENCODE{"'" type="url"}%" type="entities"}%),
            decode   => 'entities, url',
            searched => qr{Searched: <b><noautolink>'</noautolink></b>},
        },
    );

    my $topic_count = qr{Number of topics: <span>\d+</span>};

    foreach my $case (@cases) {

        # One parameter per line, with a trailing newline after the
        # closing "}%".
        my $search = join(
            "\n",
            '%SEARCH{',
            '"' . $case->{term} . '"',
            'type="literal"',
            'decode="' . $case->{decode} . '"',
            'scope="all"',
            'web="TestCases"',
            '}%',
            ''
        );

        my $result = $this->{test_topicObject}->expandMacros($search);

        # Name the failing variant in the message; the raw result alone
        # does not say which decode type was being exercised.
        my $label = 'decode="' . $case->{decode} . '"';

        $this->assert( 0, "$label: no topic count in result: $result" )
          unless $result =~ $topic_count;
        $this->assert( 0, "$label: search term was not decoded: $result" )
          unless $result =~ $case->{searched};
    }

    return;
}

1;
9 changes: 5 additions & 4 deletions core/data/System/VarENCODE.txt
@@ -1,4 +1,4 @@
%META:TOPICINFO{author="ProjectContributor" date="1434650530" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" date="1443119042" format="1.1" version="1"}%
%META:TOPICPARENT{name="Macros"}%
---+ ENCODE -- encode characters in a string
Encode character sequences in ="string"=, by mapping characters (or sequences of characters) to an alternative character (or sequence of characters). This macro can be used to encode strings for use in URLs, to encode to HTML entities, to protect quotes, and for as many other uses as you can imagine.
Expand All @@ -12,9 +12,10 @@ Encode character sequences in ="string"=, by mapping characters (or sequences of
If =ENCODE= is called with no optional parameters (e.g. =%<nop>ENCODE{"string"}%=) then the default =type="url"= encoding will be used.
---++ Predefined encodings
Unless otherwise specified, the =type= parameter encodes the following "special characters"
* all non-printable ASCII characters below space, except newline (="\n"=) and carriage return (="\r"=)
* HTML special characters ="<"=, =">"=, ="&"=, single quote (='=) and double quote (="=)
* TML special characters ="%"=, ="["=, ="]"=, ="@"=, ="_"=, ="*"=, ="="= and ="|"=
* =type= not specified:
* all non-printable ASCII characters below space, except newline (="\n"=) and carriage return (="\r"=)
* HTML special characters ="<"=, =">"=, ="&"=, single quote (='=) and double quote (="=)
* TML special characters ="%"=, ="["=, ="]"=, ="@"=, ="_"=, ="*"=, ="="= and ="|"=
* =type="entity"= or =type="entities"= Encode special characters into HTML entities, like a double quote into =&amp;#034;=. Does *not* encode =\n= (newline).
* =type="html"= As =type="entity"= except it also encodes =\n= (newline)
* =type="safe"= Encode just the characters ='"&lt;&gt;%= into HTML entities.
Expand Down
3 changes: 2 additions & 1 deletion core/data/System/VarSEARCH.txt
@@ -1,4 +1,4 @@
%META:TOPICINFO{author="ProjectContributor" date="1434650530" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" date="1443119042" format="1.1" version="1"}%
%META:TOPICPARENT{name="Macros"}%
---+ SEARCH -- search content
Inline search, shows a search result embedded in a topic
Expand All @@ -24,6 +24,7 @@ A topic, a topic with asterisk wildcards, or a list of topics separated by comma
| =date= | limits the results to those pages with latest edit time in the given [[TimeSpecifications#TimeIntervals][time interval]]. | |
| =reverse= | If ="on"= will reverse the direction of the search. Applies only to the key specified by =order=. | =off= |
| =casesensitive= | If ="on"= perform a case sensitive search. (For =type=query= searches, =casesensitive= is always =on=. See QuerySearch for more flexible case comparison options) | =off= |
| =decode= | Reverse any encoding applied to protect search terms by the =%<nop>URLPARAM{}%= macro. Comma-separated list of encodings, given in the reverse order of the =encode= values passed to =URLPARAM=. Supported decoding types are =entity=, =entities=, =safe= and =url=; =quote= is accepted but ignored. | |
| =bookview= | If ="on"=, perform a BookView search, e.g. show complete topic text. Very resource demanding. Use only with small result sets | =off= |
| =nonoise= | If ="on"=, shorthand for =nosummary="on" nosearch="on" nototal="on" zeroresults="off" noheader="on" noempty="on"= | =off= |
| =nosummary= | Show topic title only, no content summary | =off= |
Expand Down
6 changes: 4 additions & 2 deletions core/data/System/VarURLPARAM.txt
@@ -1,4 +1,4 @@
%META:TOPICINFO{author="ProjectContributor" date="1442930527" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" date="1443119042" format="1.1" version="1"}%
%META:TOPICPARENT{name="Macros"}%
---+ URLPARAM -- get URL or HTTP POST parameter value
Returns the value of the named parameter in the URL or HTTP POST request.
Expand All @@ -12,7 +12,7 @@ Returns the value of the named parameter in the URL or HTTP POST request.
="entity"= - Encode special characters into HTML entities. See [[VarENCODE][ENCODE]] for more details. <br /> \
="safe"= - Encode characters ='"&lt;&gt;%= into HTML entities. <br />\
="url"= - Encode special characters for URL parameter use, like a double quote into =%22= <br />\
="quote"= - Escape double quotes with backslashes (=\"=), does not change other characters; required when feeding URL parameters into other macros. \
="quote"= - Escape double quotes with backslashes (=\"=), does not change other characters; required when feeding URL parameters into other macros.<br/>\
You can combine several encodings together, and they will be applied in the order you specify e.g. =encode="safe, quote"= | =safe= |
| =multiple= | If set, gets all selected elements of a =&lt;select multiple="multiple"&gt;= tag. Can be set to a format string, with =$item= indicating the element, e.g. =multiple="Option: $item"= (also supports the standard [[format tokens]]) | first element |
| =separator= | Separator between multiple selections. Only relevant if multiple is specified | =$n= (new line) |
Expand All @@ -24,6 +24,8 @@ Returns the value of the named parameter in the URL or HTTP POST request.

%H% Double quotes in URL parameters must be escaped when passed into other macros.%BR% Example: =%<nop>SEARCH{ "%<nop>URLPARAM{ "search" encode="safe, quote" }%" noheader="on" }%=

%H% Reverse the encoding when used in SEARCH.%BR% Example: =%<nop>SEARCH{ "%<nop>URLPARAM{ "search" encode="safe, quote"}%" decode="safe" noheader="on" }%=. (It is not necessary to reverse the quote encoding; all other =decode== options should be specified in the reverse order of the =encode== options.)

%H% When used in a template topic, this macro will be expanded when the template is used to create a new topic. See TemplateTopics#TemplateTopicsVars for details.

%X% Watch out for internal parameters, such as =rev=, =skin=, =template=, =topic=, =web=; they have a special meaning in Foswiki. Common parameters and view script specific parameters are documented at CommandAndCGIScripts.
Expand Down
6 changes: 3 additions & 3 deletions core/data/System/WebSearch.txt
@@ -1,4 +1,4 @@
%META:TOPICINFO{author="ProjectContributor" date="1442930527" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" date="1443119042" format="1.1" version="1"}%
%META:TOPICPARENT{name="WebHome"}%
%IF{
"$'URLPARAM{tab}'='search' OR $'URLPARAM{tab}'='' AND NOT $TAB='advanced'"
Expand Down Expand Up @@ -238,7 +238,7 @@
then="<h2>%MAKETEXT{"Search results"}%</h2>
$percentINCLUDE{$quot%SYSTEMWEB%.%TOPIC%$quot section=$quotsearchfeed$quot}$percent"
}%%SEARCH{
"%URLPARAM{"search" encode="safe, quote"}%"
"%URLPARAM{"search" encode="entities, quote"}%" decode="entities"
type="%URLPARAM{"type" default="word"}%"
scope="%URLPARAM{"scope" encode="safe, quote"}%"
web="%URLPARAM{"web" encode="safe, quote"}%"%IF{
Expand Down Expand Up @@ -284,7 +284,7 @@ $percentINCLUDE{$quot%SYSTEMWEB%.%TOPIC%$quot section=$quotsearchfeed$quot}$perc
%MAKETEXT{"To display the above search results in a topic, copy-paste the following markup:"}%
<pre class='tml'>
%<nop>SEARCH{
"%URLPARAM{"search" encode="safe, quote"}%"
"%URLPARAM{"search" encode="entities, quote"}%"
type="%URLPARAM{"type" default="%SEARCHDEFAULTTTYPE%"}%"%FORMAT{
"scope,web,recurse,nosearch,casesensitive,nosummary,nototal,order,reverse,limit,search"
type="string"
Expand Down
29 changes: 29 additions & 0 deletions core/lib/Foswiki/Search.pm
Expand Up @@ -307,8 +307,37 @@ sub searchWeb {

my $revSort = Foswiki::isTrue( $params{reverse} );
$params{scope} = $params{scope} || '';

my $searchString = defined $params{search} ? $params{search} : '';

# Reverse encoding done by %URLPARAM{ ... encode= }% if requested
if ( length($searchString) && defined $params{decode} ) {
foreach my $e ( split( /\s*,\s*/, $params{decode} ) ) {
if ( $e =~ m/entit(y|ies)/i ) {
$searchString = Foswiki::entityDecode($searchString);
}
elsif ( $e =~ m/^quotes?$/i ) {

#nop - reversing of quote escaping not needed?
}
elsif ( $e =~ m/^url$/i ) {

$searchString = Foswiki::urlDecode($searchString);
}
elsif ( $e =~ m/^safe$/i ) {

# entity decode ' " < > and %
# &#39;&#34;&#60;&#62;&#37;
$searchString =~ s/(&#(39|34|60|62|37);)/chr($2)/ge;
}
else {
throw Error::Simple(
'Unknown decode type requested: Valid types are entity, entities, safe and url.'
);
}
}
}

$params{includeTopics} = $params{topic} || '';
$params{type} = $params{type} || '';

Expand Down

0 comments on commit 90dad31

Please sign in to comment.