Skip to content

Commit

Permalink
better html_unescape tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kraih committed Apr 23, 2012
1 parent 59fccf1 commit 82b9f76
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 26 deletions.
1 change: 1 addition & 0 deletions Changes
Expand Up @@ -2,6 +2,7 @@ This file documents the revision history for Perl extension Mojolicious.

2.88 2012-04-24
- Improved documentation.
- Improved tests.

2.87 2012-04-23
- Improved html_escape performance and added pattern support.
Expand Down
45 changes: 21 additions & 24 deletions lib/Mojo/Util.pm
Expand Up @@ -2375,9 +2375,6 @@ $REVERSE{$ENTITIES{$_}} //= $_ for reverse sort keys %ENTITIES;
# "apos"
$ENTITIES{'apos;'} = "\x{0027}";

# Entities regex for html_unescape. Capture 1 is a numeric reference (one to
# seven decimal digits, or "x" followed by one to six hex digits) that must be
# terminated by ";". Capture 2 is a run of word characters and dots with an
# optional trailing ";", i.e. a candidate entity name.
my $ENTITIES_RE = qr/&(?:\#((?:\d{1,7}|x[0-9A-Fa-f]{1,6}));|([\w\.]+;?))/;

# Encode cache
my %ENCODE;

Expand Down Expand Up @@ -2475,15 +2472,16 @@ sub html_escape {
my ($string, $pattern) = @_;
$pattern ||= '^\n\r\t !\#\$%\(-;=?-~';
return $string unless $string =~ /[^$pattern]/;
$string =~ s/([$pattern])/_escape($1)/ge;
$string =~ s/([$pattern])/_encode($1)/ge;
return $string;
}

# "Daddy, I'm scared. Too scared to even wet my pants.
# Just relax and it'll come, son."
sub html_unescape {
# Takes one string, substitutes the entity references matched below with the
# value returned by the helper, and returns the resulting string.
my $string = shift;
# NOTE(review): this view looks like a rendered diff in which the statement
# below (precompiled $ENTITIES_RE, helper _unescape) is the pre-change form and
# the statement after it (inline pattern, helper _decode) is its replacement;
# confirm that only one of them is live in the real file.
$string =~ s/$ENTITIES_RE/_unescape($1, $2)/ge;
# Capture 1 is a numeric reference (decimal, or "x" plus hex digits) and
# capture 2 is a candidate entity name with an optional trailing ";".
$string
=~ s/&(?:\#((?:\d{1,7}|x[0-9A-Fa-f]{1,6}));|([\w\.]+;?))/_decode($1,$2)/ge;
return $string;
}

Expand Down Expand Up @@ -2698,10 +2696,25 @@ sub _adapt {
return $k + (((PC_BASE - PC_TMIN + 1) * $delta) / ($delta + PC_SKEW));
}

# Helper for html_unescape: turn one matched entity reference into text.
#
# Arguments (positional):
#   1. numeric reference without the leading "&#" and trailing ";" (decimal
#      digits, or "x" followed by hex digits); undefined for named entities
#   2. candidate entity name, possibly followed by extra characters; undefined
#      for numeric references
#
# Returns the decoded character for numeric references. For names it returns
# the value of the longest known prefix followed by the unconsumed rest, or
# "&" plus the untouched name when no prefix is a known entity.
sub _decode {
  my ($point, $name) = @_;

  # Numeric (check definedness, a name like "0" is false but still a name and
  # must not be turned into chr(undef))
  unless (defined $name) {
    return substr($point, 0, 1) eq 'x' ? chr(hex $point) : chr($point);
  }

  # Find entity name by dropping trailing characters until a known entity is
  # left, the dropped characters are appended unchanged
  my $rest   = '';
  my $entity = $name;
  while (length $entity) {
    return "$ENTITIES{$entity}$rest" if exists $ENTITIES{$entity};
    $rest = chop($entity) . $rest;
  }
  return "&$name";
}

# Helper for html_escape
sub _escape {
return "&$REVERSE{$_[0]}" if exists $REVERSE{$_[0]};
return '&#' . ord($_[0]) . ';';
sub _encode {
return exists $REVERSE{$_[0]} ? "&$REVERSE{$_[0]}" : "&#@{[ord($_[0])]};";
}

sub _hmac {
Expand All @@ -2720,22 +2733,6 @@ sub _hmac {
return unpack 'H*', $hash->($opad . $hash->($ipad . $string));
}

# Helper for html_unescape: resolve a single matched entity reference.
#
# Receives the two captures of the entities pattern: first the body of a
# numeric reference (decimal digits, or "x" plus hex digits), second a
# candidate entity name. Exactly one of them is expected to be defined.
#
# Returns the referenced character for numeric input. For a name, returns the
# value of its longest known prefix plus the remaining characters, or the
# original "&name" text if nothing matches.
sub _unescape {
  my ($number, $candidate) = @_;

  # Named entity (decided by definedness, so that the false but valid name "0"
  # is not mistaken for a numeric reference)
  if (defined $candidate) {
    my $known = $candidate;
    my $tail  = '';

    # Shorten from the right until the remainder is a known entity
    while (length $known) {
      return "$ENTITIES{$known}$tail" if exists $ENTITIES{$known};
      $tail = chop($known) . $tail;
    }
    return "&$candidate";
  }

  # Numeric, hex accepts the leading "x" as is
  return chr(hex $number) if substr($number, 0, 1) eq 'x';
  return chr($number);
}

1;
__END__
Expand Down
4 changes: 2 additions & 2 deletions t/mojo/bytestream.t
Expand Up @@ -203,8 +203,8 @@ $stream = b('foobar');
is $stream->html_escape, 'foobar', 'right html escaped result';

# html_unescape
$stream = b('foobar<baz>&"');
is $stream->html_unescape, "foobar<baz>&\"", 'right html unescaped result';
$stream = b('&#x3c;foo&#x3E;bar&lt;baz&gt;&#x26;&#34;');
is $stream->html_unescape, "<foo>bar<baz>&\"", 'right html unescaped result';

# html_unescape (special entities)
$stream = b('foo &CounterClockwiseContourIntegral; bar &b.Sigma; &sup1baz');
Expand Down

0 comments on commit 82b9f76

Please sign in to comment.