Skip to content

Commit

Permalink
improved unicode handling to be more correct and less forgiving
Browse files (browse the repository at this point in the history)
  • Loading branch information
kraih committed Sep 3, 2011
1 parent 6641161 commit fce07e9
Show file tree
Hide file tree
Showing 15 changed files with 35 additions and 52 deletions.
3 changes: 2 additions & 1 deletion Changes
@@ -1,6 +1,7 @@
This file documents the revision history for Perl extension Mojolicious.

1.97 2011-09-02 00:00:00
1.97 2011-09-03 00:00:00
- Improved unicode handling to be more correct and less forgiving.
- Fixed typos.

1.96 2011-09-02 00:00:00
Expand Down
1 change: 0 additions & 1 deletion lib/Mojo/Asset/File.pm
Expand Up @@ -66,7 +66,6 @@ sub add_chunk {

# Append to file
$chunk = '' unless defined $chunk;
utf8::encode $chunk if utf8::is_utf8 $chunk;
$self->handle->syswrite($chunk, length $chunk);

return $self;
Expand Down
1 change: 0 additions & 1 deletion lib/Mojo/Asset/Memory.pm
Expand Up @@ -15,7 +15,6 @@ sub new {

sub add_chunk {
my ($self, $chunk) = @_;
utf8::encode $chunk if utf8::is_utf8 $chunk;
$self->{content} .= $chunk if defined $chunk;
return $self;
}
Expand Down
1 change: 0 additions & 1 deletion lib/Mojo/ByteStream.pm
Expand Up @@ -139,7 +139,6 @@ sub quote {
sub say {
my ($self, $handle) = @_;
$handle ||= \*STDOUT;
utf8::encode $$self if utf8::is_utf8 $$self;
print $handle "$$self\n";
}

Expand Down
5 changes: 2 additions & 3 deletions lib/Mojo/DOM.pm
Expand Up @@ -218,9 +218,8 @@ sub parent {
}

sub parse {
my ($self, $xml) = @_;
$self->charset(undef) if utf8::is_utf8 $xml;
$self->[0]->parse($xml);
my $self = shift;
$self->[0]->parse(@_);
return $self;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/Mojo/DOM/HTML.pm
Expand Up @@ -96,7 +96,7 @@ sub parse {

# Decode
my $charset = $self->charset;
decode $charset, $html if $charset && !utf8::is_utf8 $html;
decode $charset, $html if $charset;

# Tokenize
my $tree = ['root'];
Expand Down
3 changes: 1 addition & 2 deletions lib/Mojo/Log.pm
Expand Up @@ -35,8 +35,7 @@ sub fatal { shift->log('fatal', @_) }

sub format {
my ($self, $level, @msgs) = @_;
my $msgs = join "\n",
map { utf8::decode $_ unless utf8::is_utf8 $_; $_ } @msgs;
my $msgs = join "\n", @msgs;
return '[' . localtime(time) . "] [$level] $msgs\n";
}

Expand Down
2 changes: 2 additions & 0 deletions lib/Mojo/Path.pm
Expand Up @@ -80,6 +80,7 @@ sub parse {

# Parse
url_unescape $path;
utf8::decode $path;
my @parts;
for my $part (split '/', $path) {

Expand Down Expand Up @@ -109,6 +110,7 @@ sub to_string {
my @path;
for my $part (@{$self->parts}) {
my $escaped = $part;
utf8::encode $escaped;
url_escape $escaped, "$Mojo::URL::UNRESERVED$Mojo::URL::SUBDELIM\:\@";
push @path, $escaped;
}
Expand Down
33 changes: 6 additions & 27 deletions lib/Mojo/Util.pm
Expand Up @@ -301,10 +301,7 @@ push @EXPORT_OK, qw/url_escape url_unescape xml_escape/;

sub b64_decode { $_[0] = MIME::Base64::decode_base64($_[0]); }

sub b64_encode {
utf8::encode $_[0] if utf8::is_utf8 $_[0];
$_[0] = MIME::Base64::encode_base64($_[0], $_[1]);
}
sub b64_encode { $_[0] = MIME::Base64::encode_base64($_[0], $_[1]) }

sub camelize {
return if $_[0] =~ /^[A-Z]/;
Expand Down Expand Up @@ -419,17 +416,9 @@ sub html_unescape {
/_unescape($1, $2)/gex;
}

sub md5_bytes {
my $data = shift;
utf8::encode $data if utf8::is_utf8 $data;
_md5($data);
}
sub md5_bytes { _md5(@_) }

sub md5_sum {
my $data = shift;
utf8::encode $data if utf8::is_utf8 $data;
Digest::MD5::md5_hex($data);
}
sub md5_sum { Digest::MD5::md5_hex(@_) }

sub punycode_decode {
use integer;
Expand Down Expand Up @@ -557,10 +546,7 @@ sub punycode_encode {

sub qp_decode { $_[0] = MIME::QuotedPrint::decode_qp($_[0]) }

sub qp_encode {
utf8::encode $_[0] if utf8::is_utf8 $_[0];
$_[0] = MIME::QuotedPrint::encode_qp($_[0]);
}
sub qp_encode { $_[0] = MIME::QuotedPrint::encode_qp($_[0]) }

sub quote {

Expand All @@ -577,20 +563,14 @@ sub secure_compare {
return $r == 0 ? 1 : undef;
}

sub sha1_bytes {
my $data = shift;
utf8::encode $data if utf8::is_utf8 $data;
_sha1($data);
}
sub sha1_bytes { _sha1(@_) }

sub sha1_sum {
die <<'EOF' unless SHA1;
Module "Digest::SHA" not present in this version of Perl.
Please install it manually or upgrade Perl to at least version 5.10.
EOF
my $data = shift;
utf8::encode $data if utf8::is_utf8 $data;
Digest::SHA::sha1_hex($data);
Digest::SHA::sha1_hex(@_);
}

sub trim {
Expand Down Expand Up @@ -620,7 +600,6 @@ sub url_escape {
my $pattern = $_[1] || 'A-Za-z0-9\-\.\_\~';

# Escape
utf8::encode $_[0] if utf8::is_utf8 $_[0];
return unless $_[0] =~ /[^$pattern]/;
$_[0] =~ s/([^$pattern])/sprintf('%%%02X',ord($1))/ge;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Mojolicious/Command/get.pm
Expand Up @@ -139,7 +139,7 @@ sub run {

# Error
my ($message, $code) = $tx->error;
utf8::encode $url if utf8::is_utf8 $url;
utf8::encode $url;
warn qq/Problem loading URL "$url". ($message)\n/ if $message && !$code;

# Charset
Expand Down
2 changes: 1 addition & 1 deletion lib/Mojolicious/Command/inflate.pm
Expand Up @@ -45,7 +45,7 @@ sub run {
my $prefix = $file =~ /\.\w+\.\w+$/ ? $templates : $public;
my $path = $self->rel_file("$prefix/$file");
my $content = $all->{$file};
utf8::encode $content if utf8::is_utf8 $content;
utf8::encode $content;
$self->write_file($path, $content);
}
}
Expand Down
5 changes: 4 additions & 1 deletion lib/Mojolicious/Plugin/EPLRenderer.pm
Expand Up @@ -17,7 +17,10 @@ sub register {
# Template
my $inline = $options->{inline};
my $path = $r->template_path($options);
$path = md5_sum $inline if defined $inline;
if (defined $inline) {
utf8::encode $inline;
$path = md5_sum $inline;
}
return unless defined $path;

# Cache
Expand Down
6 changes: 3 additions & 3 deletions t/mojo/bytestream.t
Expand Up @@ -60,7 +60,7 @@ $stream = b("Zm9vYmFyJCVeJjMyMTc=\n");
is $stream->b64_decode, 'foobar$%^&3217', 'right base64 decoded result';

# utf8 b64_encode
$stream = b("foo\x{df}\x{0100}bar%23\x{263a}")->b64_encode;
$stream = b("foo\x{df}\x{0100}bar%23\x{263a}")->encode->b64_encode;
is "$stream", "Zm9vw5/EgGJhciUyM+KYug==\n", 'right base64 encoded result';

# utf8 b64_decode
Expand All @@ -87,7 +87,7 @@ $stream = b('business%3B23');
is $stream->url_unescape, 'business;23', 'right url unescaped result';

# utf8 url_escape
$stream = b("foo\x{df}\x{0100}bar\x{263a}")->url_escape;
$stream = b("foo\x{df}\x{0100}bar\x{263a}")->encode->url_escape;
is "$stream", 'foo%C3%9F%C4%80bar%E2%98%BA', 'right url escaped result';

# utf8 url_unescape
Expand Down Expand Up @@ -115,7 +115,7 @@ $stream = b('"\"foo 23 \"bar\""');
is $stream->unquote, '"foo 23 "bar"', 'right unquoted result';

# md5_bytes
$original = 'foo bar baz ♥';
$original = b('foo bar baz ♥')->encode->to_string;
my $copy = $original;
$stream = b($copy);
is unpack('H*', $stream->md5_bytes), "a740aeb6e066f158cbf19fd92e890d2d",
Expand Down
17 changes: 10 additions & 7 deletions t/mojolicious/lite_app.t
Expand Up @@ -10,7 +10,7 @@ BEGIN {
$ENV{MOJO_MODE} = 'development';
}

use Test::More tests => 890;
use Test::More tests => 893;

# Pollution
123 =~ m/(\d+)/;
Expand Down Expand Up @@ -64,8 +64,8 @@ get '/☃' => sub {
$self->render_text($self->url_for . $self->url_for('current'));
};

# GET /unicode/a%E4b
get '/unicode/aäb' => sub {
# GET /uni/a%E4b
get '/uni/aäb' => sub {
my $self = shift;
$self->render(text => $self->url_for);
};
Expand Down Expand Up @@ -773,8 +773,11 @@ $t->get_ok('/☃')->status_is(200)->content_is('/%E2%98%83/%E2%98%83');
# GET /☃ (with trailing slash)
$t->get_ok('/☃/')->status_is(200)->content_is('/%E2%98%83//%E2%98%83/');

# GET /unicode/a%E4b
$t->get_ok('/unicode/a%E4b')->status_is(200)->content_is('/unicode/a%E4b');
# GET /uni/aäb
$t->get_ok('/uni/aäb')->status_is(200)->content_is('/uni/a%C3%A4b');

# GET /uni/a%E4b
$t->get_ok('/uni/a%E4b')->status_is(200)->content_is('/uni/a%C3%A4b');

# GET /unicode/☃
$t->get_ok('/unicode/☃')->status_is(200)
Expand Down Expand Up @@ -1157,7 +1160,7 @@ $t->get_ok('/inline/ep/too')->status_is(200)->content_is("0\n");

# GET /inline/ep/partial
$t->get_ok('/inline/ep/partial')->status_is(200)
->content_is("♥just ♥\nworks!\n");
->content_is(b("♥just ♥\nworks!\n")->encode);

# GET /source
$t->get_ok('/source')->status_is(200)->content_like(qr/get_ok\('\/source/);
Expand Down Expand Up @@ -1699,7 +1702,7 @@ $t->get_ok('/bridge2stash')->status_is(200)

# GET /bridge2stash (broken session cookie)
$t->reset_session;
my $session = b("☃☃☃☃☃")->b64_encode('');
my $session = b("☃☃☃☃☃")->encode->b64_encode('');
my $hmac = $session->clone->hmac_md5_sum($t->app->secret);
my $broken = "\$Version=1; mojolicious=$session--$hmac; \$Path=/";
$t->get_ok('/bridge2stash' => {Cookie => $broken})->status_is(200)
Expand Down
4 changes: 2 additions & 2 deletions t/mojolicious/upload_lite_app.t
Expand Up @@ -101,7 +101,7 @@ post '/uploadlimit' => sub {
$self->res->body("called, $body");
return if $self->req->is_limit_exceeded;
if (my $u = $self->req->upload('Вячеслав')) {
$self->res->body($self->res->body . $u->filename . $u->size);
$self->res->body($self->res->body . b($u->filename)->encode . $u->size);
}
};

Expand Down Expand Up @@ -175,7 +175,7 @@ $backup = $ENV{MOJO_MAX_MESSAGE_SIZE} || '';
$ENV{MOJO_MAX_MESSAGE_SIZE} = 1073741824;
$tx = Mojo::Transaction::HTTP->new;
$part = Mojo::Content::Single->new;
$name = b('Вячеслав')->url_escape;
$name = b('Вячеслав')->encode->url_escape;
$part->headers->content_disposition(
qq/form-data; name="$name"; filename="$name.jpg"/);
$part->headers->content_type('image/jpeg');
Expand Down

0 comments on commit fce07e9

Please sign in to comment.