Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
improved Mojo::Path performance significantly
  • Loading branch information
kraih committed Nov 24, 2012
1 parent 1e5af14 commit 50179ec
Show file tree
Hide file tree
Showing 9 changed files with 90 additions and 46 deletions.
4 changes: 3 additions & 1 deletion Changes
@@ -1,5 +1,7 @@

3.61 2012-11-23
3.61 2012-11-24
- Added normalize method to Mojo::Path.
- Improved Mojo::Path performance significantly.
- Improved documentation.
- Improved tests.

Expand Down
84 changes: 55 additions & 29 deletions lib/Mojo/Path.pm
Expand Up @@ -5,10 +5,9 @@ use overload
'""' => sub { shift->to_string },
fallback => 1;

use Mojo::Util qw(encode url_escape url_unescape);
use Mojo::Util qw(decode encode url_escape url_unescape);

has [qw(leading_slash trailing_slash)];
has parts => sub { [] };

sub new { shift->SUPER::new->parse(@_) }

Expand Down Expand Up @@ -38,11 +37,14 @@ sub canonicalize {
}

# Create a new Mojo::Path object that carries the same path string and the
# same leading and trailing slash flags as the invocant.
# NOTE(review): this listing appears to show replaced and replacement lines
# side by side (duplicated "my $self = shift;", two "return" statements);
# confirm against the real file which of them are live.
sub clone {
my $self = shift;
my $self = shift;

# Start from an empty path and copy the internal string as is
my $clone = Mojo::Path->new;
$clone->{string} = $self->{string};
$clone->leading_slash($self->leading_slash);
$clone->trailing_slash($self->trailing_slash);
return $clone->parts([@{$self->parts}]);

return $clone;
}

sub contains {
Expand All @@ -64,21 +66,35 @@ sub merge {
return $self->parse($path) if $path =~ m!^/!;

# Merge
pop @{$self->parts} unless $self->trailing_slash;
my $parts = $self->parts;
pop @$parts unless $self->trailing_slash;
$path = $self->new($path);
push @{$self->parts}, @{$path->parts};
$self->parts([@$parts, @{$path->parts}]);
return $self->trailing_slash($path->trailing_slash);
}

# Rebuild the stored path string from its parts and return the invocant, so
# calls can be chained.
sub normalize {
  my ($self) = @_;

  # Round trip: unescape and split into parts, then escape every part and
  # join them again
  my @parts = _string_to_parts($self->{string});
  $self->{string} = _parts_to_string(@parts);

  return $self;
}

# Parse a path string: record whether it has a leading and/or trailing slash,
# store what is left in the "string" slot and return the invocant.
# NOTE(review): this listing appears to show replaced and replacement lines
# side by side (two consecutive rounds of slash stripping); confirm against
# the real file which of them are live.
sub parse {
my ($self, $path) = @_;

$path = url_unescape $path // '';
utf8::decode $path;
$path =~ s!^/!! ? $self->leading_slash(1) : $self->leading_slash(undef);
$path =~ s!/$!! ? $self->trailing_slash(1) : $self->trailing_slash(undef);
# An undefined path is treated as the empty string
$path //= '';
# Remove one leading and one trailing slash, written literally or as "%2F"
# in either case, and set the matching flag to 1 (or undef when absent)
$self->leading_slash($path =~ s!^(?:%2F|/)!!i ? 1 : undef);
$self->trailing_slash($path =~ s!(?:%2F|/)$!!i ? 1 : undef);
# The remainder is kept without unescaping it
$self->{string} = $path;

return $self;
}

return $self->parts([split '/', $path, -1]);
# Accessor for the path parts. Called without a true argument it returns a new
# array reference built from the stored string; called with an array reference
# it replaces the stored string and returns the invocant.
sub parts {
  my $self  = shift;
  my $parts = shift;

  # Setter
  if ($parts) {
    $self->{string} = _parts_to_string(@$parts);
    return $self;
  }

  # Getter
  return [_string_to_parts($self->{string})];
}

sub to_abs_string {
Expand All @@ -89,18 +105,25 @@ sub to_abs_string {
# Turn the path into a string, adding a leading and/or trailing slash when the
# corresponding flag is set.
# NOTE(review): this listing appears to show replaced and replacement lines
# side by side ("my $path" is declared twice, once built from the parts and
# once from the stored string); confirm against the real file which are live.
sub to_string {
my $self = shift;

# Escape
my $chars = '^A-Za-z0-9\-._~!$&\'()*+,;=:@';
my @parts = map { url_escape(encode('UTF-8', $_), $chars) } @{$self->parts};

# Format
my $path = join '/', @parts;
# Escape the UTF-8 encoded string as a whole; compared to the pattern above
# this one also lists "%" and "/" (presumably so existing escape sequences and
# separators pass through unchanged - verify against url_escape)
my $path = url_escape encode('UTF-8', $self->{string}),
'^A-Za-z0-9\-._~!$&\'()*+,;=%:@/';
$path = "/$path" if $self->leading_slash;
$path = "$path/" if $self->trailing_slash;

return $path;
}

# Encode every given part as UTF-8, escape it with url_escape and join the
# results with "/".
sub _parts_to_string {
  my $pattern = '^A-Za-z0-9\-._~!$&\'()*+,;=:@';

  my @escaped;
  for my $part (@_) {
    push @escaped, url_escape(encode('UTF-8', $part), $pattern);
  }

  return join '/', @escaped;
}

# Unescape the given string, decode it as UTF-8 where possible and split it
# on "/" into a list of parts.
sub _string_to_parts {
  my $unescaped = url_unescape shift;

  # Keep the unescaped value when decode does not return a defined result
  my $decoded = decode('UTF-8', $unescaped);
  $decoded = $unescaped unless defined $decoded;

  # Negative limit, trailing empty parts are preserved
  return split '/', $decoded, -1;
}

1;

=head1 NAME
Expand All @@ -111,8 +134,8 @@ Mojo::Path - Path
use Mojo::Path;
my $path = Mojo::Path->new('/foo%2Fbar%3B/baz.html');
shift @{$path->parts};
my $path = Mojo::Path->new('/foo%2Fbar%3B/../baz.html');
$path->canonicalize;
say "$path";
=head1 DESCRIPTION
Expand All @@ -130,16 +153,6 @@ L<Mojo::Path> implements the following attributes.
Path has a leading slash.
=head2 C<parts>
my $parts = $path->parts;
$path = $path->parts([qw(foo bar baz)]);
The path parts.
# Part with slash
push @{$path->parts}, 'foo/bar';
=head2 C<trailing_slash>
my $trailing_slash = $path->trailing_slash;
Expand Down Expand Up @@ -207,12 +220,25 @@ Merge paths.
# "/foo/bar/baz/yada"
Mojo::Path->new('/foo/bar/')->merge('baz/yada');
=head2 C<normalize>
$path = $path->normalize;
Normalize path by unescaping it, splitting it into parts and escaping every part again.
=head2 C<parse>
$path = $path->parse('/foo%2Fbar%3B/baz.html');
Parse path. Note that C<%2F> will be treated as C</> for security reasons.
=head2 C<parts>
my $parts = $path->parts;
$path = $path->parts([qw(foo bar baz)]);
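The path parts. Note that the getter returns a new array reference on every call, so changes to that array only take effect once it is passed back to the setter.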
=head2 C<to_abs_string>
my $string = $path->to_abs_string;
Expand Down
2 changes: 1 addition & 1 deletion lib/Mojo/UserAgent/CookieJar.pm
Expand Up @@ -52,7 +52,7 @@ sub find {

# Look through the jar
return unless my $domain = $url->host;
my $path = $url->path->to_string || '/';
my $path = $url->path->clone->normalize->to_abs_string;
my @found;
while ($domain =~ /[^.]+\.[^.]+|localhost$/) {
next unless my $old = $self->{jar}{$domain};
Expand Down
2 changes: 1 addition & 1 deletion lib/Mojolicious/Controller.pm
Expand Up @@ -432,7 +432,7 @@ sub url_for {

# Make path absolute
my $base_path = $base->path;
unshift @{$path->parts}, @{$base_path->parts};
$path->parts([@{$base_path->parts}, @{$path->parts}]);
$base_path->parts([])->trailing_slash(0);

return $url;
Expand Down
9 changes: 7 additions & 2 deletions lib/Mojolicious/Guides/Cookbook.pod
Expand Up @@ -241,8 +241,13 @@ incoming requests is also quite common.
# Move first part from path to base path in production mode
app->hook(before_dispatch => sub {
my $self = shift;
push @{$self->req->url->base->path->parts},
shift @{$self->req->url->path->parts};

my $path = $self->req->url->path->parts;
my $base = $self->req->url->base->path->parts;
push @$base, shift @$path;
$self->req->url->path->parts($path);
$self->req->url->base->path->parts($base);

}) if app->mode eq 'production';

=head2 Application embedding
Expand Down
3 changes: 2 additions & 1 deletion lib/Mojolicious/Static.pm
Expand Up @@ -23,7 +23,8 @@ sub dispatch {

# Canonical path
my $stash = $c->stash;
my $path = $stash->{path} || $c->req->url->path->clone->canonicalize;
my $path = $stash->{path}
|| $c->req->url->path->clone->normalize->canonicalize;

# Split parts
return undef unless my @parts = @{Mojo::Path->new("$path")->parts};
Expand Down
15 changes: 9 additions & 6 deletions t/mojo/path.t
Expand Up @@ -62,7 +62,6 @@ is $path->to_abs_string, '/0', 'right result';
# Canonicalizing
$path = Mojo::Path->new(
'/%2f..%2f..%2f..%2f..%2f..%2f..%2f..%2f..%2f..%2f..%2fetc%2fpasswd');
is "$path", '//../../../../../../../../../../etc/passwd', 'right result';
is $path->parts->[0], '', 'right part';
is $path->parts->[1], '..', 'right part';
is $path->parts->[2], '..', 'right part';
Expand All @@ -77,6 +76,8 @@ is $path->parts->[10], '..', 'right part';
is $path->parts->[11], 'etc', 'right part';
is $path->parts->[12], 'passwd', 'right part';
is $path->parts->[13], undef, 'no part';
is $path->normalize, '//../../../../../../../../../../etc/passwd',
'right result';
is $path->canonicalize, '/../../../../../../../../../../etc/passwd',
'right result';
is $path->parts->[0], '..', 'right part';
Expand All @@ -98,7 +99,6 @@ ok !$path->trailing_slash, 'no trailing slash';
# Canonicalizing (alternative)
$path = Mojo::Path->new(
'%2ftest%2f..%2f..%2f..%2f..%2f..%2f..%2f..%2f..%2f..%2fetc%2fpasswd');
is "$path", '/test/../../../../../../../../../etc/passwd', 'right result';
is $path->parts->[0], 'test', 'right part';
is $path->parts->[1], '..', 'right part';
is $path->parts->[2], '..', 'right part';
Expand All @@ -112,6 +112,8 @@ is $path->parts->[9], '..', 'right part';
is $path->parts->[10], 'etc', 'right part';
is $path->parts->[11], 'passwd', 'right part';
is $path->parts->[12], undef, 'no part';
is $path->normalize, '/test/../../../../../../../../../etc/passwd',
'right result';
is $path->canonicalize, '/../../../../../../../../etc/passwd', 'right result';
is $path->parts->[0], '..', 'right part';
is $path->parts->[1], '..', 'right part';
Expand All @@ -129,7 +131,6 @@ ok !$path->trailing_slash, 'no trailing slash';

# Canonicalizing (with escaped "%")
$path = Mojo::Path->new('%2ftest%2f..%252f..%2f..%2f..%2f..%2fetc%2fpasswd');
is "$path", '/test/..%252f../../../../etc/passwd', 'right result';
is $path->parts->[0], 'test', 'right part';
is $path->parts->[1], '..%2f..', 'right part';
is $path->parts->[2], '..', 'right part';
Expand All @@ -138,6 +139,7 @@ is $path->parts->[4], '..', 'right part';
is $path->parts->[5], 'etc', 'right part';
is $path->parts->[6], 'passwd', 'right part';
is $path->parts->[7], undef, 'no part';
is $path->normalize, '/test/..%252f../../../../etc/passwd', 'right result';
is $path->canonicalize, '/../etc/passwd', 'right result';
is $path->parts->[0], '..', 'right part';
is $path->parts->[1], 'etc', 'right part';
Expand Down Expand Up @@ -267,9 +269,10 @@ ok $path->leading_slash, 'has leading slash';
ok $path->trailing_slash, 'has trailing slash';

# Escaped slash
$path = Mojo::Path->new->parts(['foo/bar']);
is $path->parts->[0], 'foo/bar', 'right part';
is $path->parts->[1], undef, 'no part';
$path = Mojo::Path->new('foo%2Fbar');
is $path->parts->[0], 'foo', 'right part';
is $path->parts->[1], 'bar', 'right part';
is $path->parts->[2], undef, 'no part';
is "$path", 'foo%2Fbar', 'right result';
is $path->to_string, 'foo%2Fbar', 'right result';
is $path->to_abs_string, '/foo%2Fbar', 'right result';
Expand Down
2 changes: 1 addition & 1 deletion t/mojo/request.t
Expand Up @@ -1056,7 +1056,7 @@ $clone = $req->clone;
$clone->method('POST');
$clone->headers->expect('nothing');
$clone->version('1.2');
push @{$clone->url->path->parts}, 'baz';
$clone->url->path->parts([@{$clone->url->path->parts}, 'baz']);
$req = Mojo::Message::Request->new->parse($req->to_string);
ok $req->is_finished, 'request is finished';
is $req->method, 'GET', 'right method';
Expand Down
15 changes: 11 additions & 4 deletions t/mojo/url.t
Expand Up @@ -250,11 +250,16 @@ is $url->scheme, 'http', 'right scheme';
is $url->userinfo, undef, 'no userinfo';
is $url->host, 'acme.s3.amazonaws.com', 'right host';
is $url->port, undef, 'no port';
is $url->path, '/mojo/g++-4.2_4.2.3-2ubuntu7_i386.deb', 'right path';
is $url->path, '/mojo%2Fg%2B%2B-4%2E2_4%2E2%2E3-2ubuntu7_i386%2Edeb',
'right path';
ok !$url->query->to_string, 'no query';
is_deeply $url->query->to_hash, {}, 'right structure';
is $url->fragment, undef, 'no fragment';
is "$url", 'http://acme.s3.amazonaws.com/mojo/g++-4.2_4.2.3-2ubuntu7_i386.deb',
is "$url",
'http://acme.s3.amazonaws.com/mojo%2Fg%2B%2B-4%2E2_4%2E2%2E3-2ubuntu7_i386%2Edeb',
'right format';
is $url->tap(sub { $_->path->normalize }),
'http://acme.s3.amazonaws.com/mojo/g++-4.2_4.2.3-2ubuntu7_i386.deb',
'right format';

# Clone (advanced)
Expand Down Expand Up @@ -615,11 +620,13 @@ is $url->host, '1.1.1.1.1.1', 'right host';
# "%" in path
$url = Mojo::URL->new('http://mojolicio.us/100%_fun');
is $url->path->parts->[0], '100%_fun', 'right part';
is $url->path, '/100%25_fun', 'right path';
is $url->path, '/100%_fun', 'right path';
is $url->path->normalize, '/100%25_fun', 'right normalized path';
is "$url", 'http://mojolicio.us/100%25_fun', 'right format';
$url = Mojo::URL->new('http://mojolicio.us/100%fun');
is $url->path->parts->[0], '100%fun', 'right part';
is $url->path, '/100%25fun', 'right path';
is $url->path, '/100%fun', 'right path';
is $url->path->normalize, '/100%25fun', 'right normalized path';
is "$url", 'http://mojolicio.us/100%25fun', 'right format';
$url = Mojo::URL->new('http://mojolicio.us/100%25_fun');
is $url->path->parts->[0], '100%_fun', 'right part';
Expand Down

0 comments on commit 50179ec

Please sign in to comment.