Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
added parse_header function to Mojo::Util and fixed a few boundary and charset detection bugs
  • Loading branch information
kraih committed Jun 1, 2013
1 parent eaca9da commit 180cf67
Show file tree
Hide file tree
Showing 9 changed files with 85 additions and 49 deletions.
4 changes: 4 additions & 0 deletions Changes
@@ -1,4 +1,8 @@

4.11 2013-06-02
- Added parse_header function to Mojo::Util.
- Fixed a few small boundary and charset detection bugs in Mojo::Content.

4.10 2013-06-01
- Added link and vary methods to Mojo::Headers.

Expand Down
4 changes: 2 additions & 2 deletions lib/Mojo/Content.pm
Expand Up @@ -18,7 +18,7 @@ sub body_size { croak 'Method "body_size" not implemented by subclass' }

sub boundary {
return undef unless my $type = shift->headers->content_type;
$type =~ m!multipart.*boundary=(?:"([^"]+)"|([\w'(),.:?\-+/]+))!i
$type =~ m!multipart.*boundary\s*=\s*(?:"([^"]+)"|([\w'(),.:?\-+/]+))!i
and return $1 // $2;
return undef;
}
Expand All @@ -28,7 +28,7 @@ sub build_headers { shift->_build('get_header_chunk') }

sub charset {
my $type = shift->headers->content_type // '';
return $type =~ /charset="?([^"\s;]+)"?/i ? $1 : undef;
return $type =~ /charset\s*=\s*"?([^"\s;]+)"?/i ? $1 : undef;
}

sub clone {
Expand Down
28 changes: 0 additions & 28 deletions lib/Mojo/Cookie.pm
Expand Up @@ -6,40 +6,12 @@ use overload
fallback => 1;

use Carp 'croak';
use Mojo::Util 'unquote';

has [qw(name value)];

sub parse { croak 'Method "parse" not implemented by subclass' }
sub to_string { croak 'Method "to_string" not implemented by subclass' }

# Split a cookie header into a list of elements, one array reference per
# comma-separated element, each holding "[$name, $value]" pairs for the
# semicolon-separated parts. The value is undefined when a part has none.
sub _tokenize {
  my ($self, $str) = @_;

  # Consume the string from the front, one name/value pair per iteration
  my (@elements, @pairs);
  while ($str =~ s/^\s*([^=;,]+)\s*=?\s*//) {
    my $name = $1;

    # Netscape style "expires" dates contain a comma, so the date gets
    # wrapped in quotes to make it survive the value match below
    if ($name =~ /^expires$/i) { $str =~ s/^([^;,]+,?[^;,]+)/"$1"/ }

    # Quoted string or everything up to the next separator
    my $value;
    if ($str =~ s/^("(?:\\\\|\\"|[^"])+"|[^;,]+)\s*//) {
      $value = unquote($1);
    }
    push @pairs, [$name, $value];

    # A ";" stays within the current element, a "," finishes it
    $str =~ s/^\s*;\s*//;
    if ($str =~ s/^\s*,\s*//) {
      push @elements, [@pairs];
      @pairs = ();
    }
  }

  # Pairs collected after the last "," form the final element
  return (@elements, \@pairs) if @pairs;
  return @elements;
}

1;

=head1 NAME
Expand Down
4 changes: 2 additions & 2 deletions lib/Mojo/Cookie/Request.pm
@@ -1,13 +1,13 @@
package Mojo::Cookie::Request;
use Mojo::Base 'Mojo::Cookie';

use Mojo::Util 'quote';
use Mojo::Util qw(parse_header quote);

sub parse {
my ($self, $str) = @_;

my @cookies;
for my $token (map {@$_} $self->_tokenize($str // '')) {
for my $token (map {@$_} @{parse_header($str // '')}) {
my ($name, $value) = @$token;
next if $name =~ /^\$/;
push @cookies, $self->new(name => $name, value => $value // '');
Expand Down
20 changes: 15 additions & 5 deletions lib/Mojo/Cookie/Response.pm
Expand Up @@ -2,7 +2,7 @@ package Mojo::Cookie::Response;
use Mojo::Base 'Mojo::Cookie';

use Mojo::Date;
use Mojo::Util 'quote';
use Mojo::Util qw(parse_header quote);

has [qw(domain httponly max_age path secure)];

Expand All @@ -22,13 +22,23 @@ sub parse {
my ($self, $str) = @_;

my @cookies;
for my $token ($self->_tokenize($str // '')) {
for my $i (0 .. $#$token) {
my ($name, $value) = @{$token->[$i]};
my $tree = parse_header($str // '');
while (my $token = shift @$tree) {
my $i = 0;
while (my $pair = shift @$token) {
my ($name, $value) = @$pair;

# "expires" is a special case, thank you Netscape...
if ($name =~ /^expires$/i) {
my $next = shift @$tree;
my $rest = shift @$next;
push @$token, @$next;
$value .= ", $rest->[0]";
}

# This will only run once
push @cookies, $self->new(name => $name, value => $value // '') and next
unless $i;
unless $i++;

# Attributes (Netscape and RFC 6265)
my @match
Expand Down
29 changes: 28 additions & 1 deletion lib/Mojo/Util.pm
Expand Up @@ -39,7 +39,7 @@ my %CACHE;
our @EXPORT_OK = (
qw(b64_decode b64_encode camelize class_to_file class_to_path decamelize),
qw(decode deprecated encode get_line hmac_sha1_sum html_unescape md5_bytes),
qw(md5_sum monkey_patch punycode_decode punycode_encode quote),
qw(md5_sum monkey_patch parse_header punycode_decode punycode_encode quote),
qw(secure_compare sha1_bytes sha1_sum slurp spurt squish steady_time trim),
qw(unquote url_escape url_unescape xml_escape xor_encode)
);
Expand Down Expand Up @@ -129,6 +129,27 @@ sub monkey_patch {
*{"${class}::$_"} = $patch{$_} for keys %patch;
}
# Parse an HTTP header value into a tree of elements and name/value pairs.
#
# Takes the raw header value and returns an array reference with one entry
# per comma-separated element. Every element is an array reference of pairs,
# one per semicolon-separated part, and every pair is either "[$name]" or
# "[$name, $value]", with the value passed through "unquote".
#
#   parse_header('foo="bar baz"; test=123, yada');
#   # [[['foo', 'bar baz'], ['test', '123']], [['yada']]]
#
# An undefined value is treated like an empty string, and empty list elements
# ("foo,,bar", a leading "," or repeated ";") are skipped instead of silently
# ending the parse and dropping the rest of the header.
sub parse_header {
  my $str = shift;
  $str //= '';

  # Nibbling parser, every successful match removes what it consumed
  my (@tree, @token);

  # Leading separators are nothing but empty elements
  $str =~ s/^[;,\s]+//;

  while ($str =~ s/^\s*([^=;,]*[^=;, ])\s*=?\s*//) {
    push @token, [$1];

    # Value, a quoted string or everything up to the next separator
    $token[-1][1] = unquote($1)
      if $str =~ s/^("(?:\\\\|\\"|[^"])+"|[^;,]+)\s*//;

    # Separator, any run of ";" stays within the current element
    $str =~ s/^[;\s]+//;
    next unless $str =~ s/^,[;,\s]*//;

    # A "," finishes the element, separators right after it are skipped
    push @tree, [@token];
    @token = ();
  }

  # Take care of final token
  return [@token ? (@tree, \@token) : @tree];
}
# Direct translation of RFC 3492
sub punycode_decode {
my $input = shift;
Expand Down Expand Up @@ -497,6 +518,12 @@ Monkey patch functions into package.
two => sub { say 'Two!' },
three => sub { say 'Three!' };
=head2 parse_header
my $parsed = parse_header 'foo="bar baz"; test=123, yada';
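Parse HTTP header value into an array reference with one entry per comma-separated element, each of them an array reference of C<[$name]> or C<[$name, $value]> pairs for its semicolon-separated parts.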
=head2 punycode_decode
my $str = punycode_decode $punycode;
Expand Down
2 changes: 1 addition & 1 deletion lib/Mojolicious.pm
Expand Up @@ -41,7 +41,7 @@ has static => sub { Mojolicious::Static->new };
has types => sub { Mojolicious::Types->new };

our $CODENAME = 'Top Hat';
our $VERSION = '4.10';
our $VERSION = '4.11';

sub AUTOLOAD {
my $self = shift;
Expand Down
19 changes: 12 additions & 7 deletions t/mojo/content.t
Expand Up @@ -58,7 +58,7 @@ is $content->build_body,
$content = Mojo::Content::MultiPart->new;
is $content->boundary, undef, 'no boundary';
$content->headers->content_type(
'multipart/form-data; boundary="azAZ09\'(),.:?-_+/"');
'multipart/form-data; boundary = "azAZ09\'(),.:?-_+/"');
is $content->boundary, "azAZ09\'(),.:?-_+/", 'right boundary';
is $content->boundary, $content->build_boundary, 'same boundary';
$content->headers->content_type('multipart/form-data');
Expand All @@ -70,12 +70,17 @@ $content->headers->content_type('MultiPart/Form-Data; BounDaRy="foo 123"');
is $content->boundary, 'foo 123', 'right boundary';
is $content->boundary, $content->build_boundary, 'same boundary';

# Tainted environment
$content = Mojo::Content::MultiPart->new;
'a' =~ /(.)/;
ok !$content->charset, 'no charset';
'a' =~ /(.)/;
ok !$content->boundary, 'no boundary';
# Charset detection
$content = Mojo::Content::Single->new;
is $content->charset, undef, 'no charset';
$content->headers->content_type('text/plain; charset=UTF-8');
is $content->charset, 'UTF-8', 'right charset';
$content->headers->content_type('text/plain; charset="UTF-8"');
is $content->charset, 'UTF-8', 'right charset';
$content->headers->content_type('text/plain; charset = UTF-8');
is $content->charset, 'UTF-8', 'right charset';
$content->headers->content_type('text/plain; charset = "UTF-8"');
is $content->charset, 'UTF-8', 'right charset';

# Partial content with 128bit content length
$content = Mojo::Content::Single->new;
Expand Down
24 changes: 21 additions & 3 deletions t/mojo/util.t
Expand Up @@ -11,9 +11,9 @@ use Mojo::DeprecationTest;
use Mojo::Util
qw(b64_decode b64_encode camelize class_to_file class_to_path decamelize),
qw(decode encode get_line hmac_sha1_sum html_unescape md5_bytes md5_sum),
qw(monkey_patch punycode_decode punycode_encode quote secure_compare),
qw(sha1_bytes sha1_sum slurp spurt squish steady_time trim unquote),
qw(url_escape url_unescape xml_escape xor_encode);
qw(monkey_patch parse_header punycode_decode punycode_encode quote),
qw(secure_compare sha1_bytes sha1_sum slurp spurt squish steady_time trim),
qw(unquote url_escape url_unescape xml_escape xor_encode);

# camelize
is camelize('foo_bar_baz'), 'FooBarBaz', 'right camelized result';
Expand Down Expand Up @@ -58,6 +58,24 @@ is get_line(\$buffer), 'yada', 'right line';
is $buffer, '', 'no buffer content';
is get_line(\$buffer), undef, 'no line';

# parse_header
is_deeply parse_header(''), [], 'right result';
is_deeply parse_header('foo,bar,baz'), [[['foo']], [['bar']], [['baz']]],
'right result';
is_deeply parse_header('foo="b a\" r\"\\\\"'), [[['foo', 'b a" r"\\']]],
'right result';
is_deeply parse_header('foo = "b a\" r\"\\\\"'), [[['foo', 'b a" r"\\']]],
'right result';
my $header = q{</foo/bar>; rel="x"; t*=UTF-8'de'a%20b};
my $parsed = [[['</foo/bar>'], ['rel', 'x'], ['t*', 'UTF-8\'de\'a%20b']]];
is_deeply parse_header($header), $parsed, 'right result';
$header = 'a=b c; A=b.c; D=/E; a-b=3; F=Thu, 07 Aug 2008 07:07:59 GMT; Ab;';
$parsed = [
[['a', 'b c'], ['A', 'b.c'], ['D', '/E'], ['a-b', '3'], ['F', 'Thu']],
[['07 Aug 2008 07:07:59 GMT'], ['Ab']]
];
is_deeply parse_header($header), $parsed, 'right result';

# b64_encode
is b64_encode('foobar$%^&3217'), "Zm9vYmFyJCVeJjMyMTc=\n",
'right base64 encoded result';
Expand Down

0 comments on commit 180cf67

Please sign in to comment.