Skip to content

Commit

Permalink
fixed RFC 7159 support in Mojo::JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
kraih committed Mar 3, 2014
1 parent ac9a001 commit f9d4985
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 125 deletions.
3 changes: 3 additions & 0 deletions Changes
@@ -1,4 +1,7 @@

4.87 2014-03-04
- Fixed RFC 7159 support in Mojo::JSON.

4.86 2014-03-03
- Improved Mojo::IOLoop::Delay to allow more method chaining.
- Improved WebSocket and long poll performance.
Expand Down
69 changes: 18 additions & 51 deletions lib/Mojo/JSON.pm
Expand Up @@ -31,20 +31,12 @@ my %ESCAPE = (
my %REVERSE = map { $ESCAPE{$_} => "\\$_" } keys %ESCAPE;
for (0x00 .. 0x1f) { $REVERSE{pack 'C', $_} //= sprintf '\u%.4X', $_ }

# Unicode encoding detection
my $UTF_PATTERNS = {
'UTF-32BE' => qr/^\x00{3}[^\x00]/,
'UTF-32LE' => qr/^[^\x00]\x00{3}/,
'UTF-16BE' => qr/^(?:\x00[^\x00]){2}/,
'UTF-16LE' => qr/^(?:[^\x00]\x00){2}/
};

my $WHITESPACE_RE = qr/[\x20\x09\x0a\x0d]*/;

sub decode {
my $self = shift->error(undef);
my $ref = eval { _decode(shift) };
return $ref if $ref;
my $value;
return $value if eval { $value = _decode(shift); 1 };
$self->error(_chomp($@));
return undef;
}
Expand Down Expand Up @@ -73,37 +65,23 @@ sub _decode {
# Missing input
die "Missing or empty input\n" unless length(my $bytes = shift);

# Remove BOM
$bytes =~ s/^(?:\357\273\277|\377\376\0\0|\0\0\376\377|\376\377|\377\376)//g;

# Wide characters
die "Wide character in input\n" unless utf8::downgrade($bytes, 1);

# Detect and decode Unicode
my $encoding = 'UTF-8';
$bytes =~ $UTF_PATTERNS->{$_} and $encoding = $_ for keys %$UTF_PATTERNS;
local $_ = Mojo::Util::decode($encoding, $bytes) // '';
# UTF-8
die "Input is not UTF-8 encoded\n"
unless defined(local $_ = Mojo::Util::decode('UTF-8', $bytes));

# Leading whitespace
m/\G$WHITESPACE_RE/gc;

# Array
my $ref;
if (m/\G\[/gc) { $ref = _decode_array() }

# Object
elsif (m/\G\{/gc) { $ref = _decode_object() }

# Invalid character
else { _exception('Expected array or object') }
# Value
my $value = _decode_value();

# Leftover data
unless (m/\G$WHITESPACE_RE\z/gc) {
my $got = ref $ref eq 'ARRAY' ? 'array' : 'object';
_exception("Unexpected data after $got");
}
_exception('Unexpected data') unless m/\G$WHITESPACE_RE\z/gc;

return $ref;
return $value;
}

sub _decode_array {
Expand Down Expand Up @@ -333,17 +311,12 @@ Mojo::JSON - Minimalistic JSON
# Handle errors
my $json = Mojo::JSON->new;
if (defined(my $hash = $json->decode($bytes))) { say $hash->{message} }
else { say 'Error: ', $json->error }
# Ignore errors
use Mojo::JSON 'j';
my $bytes = j({foo => [1, 2], bar => 'hello!', baz => \1});
my $hash = j($bytes);
elsif (my $err = $json->error) { say "Error: $err" }
=head1 DESCRIPTION
L<Mojo::JSON> is a minimalistic and possibly the fastest pure-Perl
implementation of L<RFC 4627|http://tools.ietf.org/html/rfc4627>.
implementation of L<RFC 7159|http://tools.ietf.org/html/rfc7159>.
It supports normal Perl data types like C<Scalar>, C<Array> reference, C<Hash>
reference and will try to call the C<TO_JSON> method on blessed references, or
Expand All @@ -369,9 +342,8 @@ if their values are true or false.
\1 -> true
\0 -> false
Decoding UTF-16 (LE/BE) and UTF-32 (LE/BE) will be handled transparently,
encoding will only generate UTF-8. The two Unicode whitespace characters
C<u2028> and C<u2029> will always be escaped to make JSONP easier.
The two Unicode whitespace characters C<u2028> and C<u2029> will always be
escaped to make JSONP easier.
=head1 FUNCTIONS
Expand All @@ -380,14 +352,12 @@ individually.
=head2 decode_json
my $array = decode_json($bytes);
my $hash = decode_json($bytes);
my $value = decode_json($bytes);
Decode JSON to Perl data structure and die if decoding fails.
=head2 encode_json
my $bytes = encode_json([1, 2, 3]);
my $bytes = encode_json({foo => 'bar'});
Encode Perl data structure to JSON.
Expand All @@ -396,11 +366,10 @@ Encode Perl data structure to JSON.
my $bytes = j([1, 2, 3]);
my $bytes = j({foo => 'bar'});
my $array = j($bytes);
my $hash = j($bytes);
my $value = j($bytes);
Encode Perl data structure or decode JSON and return C<undef> if decoding
fails.
Encode Perl data structure, which may only be an C<Array> reference or C<Hash>
reference, or decode JSON and return C<undef> if decoding fails.
=head1 ATTRIBUTES
Expand All @@ -420,14 +389,12 @@ following new ones.
=head2 decode
my $array = $json->decode($bytes);
my $hash = $json->decode($bytes);
my $value = $json->decode($bytes);
Decode JSON to Perl data structure and return C<undef> if decoding fails.
=head2 encode
my $bytes = $json->encode([1, 2, 3]);
my $bytes = $json->encode({foo => 'bar'});
Encode Perl data structure to JSON.
Expand Down
3 changes: 1 addition & 2 deletions lib/Mojo/Message.pm
Expand Up @@ -567,8 +567,7 @@ Check if message has exceeded L</"max_line_size"> or L</"max_message_size">.
=head2 json
my $hash = $msg->json;
my $array = $msg->json;
my $value = $msg->json;
my $value = $msg->json('/foo/bar');
Decode JSON message body directly using L<Mojo::JSON> if possible, returns
Expand Down
2 changes: 1 addition & 1 deletion lib/Mojolicious.pm
Expand Up @@ -43,7 +43,7 @@ has types => sub { Mojolicious::Types->new };
has validator => sub { Mojolicious::Validator->new };

our $CODENAME = 'Top Hat';
our $VERSION = '4.86';
our $VERSION = '4.87';

sub AUTOLOAD {
my $self = shift;
Expand Down
2 changes: 1 addition & 1 deletion lib/Mojolicious/Controller.pm
Expand Up @@ -755,7 +755,7 @@ Get L<Mojo::Message::Request> object from L<Mojo::Transaction/"req">.
my $bytes = $c->req->body;
my $str = $c->req->text;
my $hash = $c->req->params->to_hash;
my $hash = $c->req->json;
my $value = $c->req->json;
my $foo = $c->req->json('/23/foo');
my $dom = $c->req->dom;
my $bar = $c->req->dom('div.bar')->first->text;
Expand Down
4 changes: 2 additions & 2 deletions lib/ojo.pm
Expand Up @@ -131,9 +131,9 @@ L<Mojo::Message::Response> object.
=head2 j
my $bytes = j([1, 2, 3]);
my $bytes = j({foo => 'bar'});
my $array = j($bytes);
my $hash = j($bytes);
my $value = j($bytes);
Encode Perl data structure or decode JSON with L<Mojo::JSON/"j">.
Expand Down
103 changes: 35 additions & 68 deletions t/mojo/json.t
Expand Up @@ -43,6 +43,10 @@ $array = $json->decode('[37.7668 , [ 20 ]] ');
is_deeply $array, [37.7668, [20]], 'decode [37.7668 , [ 20 ]] ';
$array = $json->decode('[1e3]');
cmp_ok $array->[0], '==', 1e3, 'value is 1e3';
my $value = $json->decode('0');
cmp_ok $value, '==', 0, 'decode 0';
$value = $json->decode('23.3');
cmp_ok $value, '==', 23.3, 'decode 23.3';

# Decode name
$array = $json->decode('[true]');
Expand All @@ -52,6 +56,12 @@ is_deeply $array, [undef], 'decode [null]';
$array = $json->decode('[true, false]');
is_deeply $array, [Mojo::JSON->true, Mojo::JSON->false],
'decode [true, false]';
$value = $json->decode('true');
is $value, Mojo::JSON->true, 'decode true';
$value = $json->decode('false');
is $value, Mojo::JSON->false, 'decode false';
$value = $json->decode('null');
is $value, undef, 'decode null';

# Decode string
$array = $json->decode('[" "]');
Expand All @@ -73,6 +83,10 @@ $array = $json->decode('["1"]');
is_deeply $array, ['1'], 'decode ["1"]';
$array = $json->decode('["\u0007\b\/\f\r"]');
is_deeply $array, ["\a\b/\f\r"], 'decode ["\u0007\b\/\f\r"]';
$value = $json->decode('""');
is $value, '', 'decode ""';
$value = $json->decode('"hell\no"');
is $value, "hell\no", 'decode "hell\no"';

# Decode object
my $hash = $json->decode('{}');
Expand Down Expand Up @@ -135,6 +149,10 @@ is $bytes, '["123abc"]', 'encode ["123abc"]';
$bytes = $json->encode(["\x00\x1f \a\b/\f\r"]);
is $bytes, '["\\u0000\\u001F \\u0007\\b\/\f\r"]',
'encode ["\x00\x1f \a\b/\f\r"]';
$bytes = $json->encode('');
is $bytes, '""', 'encode ""';
$bytes = $json->encode("hell\no");
is $bytes, '"hell\no"', 'encode "hell\no"';

# Encode object
$bytes = $json->encode({});
Expand All @@ -155,6 +173,12 @@ $bytes = $json->encode([undef]);
is $bytes, '[null]', 'encode [undef]';
$bytes = $json->encode([Mojo::JSON->true, Mojo::JSON->false]);
is $bytes, '[true,false]', 'encode [Mojo::JSON->true, Mojo::JSON->false]';
$bytes = $json->encode(Mojo::JSON->true);
is $bytes, 'true', 'encode Mojo::JSON->true';
$bytes = $json->encode(Mojo::JSON->false);
is $bytes, 'false', 'encode Mojo::JSON->false';
$bytes = $json->encode(undef);
is $bytes, 'null', 'encode undef';

# Encode number
$bytes = $json->encode([1]);
Expand All @@ -173,59 +197,17 @@ $bytes = $json->encode([10e12, [2]]);
is $bytes, '[10000000000000,[2]]', 'encode [10e12, [2]]';
$bytes = $json->encode([37.7668, [20]]);
is $bytes, '[37.7668,[20]]', 'encode [37.7668, [20]]';
$bytes = $json->encode(0);
is $bytes, '0', 'encode 0';
$bytes = $json->encode(23.3);
is $bytes, '23.3', 'encode 23.3';

# Faihu roundtrip
$bytes = j(["\x{10346}"]);
is b($bytes)->decode('UTF-8'), "[\"\x{10346}\"]", 'encode ["\x{10346}"]';
$array = j($bytes);
is_deeply $array, ["\x{10346}"], 'successful roundtrip';

# Decode UTF-16LE
$array = $json->decode(b("\x{feff}[true]")->encode('UTF-16LE'));
is_deeply $array, [Mojo::JSON->true], 'decode \x{feff}[true]';

# Decode UTF-16LE with faihu surrogate pair
$array = $json->decode(b("\x{feff}[\"\\ud800\\udf46\"]")->encode('UTF-16LE'));
is_deeply $array, ["\x{10346}"], 'decode \x{feff}[\"\\ud800\\udf46\"]';

# Decode UTF-16LE with faihu surrogate pair and BOM value
$array = $json->decode(
b("\x{feff}[\"\\ud800\\udf46\x{feff}\"]")->encode('UTF-16LE'));
is_deeply $array, ["\x{10346}\x{feff}"],
'decode \x{feff}[\"\\ud800\\udf46\x{feff}\"]';

# Decode UTF-16BE with faihu surrogate pair
$array = $json->decode(b("\x{feff}[\"\\ud800\\udf46\"]")->encode('UTF-16BE'));
is_deeply $array, ["\x{10346}"], 'decode \x{feff}[\"\\ud800\\udf46\"]';

# Decode UTF-32LE
$array = $json->decode(b("\x{feff}[true]")->encode('UTF-32LE'));
is_deeply $array, [Mojo::JSON->true], 'decode \x{feff}[true]';

# Decode UTF-32BE
$array = $json->decode(b("\x{feff}[true]")->encode('UTF-32BE'));
is_deeply $array, [Mojo::JSON->true], 'decode \x{feff}[true]';

# Decode UTF-16LE without BOM
$array
= $json->decode(b("[\"\\ud800\\udf46\"]")->encode('UTF-16LE')->to_string);
is_deeply $array, ["\x{10346}"], 'decode [\"\\ud800\\udf46\"]';

# Decode UTF-16BE without BOM
$array
= $json->decode(b("[\"\\ud800\\udf46\"]")->encode('UTF-16BE')->to_string);
is_deeply $array, ["\x{10346}"], 'decode [\"\\ud800\\udf46\"]';

# Decode UTF-32LE without BOM
$array
= $json->decode(b("[\"\\ud800\\udf46\"]")->encode('UTF-32LE')->to_string);
is_deeply $array, ["\x{10346}"], 'decode [\"\\ud800\\udf46\"]';

# Decode UTF-32BE without BOM
$array
= $json->decode(b("[\"\\ud800\\udf46\"]")->encode('UTF-32BE')->to_string);
is_deeply $array, ["\x{10346}"], 'decode [\"\\ud800\\udf46\"]';

# Decode object with duplicate keys
$hash = $json->decode('{"foo": 1, "foo": 2}');
is_deeply $hash, {foo => 2}, 'decode {"foo": 1, "foo": 2}';
Expand Down Expand Up @@ -324,12 +306,10 @@ like $json->encode({test => -sin(9**9**9)}), qr/^{"test":".*"}$/,
# Errors
is $json->decode('["♥"]'), undef, 'wide character in input';
is $json->error, 'Wide character in input', 'right error';
is $json->decode(b("\x{feff}[\"\\ud800\"]")->encode('UTF-16LE')), undef,
'syntax error';
is $json->decode(b('["\\ud800"]')->encode), undef, 'syntax error';
is $json->error, 'Malformed JSON: Missing low-surrogate at line 1, offset 8',
'right error';
is $json->decode(b("\x{feff}[\"\\udf46\"]")->encode('UTF-16LE')), undef,
'syntax error';
is $json->decode(b('["\\udf46"]')->encode), undef, 'syntax error';
is $json->error, 'Malformed JSON: Missing high-surrogate at line 1, offset 8',
'right error';
is $json->decode('[[]'), undef, 'syntax error';
Expand Down Expand Up @@ -361,35 +341,22 @@ is $json->decode('["foo]'), undef, 'syntax error';
is $json->error, 'Malformed JSON: Unterminated string at line 1, offset 6',
'right error';
is $json->decode('{"foo":"bar"}lala'), undef, 'syntax error';
is $json->error,
'Malformed JSON: Unexpected data after object at line 1, offset 13',
'right error';
is $json->decode('false'), undef, 'syntax error';
is $json->error,
'Malformed JSON: Expected array or object at line 0, offset 0',
is $json->error, 'Malformed JSON: Unexpected data at line 1, offset 13',
'right error';
is $json->decode(''), undef, 'missing input';
is $json->error, 'Missing or empty input', 'right error';
is $json->decode("[\"foo\",\n\"bar\"]lala"), undef, 'syntax error';
is $json->error,
'Malformed JSON: Unexpected data after array at line 2, offset 6',
is $json->error, 'Malformed JSON: Unexpected data at line 2, offset 6',
'right error';
is $json->decode("[\"foo\",\n\"bar\",\n\"bazra\"]lalala"), undef,
'syntax error';
is $json->error,
'Malformed JSON: Unexpected data after array at line 3, offset 8',
'right error';
is $json->decode('0'), undef, 'syntax error';
is $json->error,
'Malformed JSON: Expected array or object at line 0, offset 0',
is $json->error, 'Malformed JSON: Unexpected data at line 3, offset 8',
'right error';
is $json->decode(encode('Shift_JIS', 'やった')), undef, 'invalid encoding';
is $json->error,
'Malformed JSON: Expected array or object at line 0, offset 0',
'right error';
is $json->error, 'Input is not UTF-8 encoded', 'right error';
is j('{'), undef, 'syntax error';
eval { decode_json("[\"foo\",\n\"bar\",\n\"bazra\"]lalala") };
like $@, qr/JSON: Unexpected data after array at line 3, offset 8 at.*json\.t/,
like $@, qr/JSON: Unexpected data at line 3, offset 8 at.*json\.t/,
'right error';

done_testing();

0 comments on commit f9d4985

Please sign in to comment.