Skip to content

Commit

Permalink
JSON: some fixes based on http://seriot.ch/parsing_json.html
Browse files Browse the repository at this point in the history
- Correctly error on control characters inside strings
- Prevent stack overflow for too nested structures
- Minor parsing fixes
Ary Borenszweig committed Oct 26, 2016

Verified

This commit was signed with the committer’s verified signature.
1 parent 61a3b69 commit 7eb738f
Showing 6 changed files with 103 additions and 30 deletions.
13 changes: 13 additions & 0 deletions spec/std/json/parser_spec.cr
Original file line number Diff line number Diff line change
@@ -52,6 +52,19 @@ describe JSON::Parser do
it_raises_on_parse "[0]1"
it_raises_on_parse "[0] 1 "
it_raises_on_parse "[\"\\u123z\"]"
it_raises_on_parse "[1 true]"
it_raises_on_parse %({"foo": 1 "bar": 2})
it_raises_on_parse %([2.])
it_raises_on_parse %("hello\nworld")
it_raises_on_parse %("\\u201cello\nworld")
it_raises_on_parse %("hello\tworld")
it_raises_on_parse %("\\u201cello\tworld")

it_raises_on_parse "1\u{0}"

# Prevent too deep nesting (prevents stack overflow)
it_raises_on_parse(("[" * 513) + ("]" * 513))
it_raises_on_parse(("{" * 513) + ("}" * 513))

it "returns raw" do
value = JSON.parse_raw("1")
4 changes: 4 additions & 0 deletions spec/std/json/pull_parser_spec.cr
Original file line number Diff line number Diff line change
@@ -162,6 +162,10 @@ describe JSON::PullParser do
assert_pull_parse_error %({"name": "John", "age", 1})
assert_pull_parse_error %({"name": "John", "age": "foo", "bar"})

# Prevent too deep nesting (prevents stack overflow)
assert_pull_parse_error(("[" * 513) + ("]" * 513))
assert_pull_parse_error(("{" * 513) + ("}" * 513))

describe "skip" do
[
{"null", "null"},
15 changes: 14 additions & 1 deletion src/json/lexer.cr
Original file line number Diff line number Diff line change
@@ -134,6 +134,10 @@ abstract class JSON::Lexer
when '"'
next_char
break
else
if 0 <= current_char.ord < 32
unexpected_char
end
end
end
end
@@ -155,7 +159,11 @@ abstract class JSON::Lexer
next_char
break
else
@buffer << char
if 0 <= current_char.ord < 32
unexpected_char
else
@buffer << char
end
end
end
if @expects_object_key
@@ -265,6 +273,11 @@ abstract class JSON::Lexer
append_number_char
divisor = 1_u64
char = next_char

unless '0' <= char <= '9'
unexpected_char
end

while '0' <= char <= '9'
append_number_char
integer *= 10
10 changes: 9 additions & 1 deletion src/json/lexer/string_based.cr
Original file line number Diff line number Diff line change
@@ -23,6 +23,10 @@ class JSON::Lexer::StringBased < JSON::Lexer
when '"'
next_char
break
else
if 0 <= current_char.ord < 32
unexpected_char
end
end
end

@@ -55,7 +59,11 @@ class JSON::Lexer::StringBased < JSON::Lexer
end

private def next_char_no_column_increment
@reader.next_char
char = @reader.next_char
if char == '\0' && @reader.pos != @reader.string.bytesize
unexpected_char
end
char
end

private def current_char
77 changes: 51 additions & 26 deletions src/json/parser.cr
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
require "./lexer"

class JSON::Parser
property max_nesting = 512

def initialize(string_or_io : String | IO)
@lexer = JSON::Lexer.new(string_or_io)
@nest = 0
next_token
end

@@ -40,16 +43,20 @@ class JSON::Parser

ary = [] of Type

if token.type != :"]"
while true
ary << parse_value

case token.type
when :","
next_token
unexpected_token if token.type == :"]"
when :"]"
break
nest do
if token.type != :"]"
while true
ary << parse_value

case token.type
when :","
next_token
unexpected_token if token.type == :"]"
when :"]"
break
else
unexpected_token
end
end
end
end
@@ -64,26 +71,30 @@ class JSON::Parser

object = {} of String => Type

if token.type != :"}"
while true
check :STRING
key = token.string_value
nest do
if token.type != :"}"
while true
check :STRING
key = token.string_value

next_token
next_token

check :":"
next_token
check :":"
next_token

value = parse_value
value = parse_value

object[key] = value
object[key] = value

case token.type
when :","
next_token_expect_object_key
unexpected_token if token.type == :"}"
when :"}"
break
case token.type
when :","
next_token_expect_object_key
unexpected_token if token.type == :"}"
when :"}"
break
else
unexpected_token
end
end
end
end
@@ -107,6 +118,20 @@ class JSON::Parser
end

private def unexpected_token
raise ParseException.new("unexpected token '#{token}'", token.line_number, token.column_number)
parse_exception "unexpected token '#{token}'"
end

private def parse_exception(msg)
raise ParseException.new(msg, token.line_number, token.column_number)
end

private def nest
@nest += 1
if @nest > @max_nesting
parse_exception "nesting of 101 is too deep"

This comment has been minimized.

Copy link
@Sija

Sija Oct 27, 2016

Contributor

should it be "nesting of #{@nest} is too deep"?

This comment has been minimized.

Copy link
@asterite

asterite Oct 27, 2016

Member

Oops, totally! It was hardcoded first. Thank you so much! You are fixing so many mistakes of mine :-)

end

yield
@nest -= 1
end
end
14 changes: 12 additions & 2 deletions src/json/pull_parser.cr
Original file line number Diff line number Diff line change
@@ -7,6 +7,8 @@ class JSON::PullParser
getter string_value : String
getter raw_value : String

property max_nesting = 512

def initialize(input)
@lexer = Lexer.new input
@kind = :EOF
@@ -431,7 +433,7 @@ class JSON::PullParser

private def begin_array
@kind = :begin_array
@object_stack << :array
push_in_object_stack :array

case next_token.type
when :",", :"}", :":", :EOF
@@ -441,7 +443,7 @@ class JSON::PullParser

private def begin_object
@kind = :begin_object
@object_stack << :object
push_in_object_stack :object

case next_token_expect_object_key.type
when :STRING, :"}"
@@ -499,4 +501,12 @@ class JSON::PullParser
private def parse_exception(msg)
raise ParseException.new(msg, token.line_number, token.column_number)
end

private def push_in_object_stack(symbol)
if @object_stack.size >= @max_nesting
parse_exception "nesting of 101 is too deep"
end

@object_stack.push(symbol)
end
end

3 comments on commit 7eb738f

@will
Copy link
Contributor

@will will commented on 7eb738f Oct 26, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I saw that article and thought, "oh here is something I can do real quick", but of course you fixed this 5 hours ago 😂

@asterite
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@will Oops, sorry! I was really curious to see how Crystal performed and it was failing a few cases so I decided to fix them. One of them was stack overflowing by sending a large "[[[[...", for which a fast fix was worth it :-)

@refi64
Copy link
Contributor

@refi64 refi64 commented on 7eb738f Oct 27, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haha, I read this exact same article on Reddit around 15 minutes before you committed this.

Please sign in to comment.