Skip to content

Commit

Permalink
Correct implementation of heredoc (#5578)
Browse files Browse the repository at this point in the history
Now you can specify multiple heredocs in a single line, just like in Ruby.
  • Loading branch information
asterite authored and RX14 committed Jan 17, 2018
1 parent 295ddc3 commit 8eb8554
Show file tree
Hide file tree
Showing 9 changed files with 224 additions and 96 deletions.
6 changes: 3 additions & 3 deletions spec/compiler/formatter/formatter_spec.cr
Expand Up @@ -881,6 +881,9 @@ describe Crystal::Formatter do
assert_format "<<-HTML\n hello \n world \n HTML"
assert_format " <<-HTML\n hello \n world \n HTML", "<<-HTML\n hello \n world \n HTML"

assert_format "x, y = <<-FOO, <<-BAR\n hello\n FOO\n world\n BAR"
assert_format "x, y, z = <<-FOO, <<-BAR, <<-BAZ\n hello\n FOO\n world\n BAR\n qux\nBAZ"

assert_format "#!shebang\n1 + 2"

assert_format " {{\n1 + 2 }}", "{{\n 1 + 2\n}}"
Expand Down Expand Up @@ -1029,9 +1032,6 @@ describe Crystal::Formatter do

assert_format "lib Foo\n {% if 1 %}\n 2\n {% end %}\nend\n\nmacro bar\n 1\nend"

assert_format %(puts(<<-FOO\n1\nFOO, 2))
assert_format %(puts <<-FOO\n1\nFOO, 2)

assert_format "x : Int32 |\nString", "x : Int32 |\n String"

assert_format %(foo("bar" \\\n"baz")), %(foo("bar" \\\n "baz"))
Expand Down
42 changes: 8 additions & 34 deletions spec/compiler/lexer/lexer_string_spec.cr
Expand Up @@ -161,6 +161,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("Hello, mom! I am HERE.")
tester.next_string_token_should_be("\nHER dress is beautiful.")
tester.next_string_token_should_be("\nHE is OK.")
Expand All @@ -173,6 +174,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.next_string_token_should_be("\n")
tester.string_should_end_correctly
Expand All @@ -183,6 +185,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.next_string_token_should_be("\r\n")
tester.string_should_end_correctly
Expand All @@ -193,6 +196,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.string_should_end_correctly
end
Expand All @@ -203,6 +207,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("Hello, mom! I am HERE.")
tester.token_should_be_at(line: 2)
tester.next_string_token_should_be("\nHER dress is beautiful.")
Expand All @@ -221,6 +226,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("abc")
tester.string_should_have_an_interpolation_of("foo")
tester.string_should_end_correctly
Expand All @@ -239,46 +245,14 @@ describe "Lexer string" do
end
end

it "raises on invalid heredoc identifier (<<-HERE A)" do
lexer = Lexer.new("<<-HERE A\ntest\nHERE\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises on invalid heredoc identifier (<<-HERE\\n)" do
lexer = Lexer.new("<<-HERE\\ntest\nHERE\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises when identifier doesn't start with a leter" do
lexer = Lexer.new("<<-123\\ntest\n123\n")
it "raises when identifier doesn't start with a leter or number" do
lexer = Lexer.new("<<-!!!\\ntest\n!!!\n")

expect_raises Crystal::SyntaxException, /heredoc identifier starts with invalid character/ do
lexer.next_token
end
end

it "raises when identifier contains a character not for identifier" do
lexer = Lexer.new("<<-aaa.bbb?\\ntest\naaa.bbb?\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises when identifier contains spaces" do
lexer = Lexer.new("<<-aaa bbb\\ntest\naaabbb\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises on unexpected EOF while lexing heredoc" do
lexer = Lexer.new("<<-aaa")

Expand Down
4 changes: 4 additions & 0 deletions spec/compiler/normalize/string_interpolation_spec.cr
Expand Up @@ -10,4 +10,8 @@ describe "Normalize: string interpolation" do
assert_expand "\"foo\#{bar}#{s}\"",
"((((::String::Builder.new(218)) << \"foo\") << bar) << \"#{s}\").to_s"
end

it "normalizes heredoc" do
assert_normalize "<<-FOO\nhello\nFOO", %("hello")
end
end
39 changes: 24 additions & 15 deletions spec/compiler/parser/parser_spec.cr
Expand Up @@ -1180,16 +1180,17 @@ describe "Parser" do
it_parses %("hello " \\\n "world"), StringLiteral.new("hello world")
it_parses %("hello "\\\n"world"), StringLiteral.new("hello world")
it_parses %("hello \#{1}" \\\n "\#{2} world"), StringInterpolation.new(["hello ".string, 1.int32, 2.int32, " world".string] of ASTNode)
it_parses "<<-HERE\nHello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY\nHERE", "Hello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY".string
it_parses "<<-HERE\n One\n Zero\n HERE", " One\nZero".string
it_parses "<<-HERE\n One \\n Two\n Zero\n HERE", " One \n Two\nZero".string
it_parses "<<-HERE\n One\n\n Zero\n HERE", " One\n\nZero".string
it_parses "<<-HERE\n One\n \n Zero\n HERE", " One\n\nZero".string
it_parses "<<-HERE\nHello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY\nHERE",
"Hello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY".string_interpolation
it_parses "<<-HERE\n One\n Zero\n HERE", " One\nZero".string_interpolation
it_parses "<<-HERE\n One \\n Two\n Zero\n HERE", " One \n Two\nZero".string_interpolation
it_parses "<<-HERE\n One\n\n Zero\n HERE", " One\n\nZero".string_interpolation
it_parses "<<-HERE\n One\n \n Zero\n HERE", " One\n\nZero".string_interpolation
it_parses "<<-HERE\n \#{1}One\n \#{2}Zero\n HERE", StringInterpolation.new([" ".string, 1.int32, "One\n".string, 2.int32, "Zero".string] of ASTNode)
it_parses "<<-HERE\n foo\#{1}bar\n baz\n HERE", StringInterpolation.new(["foo".string, 1.int32, "bar\n baz".string] of ASTNode)
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE", " One\r\nZero".string
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE\r\n", " One\r\nZero".string
it_parses "<<-SOME\n Sa\n Se\n SOME", "Sa\nSe".string
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE", " One\r\nZero".string_interpolation
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE\r\n", " One\r\nZero".string_interpolation
it_parses "<<-SOME\n Sa\n Se\n SOME", "Sa\nSe".string_interpolation
it_parses "<<-HERE\n \#{1} \#{2}\n HERE", StringInterpolation.new([1.int32, " ".string, 2.int32] of ASTNode)
it_parses "<<-HERE\n \#{1} \\n \#{2}\n HERE", StringInterpolation.new([1.int32, " \n ".string, 2.int32] of ASTNode)
assert_syntax_error "<<-HERE\n One\nwrong\n Zero\n HERE", "heredoc line must have an indent greater or equal than 2", 3, 1
Expand All @@ -1200,16 +1201,24 @@ describe "Parser" do
assert_syntax_error "<<-HERE\n One\n \#{1}\n HERE", "heredoc line must have an indent greater or equal than 2", 2, 1
assert_syntax_error %("foo" "bar")

it_parses "<<-'HERE'\n hello \\n world\n \#{1}\n HERE", StringLiteral.new("hello \\n world\n\#{1}")
it_parses "<<-'HERE'\n hello \\n world\n \#{1}\n HERE", "hello \\n world\n\#{1}".string_interpolation
assert_syntax_error "<<-'HERE\n", "expecting closing single quote"

it_parses "<<-FOO\n1\nFOO.bar", Call.new("1".string, "bar")
it_parses "<<-FOO\n1\nFOO + 2", Call.new("1".string, "+", 2.int32)
it_parses "<<-'HERE COMES HEREDOC'\n hello \\n world\n \#{1}\n HERE COMES HEREDOC", "hello \\n world\n\#{1}".string_interpolation

it_parses "<<-FOO\n\t1\n\tFOO", StringLiteral.new("1")
it_parses "<<-FOO\n \t1\n \tFOO", StringLiteral.new("1")
it_parses "<<-FOO\n \t 1\n \t FOO", StringLiteral.new("1")
it_parses "<<-FOO\n\t 1\n\t FOO", StringLiteral.new("1")
assert_syntax_error "<<-FOO\n1\nFOO.bar", "Unterminated heredoc: can't find \"FOO\" anywhere before the end of file"
assert_syntax_error "<<-FOO\n1\nFOO + 2", "Unterminated heredoc: can't find \"FOO\" anywhere before the end of file"

it_parses "<<-FOO\n\t1\n\tFOO", "1".string_interpolation
it_parses "<<-FOO\n \t1\n \tFOO", "1".string_interpolation
it_parses "<<-FOO\n \t 1\n \t FOO", "1".string_interpolation
it_parses "<<-FOO\n\t 1\n\t FOO", "1".string_interpolation

it_parses "x, y = <<-FOO, <<-BAR\nhello\nFOO\nworld\nBAR",
MultiAssign.new(["x".var, "y".var] of ASTNode, ["hello".string_interpolation, "world".string_interpolation] of ASTNode)

it_parses "x, y, z = <<-FOO, <<-BAR, <<-BAZ\nhello\nFOO\nworld\nBAR\n!\nBAZ",
MultiAssign.new(["x".var, "y".var, "z".var] of ASTNode, ["hello".string_interpolation, "world".string_interpolation, "!".string_interpolation] of ASTNode)

it_parses "enum Foo; A\nB, C\nD = 1; end", EnumDef.new("Foo".path, [Arg.new("A"), Arg.new("B"), Arg.new("C"), Arg.new("D", 1.int32)] of ASTNode)
it_parses "enum Foo; A = 1, B; end", EnumDef.new("Foo".path, [Arg.new("A", 1.int32), Arg.new("B")] of ASTNode)
Expand Down
4 changes: 4 additions & 0 deletions spec/support/syntax.cr
Expand Up @@ -90,6 +90,10 @@ class String
StringLiteral.new self
end

# Spec helper: builds a `StringInterpolation` whose only piece is the
# node returned by `string` for this receiver.
def string_interpolation
  pieces = [string] of ASTNode
  StringInterpolation.new(pieces)
end

def float32
NumberLiteral.new self, :f32
end
Expand Down
11 changes: 11 additions & 0 deletions src/compiler/crystal/semantic/normalizer.cr
Expand Up @@ -375,5 +375,16 @@ module Crystal
Assign.new(target.clone, call).at(node)
end
end

# Simplifies a `StringInterpolation` made of exactly one `StringLiteral`
# by returning that literal directly. Every other interpolation is
# delegated to the parent `transform`.
def transform(node : StringInterpolation)
  pieces = node.expressions
  return super unless pieces.size == 1

  sole = pieces.first
  if sole.is_a?(StringLiteral)
    sole
  else
    super
  end
end
end
end
58 changes: 41 additions & 17 deletions src/compiler/crystal/syntax/lexer.cr
Expand Up @@ -18,6 +18,13 @@ module Crystal
@token_end_location : Location?
@string_pool : StringPool

# This is an interface for storing data associated to a heredoc.
#
# The module declares no members of its own; it serves as the type of the
# second element of each entry kept in `heredocs`, next to the heredoc's
# `Token::DelimiterState`.
module HeredocItem
end

# Heredocs that have been found so far, pushed as they are found. Each entry
# pairs the heredoc's `Token::DelimiterState` with its associated
# `HeredocItem` data. They should be processed when encountering a newline.
getter heredocs = [] of {Token::DelimiterState, HeredocItem}

def initialize(string, string_pool : StringPool? = nil)
@reader = Char::Reader.new(string)
@token = Token.new
Expand Down Expand Up @@ -158,30 +165,36 @@ module Crystal
found_closing_single_quote = false

char = next_char
start_here = current_pos

if char == '\''
has_single_quote = true
char = next_char
start_here = current_pos
end

unless ident_start?(char)
unless ident_part?(char)
raise "heredoc identifier starts with invalid character"
end

here << char
end_here = 0

while true
char = next_char
case
when char == '\r'
if peek_next_char == '\n'
next
end_here = current_pos
next_char
break
else
raise "expecting '\\n' after '\\r'"
end
when char == '\n'
incr_line_number 0
end_here = current_pos
break
when ident_part?(char)
here << char
# ok
when char == '\0'
raise "Unexpected EOF on heredoc identifier"
else
Expand All @@ -191,8 +204,11 @@ module Crystal
if peek != '\r' && peek != '\n'
raise "expecting '\\n' or '\\r' after closing single quote"
end
elsif has_single_quote
# wait until another quote
else
raise "invalid character #{char.inspect} for heredoc identifier"
end_here = current_pos
break
end
end
end
Expand All @@ -201,8 +217,11 @@ module Crystal
raise "expecting closing single quote"
end

here = here.to_s
delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote
end_here -= 1 if has_single_quote

here = string_range(start_here, end_here)

delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote, advance: false
else
@token.type = :"<<"
end
Expand Down Expand Up @@ -1176,6 +1195,10 @@ module Crystal
end

def consume_newlines
# If there are heredocs we don't freely consume newlines because
# these will be part of the heredoc string
return unless @heredocs.empty?

if @count_whitespace
return
end
Expand Down Expand Up @@ -1721,6 +1744,7 @@ module Crystal

def next_string_token(delimiter_state)
@token.line_number = @line_number
@token.delimiter_state = delimiter_state

start = current_pos
string_end = delimiter_state.end
Expand All @@ -1737,13 +1761,13 @@ module Crystal
else
@token.type = :STRING
@token.value = string_end.to_s
@token.delimiter_state = @token.delimiter_state.with_open_count_delta(-1)
@token.delimiter_state = delimiter_state.with_open_count_delta(-1)
end
when string_nest
next_char
@token.type = :STRING
@token.value = string_nest.to_s
@token.delimiter_state = @token.delimiter_state.with_open_count_delta(+1)
@token.delimiter_state = delimiter_state.with_open_count_delta(+1)
when '\\'
if delimiter_state.allow_escapes
if delimiter_state.kind == :regex
Expand Down Expand Up @@ -1877,10 +1901,9 @@ module Crystal

if reached_end &&
(current_char == '\n' || current_char == '\0' ||
(current_char == '\r' && peek_next_char == '\n' && next_char) ||
!ident_part?(current_char))
(current_char == '\r' && peek_next_char == '\n' && next_char))
@token.type = :DELIMITER_END
@token.delimiter_state = @token.delimiter_state.with_heredoc_indent(indent)
@token.delimiter_state = delimiter_state.with_heredoc_indent(indent)
else
@reader.pos = old_pos
@column_number = old_column
Expand Down Expand Up @@ -1923,8 +1946,9 @@ module Crystal
msg = case delimiter_state.kind
when :command then "Unterminated command literal"
when :regex then "Unterminated regular expression"
when :heredoc then "Unterminated heredoc"
when :string then "Unterminated string literal"
when :heredoc
"Unterminated heredoc: can't find \"#{delimiter_state.end}\" anywhere before the end of file"
when :string then "Unterminated string literal"
else
::raise "unreachable"
end
Expand Down Expand Up @@ -2409,8 +2433,8 @@ module Crystal
@token.value = value
end

def delimited_pair(kind, string_nest, string_end, start, allow_escapes = true)
next_char
def delimited_pair(kind, string_nest, string_end, start, allow_escapes = true, advance = true)
next_char if advance
@token.type = :DELIMITER_START
@token.delimiter_state = Token::DelimiterState.new(kind, string_nest, string_end, allow_escapes)
set_token_raw_from_start(start)
Expand Down

0 comments on commit 8eb8554

Please sign in to comment.