Skip to content

Commit

Permalink
Correct implementation of heredoc (#5578)
Browse files Browse the repository at this point in the history
Now you can specify multiple heredocs in a single line, just like in Ruby.
asterite authored and RX14 committed Jan 17, 2018
1 parent 295ddc3 commit 8eb8554
Showing 9 changed files with 224 additions and 96 deletions.
6 changes: 3 additions & 3 deletions spec/compiler/formatter/formatter_spec.cr
Original file line number Diff line number Diff line change
@@ -881,6 +881,9 @@ describe Crystal::Formatter do
assert_format "<<-HTML\n hello \n world \n HTML"
assert_format " <<-HTML\n hello \n world \n HTML", "<<-HTML\n hello \n world \n HTML"

assert_format "x, y = <<-FOO, <<-BAR\n hello\n FOO\n world\n BAR"
assert_format "x, y, z = <<-FOO, <<-BAR, <<-BAZ\n hello\n FOO\n world\n BAR\n qux\nBAZ"

assert_format "#!shebang\n1 + 2"

assert_format " {{\n1 + 2 }}", "{{\n 1 + 2\n}}"
@@ -1029,9 +1032,6 @@ describe Crystal::Formatter do

assert_format "lib Foo\n {% if 1 %}\n 2\n {% end %}\nend\n\nmacro bar\n 1\nend"

assert_format %(puts(<<-FOO\n1\nFOO, 2))
assert_format %(puts <<-FOO\n1\nFOO, 2)

assert_format "x : Int32 |\nString", "x : Int32 |\n String"

assert_format %(foo("bar" \\\n"baz")), %(foo("bar" \\\n "baz"))
42 changes: 8 additions & 34 deletions spec/compiler/lexer/lexer_string_spec.cr
Original file line number Diff line number Diff line change
@@ -161,6 +161,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("Hello, mom! I am HERE.")
tester.next_string_token_should_be("\nHER dress is beautiful.")
tester.next_string_token_should_be("\nHE is OK.")
@@ -173,6 +174,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.next_string_token_should_be("\n")
tester.string_should_end_correctly
@@ -183,6 +185,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.next_string_token_should_be("\r\n")
tester.string_should_end_correctly
@@ -193,6 +196,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.string_should_end_correctly
end
@@ -203,6 +207,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("Hello, mom! I am HERE.")
tester.token_should_be_at(line: 2)
tester.next_string_token_should_be("\nHER dress is beautiful.")
@@ -221,6 +226,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("abc")
tester.string_should_have_an_interpolation_of("foo")
tester.string_should_end_correctly
@@ -239,46 +245,14 @@ describe "Lexer string" do
end
end

it "raises on invalid heredoc identifier (<<-HERE A)" do
lexer = Lexer.new("<<-HERE A\ntest\nHERE\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises on invalid heredoc identifier (<<-HERE\\n)" do
lexer = Lexer.new("<<-HERE\\ntest\nHERE\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises when identifier doesn't start with a leter" do
lexer = Lexer.new("<<-123\\ntest\n123\n")
it "raises when identifier doesn't start with a leter or number" do
lexer = Lexer.new("<<-!!!\\ntest\n!!!\n")

expect_raises Crystal::SyntaxException, /heredoc identifier starts with invalid character/ do
lexer.next_token
end
end

it "raises when identifier contains a character not for identifier" do
lexer = Lexer.new("<<-aaa.bbb?\\ntest\naaa.bbb?\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises when identifier contains spaces" do
lexer = Lexer.new("<<-aaa bbb\\ntest\naaabbb\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises on unexpected EOF while lexing heredoc" do
lexer = Lexer.new("<<-aaa")

4 changes: 4 additions & 0 deletions spec/compiler/normalize/string_interpolation_spec.cr
Original file line number Diff line number Diff line change
@@ -10,4 +10,8 @@ describe "Normalize: string interpolation" do
assert_expand "\"foo\#{bar}#{s}\"",
"((((::String::Builder.new(218)) << \"foo\") << bar) << \"#{s}\").to_s"
end

it "normalizes heredoc" do
assert_normalize "<<-FOO\nhello\nFOO", %("hello")
end
end
39 changes: 24 additions & 15 deletions spec/compiler/parser/parser_spec.cr
Original file line number Diff line number Diff line change
@@ -1180,16 +1180,17 @@ describe "Parser" do
it_parses %("hello " \\\n "world"), StringLiteral.new("hello world")
it_parses %("hello "\\\n"world"), StringLiteral.new("hello world")
it_parses %("hello \#{1}" \\\n "\#{2} world"), StringInterpolation.new(["hello ".string, 1.int32, 2.int32, " world".string] of ASTNode)
it_parses "<<-HERE\nHello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY\nHERE", "Hello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY".string
it_parses "<<-HERE\n One\n Zero\n HERE", " One\nZero".string
it_parses "<<-HERE\n One \\n Two\n Zero\n HERE", " One \n Two\nZero".string
it_parses "<<-HERE\n One\n\n Zero\n HERE", " One\n\nZero".string
it_parses "<<-HERE\n One\n \n Zero\n HERE", " One\n\nZero".string
it_parses "<<-HERE\nHello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY\nHERE",
"Hello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY".string_interpolation
it_parses "<<-HERE\n One\n Zero\n HERE", " One\nZero".string_interpolation
it_parses "<<-HERE\n One \\n Two\n Zero\n HERE", " One \n Two\nZero".string_interpolation
it_parses "<<-HERE\n One\n\n Zero\n HERE", " One\n\nZero".string_interpolation
it_parses "<<-HERE\n One\n \n Zero\n HERE", " One\n\nZero".string_interpolation
it_parses "<<-HERE\n \#{1}One\n \#{2}Zero\n HERE", StringInterpolation.new([" ".string, 1.int32, "One\n".string, 2.int32, "Zero".string] of ASTNode)
it_parses "<<-HERE\n foo\#{1}bar\n baz\n HERE", StringInterpolation.new(["foo".string, 1.int32, "bar\n baz".string] of ASTNode)
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE", " One\r\nZero".string
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE\r\n", " One\r\nZero".string
it_parses "<<-SOME\n Sa\n Se\n SOME", "Sa\nSe".string
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE", " One\r\nZero".string_interpolation
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE\r\n", " One\r\nZero".string_interpolation
it_parses "<<-SOME\n Sa\n Se\n SOME", "Sa\nSe".string_interpolation
it_parses "<<-HERE\n \#{1} \#{2}\n HERE", StringInterpolation.new([1.int32, " ".string, 2.int32] of ASTNode)
it_parses "<<-HERE\n \#{1} \\n \#{2}\n HERE", StringInterpolation.new([1.int32, " \n ".string, 2.int32] of ASTNode)
assert_syntax_error "<<-HERE\n One\nwrong\n Zero\n HERE", "heredoc line must have an indent greater or equal than 2", 3, 1
@@ -1200,16 +1201,24 @@ describe "Parser" do
assert_syntax_error "<<-HERE\n One\n \#{1}\n HERE", "heredoc line must have an indent greater or equal than 2", 2, 1
assert_syntax_error %("foo" "bar")

it_parses "<<-'HERE'\n hello \\n world\n \#{1}\n HERE", StringLiteral.new("hello \\n world\n\#{1}")
it_parses "<<-'HERE'\n hello \\n world\n \#{1}\n HERE", "hello \\n world\n\#{1}".string_interpolation
assert_syntax_error "<<-'HERE\n", "expecting closing single quote"

it_parses "<<-FOO\n1\nFOO.bar", Call.new("1".string, "bar")
it_parses "<<-FOO\n1\nFOO + 2", Call.new("1".string, "+", 2.int32)
it_parses "<<-'HERE COMES HEREDOC'\n hello \\n world\n \#{1}\n HERE COMES HEREDOC", "hello \\n world\n\#{1}".string_interpolation

it_parses "<<-FOO\n\t1\n\tFOO", StringLiteral.new("1")
it_parses "<<-FOO\n \t1\n \tFOO", StringLiteral.new("1")
it_parses "<<-FOO\n \t 1\n \t FOO", StringLiteral.new("1")
it_parses "<<-FOO\n\t 1\n\t FOO", StringLiteral.new("1")
assert_syntax_error "<<-FOO\n1\nFOO.bar", "Unterminated heredoc: can't find \"FOO\" anywhere before the end of file"
assert_syntax_error "<<-FOO\n1\nFOO + 2", "Unterminated heredoc: can't find \"FOO\" anywhere before the end of file"

it_parses "<<-FOO\n\t1\n\tFOO", "1".string_interpolation
it_parses "<<-FOO\n \t1\n \tFOO", "1".string_interpolation
it_parses "<<-FOO\n \t 1\n \t FOO", "1".string_interpolation
it_parses "<<-FOO\n\t 1\n\t FOO", "1".string_interpolation

it_parses "x, y = <<-FOO, <<-BAR\nhello\nFOO\nworld\nBAR",
MultiAssign.new(["x".var, "y".var] of ASTNode, ["hello".string_interpolation, "world".string_interpolation] of ASTNode)

it_parses "x, y, z = <<-FOO, <<-BAR, <<-BAZ\nhello\nFOO\nworld\nBAR\n!\nBAZ",
MultiAssign.new(["x".var, "y".var, "z".var] of ASTNode, ["hello".string_interpolation, "world".string_interpolation, "!".string_interpolation] of ASTNode)

it_parses "enum Foo; A\nB, C\nD = 1; end", EnumDef.new("Foo".path, [Arg.new("A"), Arg.new("B"), Arg.new("C"), Arg.new("D", 1.int32)] of ASTNode)
it_parses "enum Foo; A = 1, B; end", EnumDef.new("Foo".path, [Arg.new("A", 1.int32), Arg.new("B")] of ASTNode)
4 changes: 4 additions & 0 deletions spec/support/syntax.cr
Original file line number Diff line number Diff line change
@@ -90,6 +90,10 @@ class String
StringLiteral.new self
end

# Spec helper: builds a `StringInterpolation` whose single piece is the node
# returned by `#string` for this string.
def string_interpolation
  StringInterpolation.new([string] of ASTNode)
end

def float32
NumberLiteral.new self, :f32
end
11 changes: 11 additions & 0 deletions src/compiler/crystal/semantic/normalizer.cr
Original file line number Diff line number Diff line change
@@ -375,5 +375,16 @@ module Crystal
Assign.new(target.clone, call).at(node)
end
end

# Collapses an interpolation that consists of exactly one string literal
# into that literal. Every other interpolation is handed to the parent
# transformer unchanged.
def transform(node : StringInterpolation)
  pieces = node.expressions
  if pieces.size == 1
    only = pieces.first
    return only if only.is_a?(StringLiteral)
  end

  super
end
end
end
58 changes: 41 additions & 17 deletions src/compiler/crystal/syntax/lexer.cr
Original file line number Diff line number Diff line change
@@ -18,6 +18,13 @@ module Crystal
@token_end_location : Location?
@string_pool : StringPool

# Marker module for the data a lexer client associates with a pending heredoc.
# Types that are stored in `heredocs` include this module.
module HeredocItem
end

# Heredocs whose start has been seen but whose body has not been consumed yet.
# Entries are pushed when a heredoc start is found and should be processed
# when a newline is encountered. Each entry pairs the heredoc's delimiter
# state with the client-specific item.
getter heredocs = [] of {Token::DelimiterState, HeredocItem}

def initialize(string, string_pool : StringPool? = nil)
@reader = Char::Reader.new(string)
@token = Token.new
@@ -158,30 +165,36 @@ module Crystal
found_closing_single_quote = false

char = next_char
start_here = current_pos

if char == '\''
has_single_quote = true
char = next_char
start_here = current_pos
end

unless ident_start?(char)
unless ident_part?(char)
raise "heredoc identifier starts with invalid character"
end

here << char
end_here = 0

while true
char = next_char
case
when char == '\r'
if peek_next_char == '\n'
next
end_here = current_pos
next_char
break
else
raise "expecting '\\n' after '\\r'"
end
when char == '\n'
incr_line_number 0
end_here = current_pos
break
when ident_part?(char)
here << char
# ok
when char == '\0'
raise "Unexpected EOF on heredoc identifier"
else
@@ -191,8 +204,11 @@ module Crystal
if peek != '\r' && peek != '\n'
raise "expecting '\\n' or '\\r' after closing single quote"
end
elsif has_single_quote
# wait until another quote
else
raise "invalid character #{char.inspect} for heredoc identifier"
end_here = current_pos
break
end
end
end
@@ -201,8 +217,11 @@ module Crystal
raise "expecting closing single quote"
end

here = here.to_s
delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote
end_here -= 1 if has_single_quote

here = string_range(start_here, end_here)

delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote, advance: false
else
@token.type = :"<<"
end
@@ -1176,6 +1195,10 @@ module Crystal
end

def consume_newlines
# If there are heredocs we don't freely consume newlines because
# these will be part of the heredoc string
return unless @heredocs.empty?

if @count_whitespace
return
end
@@ -1721,6 +1744,7 @@ module Crystal

def next_string_token(delimiter_state)
@token.line_number = @line_number
@token.delimiter_state = delimiter_state

start = current_pos
string_end = delimiter_state.end
@@ -1737,13 +1761,13 @@ module Crystal
else
@token.type = :STRING
@token.value = string_end.to_s
@token.delimiter_state = @token.delimiter_state.with_open_count_delta(-1)
@token.delimiter_state = delimiter_state.with_open_count_delta(-1)
end
when string_nest
next_char
@token.type = :STRING
@token.value = string_nest.to_s
@token.delimiter_state = @token.delimiter_state.with_open_count_delta(+1)
@token.delimiter_state = delimiter_state.with_open_count_delta(+1)
when '\\'
if delimiter_state.allow_escapes
if delimiter_state.kind == :regex
@@ -1877,10 +1901,9 @@ module Crystal

if reached_end &&
(current_char == '\n' || current_char == '\0' ||
(current_char == '\r' && peek_next_char == '\n' && next_char) ||
!ident_part?(current_char))
(current_char == '\r' && peek_next_char == '\n' && next_char))
@token.type = :DELIMITER_END
@token.delimiter_state = @token.delimiter_state.with_heredoc_indent(indent)
@token.delimiter_state = delimiter_state.with_heredoc_indent(indent)
else
@reader.pos = old_pos
@column_number = old_column
@@ -1923,8 +1946,9 @@ module Crystal
msg = case delimiter_state.kind
when :command then "Unterminated command literal"
when :regex then "Unterminated regular expression"
when :heredoc then "Unterminated heredoc"
when :string then "Unterminated string literal"
when :heredoc
"Unterminated heredoc: can't find \"#{delimiter_state.end}\" anywhere before the end of file"
when :string then "Unterminated string literal"
else
::raise "unreachable"
end
@@ -2409,8 +2433,8 @@ module Crystal
@token.value = value
end

def delimited_pair(kind, string_nest, string_end, start, allow_escapes = true)
next_char
def delimited_pair(kind, string_nest, string_end, start, allow_escapes = true, advance = true)
next_char if advance
@token.type = :DELIMITER_START
@token.delimiter_state = Token::DelimiterState.new(kind, string_nest, string_end, allow_escapes)
set_token_raw_from_start(start)
88 changes: 74 additions & 14 deletions src/compiler/crystal/syntax/parser.cr
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@ module Crystal
@wants_doc = false
@doc_enabled = false
@no_type_declaration = 0
@consuming_heredocs = false

# This flag tells the parser where it has to consider a "do"
# as belonging to the current parsed call. For example when writing
@@ -1790,6 +1791,13 @@ module Crystal

check :DELIMITER_START

if delimiter_state.kind == :heredoc
node = StringInterpolation.new([] of ASTNode).at(location)
@heredocs << {delimiter_state, node}
next_token
return node
end

next_string_token(delimiter_state)
delimiter_state = @token.delimiter_state

@@ -1814,22 +1822,10 @@ module Crystal
end

if has_interpolation
if needs_heredoc_indent_removed?(delimiter_state)
pieces = remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
else
pieces = pieces.map do |piece|
value = piece.value
value.is_a?(String) ? StringLiteral.new(value) : value
end
end
pieces = combine_interpolation_pieces(pieces, delimiter_state)
result = StringInterpolation.new(pieces).at(location)
else
if needs_heredoc_indent_removed?(delimiter_state)
pieces = remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
string = pieces.join { |piece| piece.as(StringLiteral).value }
else
string = pieces.map(&.value).join
end
string = combine_pieces(pieces, delimiter_state)
result = StringLiteral.new string
end

@@ -1849,6 +1845,26 @@ module Crystal
result
end

# Turns the collected pieces of an interpolated string into AST nodes.
# When the delimiter is a heredoc with a positive indent, the work is
# delegated to `remove_heredoc_indent`; otherwise each `String` piece becomes
# a `StringLiteral` and every other piece value is passed through as is.
private def combine_interpolation_pieces(pieces, delimiter_state)
  if needs_heredoc_indent_removed?(delimiter_state)
    return remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
  end

  pieces.map do |piece|
    content = piece.value
    content.is_a?(String) ? StringLiteral.new(content) : content
  end
end

# Joins the pieces of a string without interpolation into one `String`.
# For a heredoc with a positive indent the pieces first go through
# `remove_heredoc_indent`, and the values of the resulting literals are joined.
private def combine_pieces(pieces, delimiter_state)
  unless needs_heredoc_indent_removed?(delimiter_state)
    return pieces.map(&.value).join
  end

  dedented = remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
  dedented.join { |piece| piece.as(StringLiteral).value }
end

def consume_delimiter(pieces, delimiter_state, has_interpolation)
options = Regex::Options::None
token_end_location = nil
@@ -1927,6 +1943,35 @@ module Crystal
options
end

# Consumes the body of every pending heredoc.
#
# `@consuming_heredocs` is set for the duration of the loop so that
# `next_token` does not start another round while one is in progress.
def consume_heredocs
  @consuming_heredocs = true

  # After reversing, `pop?` hands the heredocs back in the order they were pushed.
  @heredocs.reverse!
  while pending = @heredocs.pop?
    state, item = pending
    consume_heredoc(state, item.as(StringInterpolation))
  end

  @consuming_heredocs = false
end

# Lexes the body of a single pending heredoc and stores the result in *node*,
# the `StringInterpolation` that was queued together with *delimiter_state*.
#
# The pieces are appended to `node.expressions` and the node's end location
# is set to the end of the heredoc.
def consume_heredoc(delimiter_state, node)
next_string_token(delimiter_state)
# Continue with the state the lexer attached to the token it just produced.
delimiter_state = @token.delimiter_state

pieces = [] of Piece
has_interpolation = false

# `options` is part of the returned tuple but is not used in this method.
delimiter_state, has_interpolation, options, token_end_location = consume_delimiter pieces, delimiter_state, has_interpolation

if has_interpolation
pieces = combine_interpolation_pieces(pieces, delimiter_state)
node.expressions.concat(pieces)
else
# Without interpolation the whole heredoc becomes a single string literal piece.
string = combine_pieces(pieces, delimiter_state)
node.expressions.push(StringLiteral.new(string).at(node.location).at_end(token_end_location))
end

node.end_location = token_end_location
end

def needs_heredoc_indent_removed?(delimiter_state)
delimiter_state.kind == :heredoc && delimiter_state.heredoc_indent > 0
end
@@ -1939,6 +1984,7 @@ module Crystal
pieces.each_with_index do |piece, i|
value = piece.value
line_number = piece.line_number

this_piece_is_in_new_line = line_number != previous_line_number
next_piece_is_in_new_line = i == pieces.size - 1 || pieces[i + 1].line_number != line_number
if value.is_a?(String)
@@ -5537,5 +5583,19 @@ module Crystal
@visibility = old_visibility
value
end

# Advances to the next token. When that token is a newline and heredocs are
# still pending (and we are not already consuming them), their bodies are
# consumed before returning.
def next_token
  token = super
  return token unless token.type == :NEWLINE
  return token if @consuming_heredocs || @heredocs.empty?

  consume_heredocs
  token
end
end

# Lets `StringInterpolation` nodes be stored in the lexer's `heredocs` list,
# whose entries are typed as `Lexer::HeredocItem`.
class StringInterpolation
include Lexer::HeredocItem
end
end
68 changes: 55 additions & 13 deletions src/compiler/crystal/tools/formatter.cr
Original file line number Diff line number Diff line change
@@ -45,6 +45,16 @@ module Crystal
end_line : Int32,
difference : Int32

# Formatter state captured when a heredoc start is visited: the node, a copy
# of the start token, and the formatter's line, column, indent and string
# continuation at that moment. It includes `Lexer::HeredocItem` so it can be
# stored in the lexer's `heredocs` list.
record HeredocInfo,
node : StringInterpolation,
token : Token,
line : Int32,
column : Int32,
indent : Int32,
string_continuation : Int32 do
include Lexer::HeredocItem
end

@lexer : Lexer
@comment_columns : Array(Int32?)
@indent : Int32
@@ -86,8 +96,7 @@ module Crystal
@indent = 0
@line = 0
@column = 0
@token = @lexer.token
@token = next_token
@token = @lexer.next_token

@output = IO::Memory.new(source.bytesize)
@line_output = IO::Memory.new
@@ -500,19 +509,30 @@ module Crystal
end

def visit(node : StringInterpolation)
if @token.delimiter_state.kind == :heredoc
# For heredoc, only write the start: on a newline will print it
@lexer.heredocs << {@token.delimiter_state, HeredocInfo.new(node, @token.dup, @line, @column, @indent, @string_continuation)}
write @token.raw
next_token
return false
end

check :DELIMITER_START

column = @column
old_indent = @indent
old_string_continuation = @string_continuation
is_regex = @token.delimiter_state.kind == :regex
indent_difference = @token.column_number - (@column + 1)
visit_string_interpolation(node, @token, @line, @column, @indent, @string_continuation)
end

write @token.raw
def visit_string_interpolation(node, token, line, column, old_indent, old_string_continuation, wrote_token = false)
@token = token

is_regex = token.delimiter_state.kind == :regex
indent_difference = token.column_number - (column + 1)

write token.raw unless wrote_token
next_string_token

delimiter_state = @token.delimiter_state
is_heredoc = @token.delimiter_state.kind == :heredoc
delimiter_state = token.delimiter_state
is_heredoc = token.delimiter_state.kind == :heredoc
@last_is_heredoc = is_heredoc

heredoc_line = @line
@@ -615,6 +635,26 @@ module Crystal
end
end

# Formats the body of every heredoc queued on the lexer, calling `write_line`
# between consecutive heredocs.
#
# `@consuming_heredocs` is set for the duration of the loop so that
# `next_token` does not start another round while one is in progress.
private def consume_heredocs
  @consuming_heredocs = true

  # After reversing, `pop?` hands the heredocs back in the order they were pushed.
  @lexer.heredocs.reverse!
  while entry = @lexer.heredocs.pop?
    state, info = entry
    consume_heredoc(state, info.as(HeredocInfo))
    write_line unless @lexer.heredocs.empty?
  end

  @consuming_heredocs = false
end

# Formats one queued heredoc by replaying the state saved in *info*.
# The start token was already written when the heredoc was first visited,
# hence `wrote_token: true`. *delimiter_state* is not used here.
private def consume_heredoc(delimiter_state, info)
  node, token = info.node, info.token
  visit_string_interpolation(node, token, info.line, info.column, info.indent, info.string_continuation, wrote_token: true)
end

def visit(node : RegexLiteral)
accept node.value

@@ -4012,14 +4052,16 @@ module Crystal
io << @output
end

def maybe_reset_passed_backslash_newline
end

def next_token
current_line_number = @lexer.line_number
@token = @lexer.next_token
if @token.type == :DELIMITER_START
increment_lines(@lexer.line_number - current_line_number)
elsif @token.type == :NEWLINE
if !@lexer.heredocs.empty? && !@consuming_heredocs
write_line
consume_heredocs
end
end
@token
end

0 comments on commit 8eb8554

Please sign in to comment.