Skip to content

Commit

Permalink
Correct implementation of heredoc (#5578)
Browse files Browse the repository at this point in the history
Now you can specify multiple heredocs in a single line, just like in Ruby.
asterite authored and RX14 committed Jan 17, 2018

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 295ddc3 commit 8eb8554
Showing 9 changed files with 224 additions and 96 deletions.
6 changes: 3 additions & 3 deletions spec/compiler/formatter/formatter_spec.cr
Original file line number Diff line number Diff line change
@@ -881,6 +881,9 @@ describe Crystal::Formatter do
assert_format "<<-HTML\n hello \n world \n HTML"
assert_format " <<-HTML\n hello \n world \n HTML", "<<-HTML\n hello \n world \n HTML"

assert_format "x, y = <<-FOO, <<-BAR\n hello\n FOO\n world\n BAR"
assert_format "x, y, z = <<-FOO, <<-BAR, <<-BAZ\n hello\n FOO\n world\n BAR\n qux\nBAZ"

assert_format "#!shebang\n1 + 2"

assert_format " {{\n1 + 2 }}", "{{\n 1 + 2\n}}"
@@ -1029,9 +1032,6 @@ describe Crystal::Formatter do

assert_format "lib Foo\n {% if 1 %}\n 2\n {% end %}\nend\n\nmacro bar\n 1\nend"

assert_format %(puts(<<-FOO\n1\nFOO, 2))
assert_format %(puts <<-FOO\n1\nFOO, 2)

assert_format "x : Int32 |\nString", "x : Int32 |\n String"

assert_format %(foo("bar" \\\n"baz")), %(foo("bar" \\\n "baz"))
42 changes: 8 additions & 34 deletions spec/compiler/lexer/lexer_string_spec.cr
Original file line number Diff line number Diff line change
@@ -161,6 +161,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("Hello, mom! I am HERE.")
tester.next_string_token_should_be("\nHER dress is beautiful.")
tester.next_string_token_should_be("\nHE is OK.")
@@ -173,6 +174,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.next_string_token_should_be("\n")
tester.string_should_end_correctly
@@ -183,6 +185,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.next_string_token_should_be("\r\n")
tester.string_should_end_correctly
@@ -193,6 +196,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("foo")
tester.string_should_end_correctly
end
@@ -203,6 +207,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("Hello, mom! I am HERE.")
tester.token_should_be_at(line: 2)
tester.next_string_token_should_be("\nHER dress is beautiful.")
@@ -221,6 +226,7 @@ describe "Lexer string" do
tester = LexerObjects::Strings.new(lexer)

tester.string_should_start_correctly
tester.next_token_should_be(:NEWLINE)
tester.next_string_token_should_be("abc")
tester.string_should_have_an_interpolation_of("foo")
tester.string_should_end_correctly
@@ -239,46 +245,14 @@ describe "Lexer string" do
end
end

it "raises on invalid heredoc identifier (<<-HERE A)" do
lexer = Lexer.new("<<-HERE A\ntest\nHERE\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises on invalid heredoc identifier (<<-HERE\\n)" do
lexer = Lexer.new("<<-HERE\\ntest\nHERE\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises when identifier doesn't start with a leter" do
lexer = Lexer.new("<<-123\\ntest\n123\n")
it "raises when identifier doesn't start with a letter or number" do
  # `!` is rejected as the very first character of the heredoc identifier,
  # so the lexer raises before reading anything else of the input.
  lexer = Lexer.new("<<-!!!\\ntest\n!!!\n")

  expect_raises Crystal::SyntaxException, /heredoc identifier starts with invalid character/ do
    lexer.next_token
  end
end

it "raises when identifier contains a character not for identifier" do
lexer = Lexer.new("<<-aaa.bbb?\\ntest\naaa.bbb?\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises when identifier contains spaces" do
lexer = Lexer.new("<<-aaa bbb\\ntest\naaabbb\n")

expect_raises Crystal::SyntaxException, /invalid character '.+' for heredoc identifier/ do
lexer.next_token
end
end

it "raises on unexpected EOF while lexing heredoc" do
lexer = Lexer.new("<<-aaa")

4 changes: 4 additions & 0 deletions spec/compiler/normalize/string_interpolation_spec.cr
Original file line number Diff line number Diff line change
@@ -10,4 +10,8 @@ describe "Normalize: string interpolation" do
assert_expand "\"foo\#{bar}#{s}\"",
"((((::String::Builder.new(218)) << \"foo\") << bar) << \"#{s}\").to_s"
end

# A heredoc is parsed as a StringInterpolation; when it holds a single
# string literal, normalization must reduce it to that plain string.
it "normalizes heredoc" do
assert_normalize "<<-FOO\nhello\nFOO", %("hello")
end
end
39 changes: 24 additions & 15 deletions spec/compiler/parser/parser_spec.cr
Original file line number Diff line number Diff line change
@@ -1180,16 +1180,17 @@ describe "Parser" do
it_parses %("hello " \\\n "world"), StringLiteral.new("hello world")
it_parses %("hello "\\\n"world"), StringLiteral.new("hello world")
it_parses %("hello \#{1}" \\\n "\#{2} world"), StringInterpolation.new(["hello ".string, 1.int32, 2.int32, " world".string] of ASTNode)
it_parses "<<-HERE\nHello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY\nHERE", "Hello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY".string
it_parses "<<-HERE\n One\n Zero\n HERE", " One\nZero".string
it_parses "<<-HERE\n One \\n Two\n Zero\n HERE", " One \n Two\nZero".string
it_parses "<<-HERE\n One\n\n Zero\n HERE", " One\n\nZero".string
it_parses "<<-HERE\n One\n \n Zero\n HERE", " One\n\nZero".string
it_parses "<<-HERE\nHello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY\nHERE",
"Hello, mom! I am HERE.\nHER dress is beautiful.\nHE is OK.\n HERESY".string_interpolation
it_parses "<<-HERE\n One\n Zero\n HERE", " One\nZero".string_interpolation
it_parses "<<-HERE\n One \\n Two\n Zero\n HERE", " One \n Two\nZero".string_interpolation
it_parses "<<-HERE\n One\n\n Zero\n HERE", " One\n\nZero".string_interpolation
it_parses "<<-HERE\n One\n \n Zero\n HERE", " One\n\nZero".string_interpolation
it_parses "<<-HERE\n \#{1}One\n \#{2}Zero\n HERE", StringInterpolation.new([" ".string, 1.int32, "One\n".string, 2.int32, "Zero".string] of ASTNode)
it_parses "<<-HERE\n foo\#{1}bar\n baz\n HERE", StringInterpolation.new(["foo".string, 1.int32, "bar\n baz".string] of ASTNode)
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE", " One\r\nZero".string
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE\r\n", " One\r\nZero".string
it_parses "<<-SOME\n Sa\n Se\n SOME", "Sa\nSe".string
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE", " One\r\nZero".string_interpolation
it_parses "<<-HERE\r\n One\r\n Zero\r\n HERE\r\n", " One\r\nZero".string_interpolation
it_parses "<<-SOME\n Sa\n Se\n SOME", "Sa\nSe".string_interpolation
it_parses "<<-HERE\n \#{1} \#{2}\n HERE", StringInterpolation.new([1.int32, " ".string, 2.int32] of ASTNode)
it_parses "<<-HERE\n \#{1} \\n \#{2}\n HERE", StringInterpolation.new([1.int32, " \n ".string, 2.int32] of ASTNode)
assert_syntax_error "<<-HERE\n One\nwrong\n Zero\n HERE", "heredoc line must have an indent greater or equal than 2", 3, 1
@@ -1200,16 +1201,24 @@ describe "Parser" do
assert_syntax_error "<<-HERE\n One\n \#{1}\n HERE", "heredoc line must have an indent greater or equal than 2", 2, 1
assert_syntax_error %("foo" "bar")

it_parses "<<-'HERE'\n hello \\n world\n \#{1}\n HERE", StringLiteral.new("hello \\n world\n\#{1}")
it_parses "<<-'HERE'\n hello \\n world\n \#{1}\n HERE", "hello \\n world\n\#{1}".string_interpolation
assert_syntax_error "<<-'HERE\n", "expecting closing single quote"

it_parses "<<-FOO\n1\nFOO.bar", Call.new("1".string, "bar")
it_parses "<<-FOO\n1\nFOO + 2", Call.new("1".string, "+", 2.int32)
it_parses "<<-'HERE COMES HEREDOC'\n hello \\n world\n \#{1}\n HERE COMES HEREDOC", "hello \\n world\n\#{1}".string_interpolation

it_parses "<<-FOO\n\t1\n\tFOO", StringLiteral.new("1")
it_parses "<<-FOO\n \t1\n \tFOO", StringLiteral.new("1")
it_parses "<<-FOO\n \t 1\n \t FOO", StringLiteral.new("1")
it_parses "<<-FOO\n\t 1\n\t FOO", StringLiteral.new("1")
assert_syntax_error "<<-FOO\n1\nFOO.bar", "Unterminated heredoc: can't find \"FOO\" anywhere before the end of file"
assert_syntax_error "<<-FOO\n1\nFOO + 2", "Unterminated heredoc: can't find \"FOO\" anywhere before the end of file"

it_parses "<<-FOO\n\t1\n\tFOO", "1".string_interpolation
it_parses "<<-FOO\n \t1\n \tFOO", "1".string_interpolation
it_parses "<<-FOO\n \t 1\n \t FOO", "1".string_interpolation
it_parses "<<-FOO\n\t 1\n\t FOO", "1".string_interpolation

it_parses "x, y = <<-FOO, <<-BAR\nhello\nFOO\nworld\nBAR",
MultiAssign.new(["x".var, "y".var] of ASTNode, ["hello".string_interpolation, "world".string_interpolation] of ASTNode)

it_parses "x, y, z = <<-FOO, <<-BAR, <<-BAZ\nhello\nFOO\nworld\nBAR\n!\nBAZ",
MultiAssign.new(["x".var, "y".var, "z".var] of ASTNode, ["hello".string_interpolation, "world".string_interpolation, "!".string_interpolation] of ASTNode)

it_parses "enum Foo; A\nB, C\nD = 1; end", EnumDef.new("Foo".path, [Arg.new("A"), Arg.new("B"), Arg.new("C"), Arg.new("D", 1.int32)] of ASTNode)
it_parses "enum Foo; A = 1, B; end", EnumDef.new("Foo".path, [Arg.new("A", 1.int32), Arg.new("B")] of ASTNode)
4 changes: 4 additions & 0 deletions spec/support/syntax.cr
Original file line number Diff line number Diff line change
@@ -90,6 +90,10 @@ class String
StringLiteral.new self
end

# Spec helper: wraps this string, as a string literal node, inside a
# StringInterpolation that has it as its only expression.
def string_interpolation
  pieces = [string] of ASTNode
  StringInterpolation.new(pieces)
end

def float32
NumberLiteral.new self, :f32
end
11 changes: 11 additions & 0 deletions src/compiler/crystal/semantic/normalizer.cr
Original file line number Diff line number Diff line change
@@ -375,5 +375,16 @@ module Crystal
Assign.new(target.clone, call).at(node)
end
end

# Collapses an interpolation whose only expression is a string literal
# into that literal. Any other interpolation goes through the default
# transformation.
def transform(node : StringInterpolation)
  only = node.expressions.first if node.expressions.size == 1
  return only if only.is_a?(StringLiteral)

  super
end
end
end
58 changes: 41 additions & 17 deletions src/compiler/crystal/syntax/lexer.cr
Original file line number Diff line number Diff line change
@@ -18,6 +18,13 @@ module Crystal
@token_end_location : Location?
@string_pool : StringPool

# Marker interface for the data associated with a pending heredoc.
# It declares no methods: it only gives a common type to the second
# element of each entry stored in `heredocs`.
module HeredocItem
end

# Heredocs are pushed here when found. They should be processed when a newline is encountered.
getter heredocs = [] of {Token::DelimiterState, HeredocItem}

def initialize(string, string_pool : StringPool? = nil)
@reader = Char::Reader.new(string)
@token = Token.new
@@ -158,30 +165,36 @@ module Crystal
found_closing_single_quote = false

char = next_char
start_here = current_pos

if char == '\''
has_single_quote = true
char = next_char
start_here = current_pos
end

unless ident_start?(char)
unless ident_part?(char)
raise "heredoc identifier starts with invalid character"
end

here << char
end_here = 0

while true
char = next_char
case
when char == '\r'
if peek_next_char == '\n'
next
end_here = current_pos
next_char
break
else
raise "expecting '\\n' after '\\r'"
end
when char == '\n'
incr_line_number 0
end_here = current_pos
break
when ident_part?(char)
here << char
# ok
when char == '\0'
raise "Unexpected EOF on heredoc identifier"
else
@@ -191,8 +204,11 @@ module Crystal
if peek != '\r' && peek != '\n'
raise "expecting '\\n' or '\\r' after closing single quote"
end
elsif has_single_quote
# wait until another quote
else
raise "invalid character #{char.inspect} for heredoc identifier"
end_here = current_pos
break
end
end
end
@@ -201,8 +217,11 @@ module Crystal
raise "expecting closing single quote"
end

here = here.to_s
delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote
end_here -= 1 if has_single_quote

here = string_range(start_here, end_here)

delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote, advance: false
else
@token.type = :"<<"
end
@@ -1176,6 +1195,10 @@ module Crystal
end

def consume_newlines
# If there are heredocs we don't freely consume newlines because
# these will be part of the heredoc string
return unless @heredocs.empty?

if @count_whitespace
return
end
@@ -1721,6 +1744,7 @@ module Crystal

def next_string_token(delimiter_state)
@token.line_number = @line_number
@token.delimiter_state = delimiter_state

start = current_pos
string_end = delimiter_state.end
@@ -1737,13 +1761,13 @@ module Crystal
else
@token.type = :STRING
@token.value = string_end.to_s
@token.delimiter_state = @token.delimiter_state.with_open_count_delta(-1)
@token.delimiter_state = delimiter_state.with_open_count_delta(-1)
end
when string_nest
next_char
@token.type = :STRING
@token.value = string_nest.to_s
@token.delimiter_state = @token.delimiter_state.with_open_count_delta(+1)
@token.delimiter_state = delimiter_state.with_open_count_delta(+1)
when '\\'
if delimiter_state.allow_escapes
if delimiter_state.kind == :regex
@@ -1877,10 +1901,9 @@ module Crystal

if reached_end &&
(current_char == '\n' || current_char == '\0' ||
(current_char == '\r' && peek_next_char == '\n' && next_char) ||
!ident_part?(current_char))
(current_char == '\r' && peek_next_char == '\n' && next_char))
@token.type = :DELIMITER_END
@token.delimiter_state = @token.delimiter_state.with_heredoc_indent(indent)
@token.delimiter_state = delimiter_state.with_heredoc_indent(indent)
else
@reader.pos = old_pos
@column_number = old_column
@@ -1923,8 +1946,9 @@ module Crystal
msg = case delimiter_state.kind
when :command then "Unterminated command literal"
when :regex then "Unterminated regular expression"
when :heredoc then "Unterminated heredoc"
when :string then "Unterminated string literal"
when :heredoc
"Unterminated heredoc: can't find \"#{delimiter_state.end}\" anywhere before the end of file"
when :string then "Unterminated string literal"
else
::raise "unreachable"
end
@@ -2409,8 +2433,8 @@ module Crystal
@token.value = value
end

def delimited_pair(kind, string_nest, string_end, start, allow_escapes = true)
next_char
def delimited_pair(kind, string_nest, string_end, start, allow_escapes = true, advance = true)
next_char if advance
@token.type = :DELIMITER_START
@token.delimiter_state = Token::DelimiterState.new(kind, string_nest, string_end, allow_escapes)
set_token_raw_from_start(start)
88 changes: 74 additions & 14 deletions src/compiler/crystal/syntax/parser.cr
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@ module Crystal
@wants_doc = false
@doc_enabled = false
@no_type_declaration = 0
@consuming_heredocs = false

# This flag tells the parser where it has to consider a "do"
# as belonging to the current parsed call. For example when writing
@@ -1790,6 +1791,13 @@ module Crystal

check :DELIMITER_START

if delimiter_state.kind == :heredoc
node = StringInterpolation.new([] of ASTNode).at(location)
@heredocs << {delimiter_state, node}
next_token
return node
end

next_string_token(delimiter_state)
delimiter_state = @token.delimiter_state

@@ -1814,22 +1822,10 @@ module Crystal
end

if has_interpolation
if needs_heredoc_indent_removed?(delimiter_state)
pieces = remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
else
pieces = pieces.map do |piece|
value = piece.value
value.is_a?(String) ? StringLiteral.new(value) : value
end
end
pieces = combine_interpolation_pieces(pieces, delimiter_state)
result = StringInterpolation.new(pieces).at(location)
else
if needs_heredoc_indent_removed?(delimiter_state)
pieces = remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
string = pieces.join { |piece| piece.as(StringLiteral).value }
else
string = pieces.map(&.value).join
end
string = combine_pieces(pieces, delimiter_state)
result = StringLiteral.new string
end

@@ -1849,6 +1845,26 @@ module Crystal
result
end

# Turns the lexed pieces of a string with interpolation into the list of
# expressions for a StringInterpolation node.
#
# For a heredoc that has indentation to strip, this is delegated to
# `remove_heredoc_indent`. Otherwise every String piece is wrapped in a
# StringLiteral and every other piece is kept as is.
private def combine_interpolation_pieces(pieces, delimiter_state)
  if needs_heredoc_indent_removed?(delimiter_state)
    return remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
  end

  pieces.map do |piece|
    content = piece.value
    if content.is_a?(String)
      StringLiteral.new(content)
    else
      content
    end
  end
end

# Joins the lexed pieces of a string without interpolation into a single
# String.
#
# For a heredoc that has indentation to strip, the pieces first go through
# `remove_heredoc_indent` and the resulting string literals are joined.
# Otherwise the piece values are joined directly.
private def combine_pieces(pieces, delimiter_state)
  unless needs_heredoc_indent_removed?(delimiter_state)
    return pieces.map(&.value).join
  end

  unindented = remove_heredoc_indent(pieces, delimiter_state.heredoc_indent)
  unindented.join { |piece| piece.as(StringLiteral).value }
end

def consume_delimiter(pieces, delimiter_state, has_interpolation)
options = Regex::Options::None
token_end_location = nil
@@ -1927,6 +1943,35 @@ module Crystal
options
end

# Consumes the bodies of all the heredocs that are pending in `@heredocs`.
def consume_heredocs
# Flag checked by `next_token`, so that a newline found while a heredoc
# body is being read doesn't start consuming heredocs again
@consuming_heredocs = true
# Reverse so that `pop?` hands out the heredocs in the order they were pushed
@heredocs.reverse!
while heredoc = @heredocs.pop?
# Each entry is a {delimiter state, item} tuple; the parser pushes
# StringInterpolation nodes as items
consume_heredoc(heredoc[0], heredoc[1].as(StringInterpolation))
end
@consuming_heredocs = false
end

# Reads the body of a single heredoc and stores it in `node`, the
# StringInterpolation that was created (with no expressions) when the
# heredoc start was parsed.
def consume_heredoc(delimiter_state, node)
# Start lexing string tokens using this heredoc's delimiter state
next_string_token(delimiter_state)
delimiter_state = @token.delimiter_state

pieces = [] of Piece
has_interpolation = false

# `options` is returned by consume_delimiter but is not used here
delimiter_state, has_interpolation, options, token_end_location = consume_delimiter pieces, delimiter_state, has_interpolation

if has_interpolation
# Append every piece as an expression of the interpolation
pieces = combine_interpolation_pieces(pieces, delimiter_state)
node.expressions.concat(pieces)
else
# No interpolation: the whole body becomes a single StringLiteral
string = combine_pieces(pieces, delimiter_state)
node.expressions.push(StringLiteral.new(string).at(node.location).at_end(token_end_location))
end

node.end_location = token_end_location
end

def needs_heredoc_indent_removed?(delimiter_state)
delimiter_state.kind == :heredoc && delimiter_state.heredoc_indent > 0
end
@@ -1939,6 +1984,7 @@ module Crystal
pieces.each_with_index do |piece, i|
value = piece.value
line_number = piece.line_number

this_piece_is_in_new_line = line_number != previous_line_number
next_piece_is_in_new_line = i == pieces.size - 1 || pieces[i + 1].line_number != line_number
if value.is_a?(String)
@@ -5537,5 +5583,19 @@ module Crystal
@visibility = old_visibility
value
end

# Advances to the next token. When that token is a newline and there are
# pending heredocs (and they are not already being consumed), their bodies
# are consumed right away. The token obtained from `super` is returned.
def next_token
  token = super

  pending = !@heredocs.empty?
  consume_heredocs if token.type == :NEWLINE && !@consuming_heredocs && pending

  token
end
end

# Lets the parser store the StringInterpolation node of a heredoc as the
# `HeredocItem` of an entry in the lexer's list of pending heredocs.
class StringInterpolation
include Lexer::HeredocItem
end
end
68 changes: 55 additions & 13 deletions src/compiler/crystal/tools/formatter.cr
Original file line number Diff line number Diff line change
@@ -45,6 +45,16 @@ module Crystal
end_line : Int32,
difference : Int32

# Data the formatter remembers for a heredoc whose start was already
# written: the node, a copy of the start token, and the formatter's line,
# column, indent and string continuation values at that point. It is stored
# in the lexer's pending heredocs and used later to format the heredoc body.
record HeredocInfo,
node : StringInterpolation,
token : Token,
line : Int32,
column : Int32,
indent : Int32,
string_continuation : Int32 do
include Lexer::HeredocItem
end

@lexer : Lexer
@comment_columns : Array(Int32?)
@indent : Int32
@@ -86,8 +96,7 @@ module Crystal
@indent = 0
@line = 0
@column = 0
@token = @lexer.token
@token = next_token
@token = @lexer.next_token

@output = IO::Memory.new(source.bytesize)
@line_output = IO::Memory.new
@@ -500,19 +509,30 @@ module Crystal
end

def visit(node : StringInterpolation)
if @token.delimiter_state.kind == :heredoc
# For heredoc, only write the start: on a newline will print it
@lexer.heredocs << {@token.delimiter_state, HeredocInfo.new(node, @token.dup, @line, @column, @indent, @string_continuation)}
write @token.raw
next_token
return false
end

check :DELIMITER_START

column = @column
old_indent = @indent
old_string_continuation = @string_continuation
is_regex = @token.delimiter_state.kind == :regex
indent_difference = @token.column_number - (@column + 1)
visit_string_interpolation(node, @token, @line, @column, @indent, @string_continuation)
end

write @token.raw
def visit_string_interpolation(node, token, line, column, old_indent, old_string_continuation, wrote_token = false)
@token = token

is_regex = token.delimiter_state.kind == :regex
indent_difference = token.column_number - (column + 1)

write token.raw unless wrote_token
next_string_token

delimiter_state = @token.delimiter_state
is_heredoc = @token.delimiter_state.kind == :heredoc
delimiter_state = token.delimiter_state
is_heredoc = token.delimiter_state.kind == :heredoc
@last_is_heredoc = is_heredoc

heredoc_line = @line
@@ -615,6 +635,26 @@ module Crystal
end
end

# Formats the bodies of all the heredocs pending in the lexer.
private def consume_heredocs
# Flag checked by `next_token`, so that a newline found while a heredoc
# body is being formatted doesn't start consuming heredocs again
@consuming_heredocs = true
# Reverse so that `pop?` hands out the heredocs in the order they were pushed
@lexer.heredocs.reverse!
while heredoc = @lexer.heredocs.pop?
# Each entry is a {delimiter state, item} tuple; the formatter pushes
# HeredocInfo records as items
consume_heredoc(heredoc[0], heredoc[1].as(HeredocInfo))
# write_line is called after every heredoc except the last one
write_line unless @lexer.heredocs.empty?
end
@consuming_heredocs = false
end

# Formats the body of a single heredoc from the state saved in `info`
# when its start was visited. `delimiter_state` is not used here.
private def consume_heredoc(delimiter_state, info)
visit_string_interpolation(
info.node,
info.token,
info.line,
info.column,
info.indent, info.string_continuation,
# The heredoc start token was already written when the node was visited
wrote_token: true)
end

def visit(node : RegexLiteral)
accept node.value

@@ -4012,14 +4052,16 @@ module Crystal
io << @output
end

def maybe_reset_passed_backslash_newline
end

# Advances the lexer, stores the new token in `@token` and returns `@token`.
#
# On a delimiter start, the lines the lexer advanced are accounted for with
# `increment_lines`. On a newline with pending heredocs (that are not
# already being consumed), a line is written and the heredocs are consumed.
def next_token
  line_before = @lexer.line_number
  @token = @lexer.next_token

  case @token.type
  when :DELIMITER_START
    increment_lines(@lexer.line_number - line_before)
  when :NEWLINE
    unless @lexer.heredocs.empty? || @consuming_heredocs
      write_line
      consume_heredocs
    end
  end

  @token
end

0 comments on commit 8eb8554

Please sign in to comment.