Skip to content

Commit

Permalink
Add preliminary support for dedenting heredocs (<<~). Issue #3565.
Browse files Browse the repository at this point in the history
I am not saying this resolves the issue: ripper still needs the same
change, and something is still broken with mixed interpolated strings.
Common cases work, though, and I want to clean the tree for the next
round of fixes. I also re-enabled test_syntax.rb, which will add
some failures, but we can actually parse this file now :)
enebo committed Feb 4, 2016
1 parent 0f21ba1 commit 51315e0
Showing 9 changed files with 3,525 additions and 3,418 deletions.
71 changes: 67 additions & 4 deletions core/src/main/java/org/jruby/lexer/LexingCommon.java
Original file line number Diff line number Diff line change
@@ -16,6 +16,73 @@
* Code and constants common to both ripper and main parser.
*/
public abstract class LexingCommon {
protected int heredoc_indent = 0;
protected int heredoc_line_indent = 0;

/**
 * Returns the current value of {@code heredoc_indent}.
 *
 * Callers visible in this change treat a value greater than zero as
 * "a dedenting heredoc is being processed" and zero as "inactive".
 *
 * @return the current heredoc indent value
 */
public int getHeredocIndent() {
return heredoc_indent;
}

/**
 * Strips up to {@code width} columns of leading whitespace from
 * {@code string} in place by moving its begin offset forward and
 * shrinking its real size.
 *
 * A space counts as one column. A tab advances to the next multiple of
 * {@link #TAB_WIDTH}; a tab that would overshoot {@code width} is left in
 * the string and ends the scan. Any other byte also ends the scan.
 *
 * @param string the byte list to trim (modified in place)
 * @param width  the maximum number of columns to remove
 * @return the number of bytes removed from the front of {@code string}
 */
protected int dedent_string(ByteList string, int width) {
    final byte[] bytes = string.unsafeBytes();
    final int start = string.begin();
    final int size = string.realSize();
    int removed = 0;
    int column = 0;

    while (removed < size && column < width) {
        final byte current = bytes[start + removed];

        if (current == '\t') {
            final int nextStop = TAB_WIDTH * (column / TAB_WIDTH + 1);
            // Consuming this tab would remove more than the requested width.
            if (nextStop > width) break;
            column = nextStop;
        } else if (current == ' ') {
            column++;
        } else {
            // First non-whitespace byte: nothing more to strip.
            break;
        }

        removed++;
    }

    string.setBegin(start + removed);
    string.setRealSize(size - removed);
    return removed;
}

/**
 * Clears the heredoc indentation state by setting both
 * {@code heredoc_indent} and {@code heredoc_line_indent} back to zero.
 */
public void reset() {
    heredoc_indent = heredoc_line_indent = 0;
}

/**
 * Overwrites the per-line indentation counter.
 *
 * {@link #update_heredoc_indent(int)} uses {@code -1} in this field to mean
 * that a non-whitespace character has already been seen on the current
 * line; any other value is the whitespace column counted so far.
 *
 * @param heredoc_line_indent the new per-line indentation value
 */
public void setHeredocLineIndent(int heredoc_line_indent) {
this.heredoc_line_indent = heredoc_line_indent;
}

/**
 * Overwrites the heredoc indent value.
 *
 * {@link #update_heredoc_indent(int)} lowers this field to the smallest
 * line indentation it observes; callers visible in this change pass
 * {@code 0} to switch dedent handling off.
 *
 * @param heredoc_indent the new heredoc indent value
 */
public void setHeredocIndent(int heredoc_indent) {
this.heredoc_indent = heredoc_indent;
}

/**
 * Feeds one character of heredoc content into the indentation tracker.
 *
 * While {@code heredoc_line_indent} is non-negative the method is counting
 * the leading whitespace of the current line: a space adds one column and a
 * tab jumps to the next multiple of {@link #TAB_WIDTH}. The first character
 * that is neither whitespace nor a newline lowers {@code heredoc_indent} to
 * the counted column if that is smaller, then sets
 * {@code heredoc_line_indent} to {@code -1}. In that state only a newline
 * has any effect: it restarts counting at zero.
 *
 * @param c the character just read
 * @return {@code true} if {@code c} was counted as leading whitespace
 *         (space or tab), {@code false} otherwise
 */
public boolean update_heredoc_indent(int c) {
    // Leading whitespace of this line is already accounted for; wait for the newline.
    if (heredoc_line_indent == -1) {
        if (c == '\n') heredoc_line_indent = 0;
        return false;
    }

    switch (c) {
        case ' ':
            heredoc_line_indent += 1;
            return true;
        case '\t':
            // Round up to the next tab stop.
            heredoc_line_indent = (heredoc_line_indent / TAB_WIDTH + 1) * TAB_WIDTH;
            return true;
        case '\n':
            // A newline while still counting leaves the state untouched.
            return false;
        default:
            // First real content on the line: keep the smallest indent seen so far.
            heredoc_indent = Math.min(heredoc_indent, heredoc_line_indent);
            heredoc_line_indent = -1;
            return false;
    }
}

protected abstract void magicCommentEncoding(ByteList encoding);
protected abstract void setCompileOptionFlag(String name, ByteList value);
protected abstract void setTokenInfo(String name, ByteList value);

public static final int TAB_WIDTH = 8;

// ruby constants for strings (should this be moved somewhere else?)
public static final int STR_FUNC_ESCAPE=0x01;
public static final int STR_FUNC_EXPAND=0x02;
@@ -138,8 +205,4 @@ public boolean parseMagicComment(Ruby runtime, ByteList magicLine) throws IOExce

return true;
}

protected abstract void magicCommentEncoding(ByteList encoding);
protected abstract void setCompileOptionFlag(String name, ByteList value);
protected abstract void setTokenInfo(String name, ByteList value);
}
17 changes: 17 additions & 0 deletions core/src/main/java/org/jruby/lexer/yacc/HeredocTerm.java
Original file line number Diff line number Diff line change
@@ -118,6 +118,12 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {
break;
}
}

// When a dedenting heredoc is active, measure this line's leading whitespace
// so the lexer can track the smallest indentation across the heredoc body.
if (lexer.getHeredocIndent() > 0) {
    // Walk forward one byte at a time from p. The offset must include i;
    // reading lexer.p(p) on every pass would test the same byte repeatedly
    // and never hand the first non-blank byte to update_heredoc_indent.
    for (int i = 0; p + i < pend && lexer.update_heredoc_indent(lexer.p(p + i)); i++) {}
    lexer.setHeredocLineIndent(0);
}

if (str != null) {
str.append(lbuf.makeShared(p, pend - p));
} else {
@@ -126,6 +132,11 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {

if (pend < lexer.lex_pend) str.append('\n');
lexer.lex_goto_eol();

if (lexer.getHeredocIndent() > 0) {
lexer.setValue(str);
return Tokens.tSTRING_CONTENT;
}
// MRI null checks str in this case but it is unconditionally non-null?
if (lexer.nextc() == -1) return error(lexer, len, null, eos);
} while (!lexer.whole_match_p(eos, indent));
@@ -162,6 +173,12 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {
}
tok.append(lexer.nextc());

if (lexer.getHeredocIndent() > 0) {
lexer.lex_goto_eol();
lexer.setValue(lexer.createStr(tok, 0));
return Tokens.tSTRING_CONTENT;
}

if ((c = lexer.nextc()) == EOF) return error(lexer, len, str, eos);
} while (!lexer.whole_match_p(eos, indent));
str = tok;
35 changes: 33 additions & 2 deletions core/src/main/java/org/jruby/lexer/yacc/RubyLexer.java
Original file line number Diff line number Diff line change
@@ -53,6 +53,7 @@
import org.jruby.ast.ComplexNode;
import org.jruby.ast.FixnumNode;
import org.jruby.ast.FloatNode;
import org.jruby.ast.ListNode;
import org.jruby.ast.Node;
import org.jruby.ast.NthRefNode;
import org.jruby.ast.NumericNode;
@@ -366,7 +367,8 @@ public RubyLexer(ParserSupport support, LexerSource source) {
reset();
}

public final void reset() {
public void reset() {
super.reset();
token = 0;
yaccValue = null;
setState(null);
@@ -461,6 +463,30 @@ public int nextc() {
return c;
}

/**
 * Removes the current {@code heredoc_indent} columns of leading whitespace
 * from the string content under {@code root}.
 *
 * A {@code StrNode} root has its value dedented directly. For a
 * {@code ListNode} root, every direct child that is a {@code StrNode} is
 * dedented; other children are left alone. Does nothing when
 * {@code heredoc_indent} is not positive or {@code root} is {@code null}.
 *
 * @param root the parsed string contents, may be {@code null}
 */
public void heredoc_dedent(Node root) {
    final int width = heredoc_indent;

    if (root == null || width <= 0) return;

    if (root instanceof StrNode) {
        dedent_string(((StrNode) root).getValue(), width);
        return;
    }

    if (!(root instanceof ListNode)) return;

    final ListNode parts = (ListNode) root;
    // FIXME: I need a test case to see how this fails because MRI has bol (begin of line) boolean when
    // it encounters non-str/dstr nodes but I am missing the knowledge to understand why it is needed
    // and our layout is not as general as theirs so I cannot just nd->lit.
    for (int idx = 0, count = parts.size(); idx < count; idx++) {
        final Node part = parts.get(idx);

        if (part instanceof StrNode) {
            dedent_string(((StrNode) part).getValue(), width);
        }
    }
}

public boolean peek(int c) {
return peek(c, 0);
}
@@ -1000,6 +1026,11 @@ private int hereDocumentIdentifier() throws IOException {
if (c == '-') {
c = nextc();
func = STR_FUNC_INDENT;
} else if (c == '~') {
c = nextc();
func = STR_FUNC_INDENT;
heredoc_indent = Integer.MAX_VALUE;
heredoc_line_indent = 0;
}

ByteList markerValue;
@@ -1030,7 +1061,7 @@ private int hereDocumentIdentifier() throws IOException {
if (!isIdentifierChar(c)) {
pushback(c);
if ((func & STR_FUNC_INDENT) != 0) {
pushback('-');
pushback(heredoc_indent > 0 ? '~' : '-');
}
return 0;
}
4 changes: 4 additions & 0 deletions core/src/main/java/org/jruby/lexer/yacc/StringTerm.java
Original file line number Diff line number Diff line change
@@ -263,6 +263,10 @@ public int parseStringIntoBuffer(RubyLexer lexer, ByteList buffer, Encoding enc[
int c;

while ((c = lexer.nextc()) != EOF) {
if (lexer.getHeredocIndent() > 0) {
lexer.update_heredoc_indent(c);
}

if (begin != '\0' && c == begin) {
nest++;
} else if (c == end) {
10 changes: 9 additions & 1 deletion core/src/main/java/org/jruby/parser/ParserSupport.java
Original file line number Diff line number Diff line change
@@ -943,7 +943,15 @@ public Node literal_concat(ISourcePosition position, Node head, Node tail) {

if (head instanceof EvStrNode) {
head = createDStrNode(head.getPosition()).add(head);
}
}

if (lexer.getHeredocIndent() > 0) {
if (head instanceof StrNode) {
head = createDStrNode(head.getPosition()).add(head);
} else if (head instanceof DStrNode) {
return list_append(head, tail);
}
}

if (tail instanceof StrNode) {
if (head instanceof StrNode) {
883 changes: 449 additions & 434 deletions core/src/main/java/org/jruby/parser/RubyParser.java

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion core/src/main/java/org/jruby/parser/RubyParser.y
Original file line number Diff line number Diff line change
@@ -1893,12 +1893,17 @@ string : tCHAR {
}

string1 : tSTRING_BEG string_contents tSTRING_END {
lexer.heredoc_dedent($2);
lexer.setHeredocIndent(0);
$$ = $2;
}

xstring : tXSTRING_BEG xstring_contents tSTRING_END {
ISourcePosition position = support.getPosition($2);

lexer.heredoc_dedent($2);
lexer.setHeredocIndent(0);

if ($2 == null) {
$$ = new XStrNode(position, null, StringSupport.CR_7BIT);
} else if ($2 instanceof StrNode) {
@@ -2028,14 +2033,19 @@ string_content : tSTRING_CONTENT {
} {
$$ = lexer.getBraceNest();
lexer.setBraceNest(0);
} {
$$ = lexer.getHeredocIndent();
lexer.setHeredocIndent(0);
} compstmt tSTRING_DEND {
lexer.getConditionState().restart();
lexer.setStrTerm($<StrTerm>2);
lexer.getCmdArgumentState().reset($<Long>3.longValue());
lexer.setState($<LexState>4);
lexer.setBraceNest($<Integer>5);
lexer.setHeredocIndent($<Integer>6);
lexer.setHeredocLineIndent(-1);

$$ = support.newEvStrNode(support.getPosition($6), $6);
$$ = support.newEvStrNode(support.getPosition($7), $7);
}

string_dvar : tGVAR {
5,908 changes: 2,934 additions & 2,974 deletions core/src/main/java/org/jruby/parser/YyTables.java

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions test/mri.index
Original file line number Diff line number Diff line change
@@ -90,8 +90,7 @@ ruby/test_stringchar.rb
ruby/test_struct.rb
ruby/test_super.rb
ruby/test_symbol.rb
# Commented out for #3565
#ruby/test_syntax.rb
ruby/test_syntax.rb
ruby/test_system.rb
ruby/test_thread.rb
ruby/test_time.rb

0 comments on commit 51315e0

Please sign in to comment.