Skip to content
Permalink

Comparing changes

base repository: jruby/jruby
base: a07697364174
head repository: jruby/jruby
compare: 94505082ee25
  • 9 commits
  • 9 files changed
  • 2 contributors

Commits on Dec 19, 2017

  1. Ensure assignable constant failure produces error

    The recent fixes ensured that the on_assign_error callback was issued, but MRI fails the whole parse in this case.
    grddev committed Dec 19, 2017
    7b02e5c
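    For reference, MRI rejects constant assignment inside a method body outright ("dynamic constant assignment" is a SyntaxError), and after this commit JRuby's Ripper fails the parse as well. A minimal illustration, mirroring the test_dyn_const_lhs case added in this PR:

        require 'ripper'

        # def m; C = 1; end        # MRI: SyntaxError, dynamic constant assignment

        # Ripper dispatches on_assign_error and the parse fails,
        # so the sexp builder returns nil instead of a tree.
        Ripper.sexp("def m;C=s;end")  # => nil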

Commits on Dec 20, 2017

  1. Do not skip whitespace in percent literals

    I'm not sure why this was introduced; as far as I can tell, skipping whitespace was only ever relevant for %w/%W/%i/%I, and those handle whitespace removal explicitly in their respective cases.
    grddev committed Dec 20, 2017
    b07e268
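    A plain-Ruby illustration of the behaviour being restored (the test_literal_whitespace cases later in this diff cover the same ground): whitespace is only a separator in the list forms %w/%W/%i/%I, while ordinary percent strings keep it verbatim.

        %{ x }      # => " x "          the spaces are part of the literal
        %(\t)       # => "\t"           same for tabs and newlines
        %w( a  b )  # => ["a", "b"]     only %w/%W/%i/%I treat whitespace as a separator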

Commits on Dec 21, 2017

  1. Move peekVariableName to LexingCommon

    So that Ripper can also benefit from it.
    grddev committed Dec 21, 2017
    81b4fe6
  2. Ensure # is included in string with invalid gvar

    The parser correctly identified that no interpolation should happen, but failed to include the hash symbol in the result.
    grddev committed Dec 21, 2017
    e0ad1e4
  3. 78291a6
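    To see the "Ensure # is included in string with invalid gvar" fix above in plain Ruby (these mirror the test_parse_invalid_gvar cases added below): when '#' is not followed by a valid interpolation, the '#' itself must survive as literal text.

        "# comment"  # => "# comment"   no interpolation attempted, '#' kept
        "#${"        # => "#${"         '$' followed by '{' is not a global variable
        "##${"       # => "##${"        every '#' stays in the result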

Commits on Dec 22, 2017

  1. Fix handling of invalid gvar reference in heredoc

    For both the main Ruby parser and Ripper.
    grddev committed Dec 22, 2017
    766bfce
  2. Tweak when we do next/pushback

    The logic makes more sense to me, and seems to work better with the cryptic test cases this introduces.
    grddev committed Dec 22, 2017
    9d8877a
  3. 93b588b
  4. Merge pull request #4911 from grddev/parsing-fixes

    Miscellaneous parsing fixes
    enebo authored Dec 22, 2017
    9450508
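    The heredoc fix in the first commit above can be seen in plain Ruby (mirroring the heredoc case in test_parse_invalid_gvar): a '#$' that does not start a valid global-variable reference stays literal inside a heredoc, just as it does in a double-quoted string.

        eval("<<E\n \#$\{\nE\n")  # => " \#${\n"
        # the '#' and '$' inside the heredoc body stay plain text; no interpolation is attempted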
25 changes: 15 additions & 10 deletions core/src/main/java/org/jruby/ext/ripper/HeredocTerm.java
@@ -166,16 +166,14 @@ public int parseString(RipperLexer lexer, LexerSource src) throws java.io.IOExce
ByteList tok = new ByteList();
tok.setEncoding(lexer.getEncoding());
if (c == '#') {
switch (c = lexer.nextc()) {
case '$':
case '@':
lexer.pushback(c);
return RubyParser.tSTRING_DVAR;
case '{':
lexer.commandStart = true;
return RubyParser.tSTRING_DBEG;
int token = lexer.peekVariableName(RipperParser.tSTRING_DVAR, RipperParser.tSTRING_DBEG);

if (token != 0) {
return token;
} else {
tok.append(c);
c = lexer.nextc();
}
tok.append('#');
}

// MRI has extra pointer which makes our code look a little bit more strange in comparison
@@ -195,7 +193,14 @@ public int parseString(RipperLexer lexer, LexerSource src) throws java.io.IOExce
return RubyParser.tSTRING_CONTENT;
}
tok.append(lexer.nextc());


if (lexer.getHeredocIndent() > 0) {
lexer.lex_goto_eol();
lexer.setValue(lexer.createStr(tok, 0));
lexer.flush_string_content(enc[0]);
return RubyParser.tSTRING_CONTENT;
}

if ((c = lexer.nextc()) == EOF) return error(lexer, len, str, eos);
} while (!lexer.whole_match_p(eos, indent));
str = tok;
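
The heredoc-indent early return in the second hunk above appears to cover squiggly heredocs (<<~) whose lines mix indentation, literal text and interpolation; the updated test_heredoc case later in this diff exercises exactly this. An illustrative snippet (a and b stand for any locals in scope):

    a, b = "x", "y"
    s = <<~EOS
      A #{a}
      #{b}
    EOS
    s  # => "A x\ny\n"
    # Ripper reports the pieces around each interpolation as separate
    # tstring_content events, as the updated test_heredoc assertion shows.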
8 changes: 0 additions & 8 deletions core/src/main/java/org/jruby/ext/ripper/RipperLexer.java
@@ -491,14 +491,6 @@ private int parseQuote(int c) throws IOException {
begin = '\0';
}

// consume spaces here to record them as part of token
int w = nextc();
while (Character.isWhitespace(w)) {
value = value + (char) w;
w = nextc();
}
pushback(w);

switch (c) {
case 'Q':
lex_strterm = new StringTerm(str_dquote, begin ,end);
6 changes: 3 additions & 3 deletions core/src/main/java/org/jruby/ext/ripper/RipperParserBase.java
@@ -111,10 +111,10 @@ public IRubyObject arg_var(IRubyObject identifier) {

public IRubyObject assignableConstant(IRubyObject value) {
if (isInDef() || isInSingle()) {
return dispatch("on_assign_error", value);
} else {
return value;
value = dispatch("on_assign_error", value);
error();
}
return value;
}

public IRubyObject assignableIdentifier(IRubyObject value) {
19 changes: 8 additions & 11 deletions core/src/main/java/org/jruby/ext/ripper/StringTerm.java
@@ -105,19 +105,16 @@ public int parseString(RipperLexer lexer, LexerSource src) throws IOException {
}

if ((flags & STR_FUNC_EXPAND) != 0 && c == '#') {
c = lexer.nextc();
switch (c) {
case '$':
case '@':
lexer.pushback(c);
return RubyParser.tSTRING_DVAR;
case '{':
return RubyParser.tSTRING_DBEG;
int token = lexer.peekVariableName(RubyParser.tSTRING_DVAR, RubyParser.tSTRING_DBEG);

if (token != 0) {
return token;
} else {
buffer.append(c);
}
buffer.append((byte) '#');
} else {
lexer.pushback(c);
}
lexer.pushback(c);

Encoding enc[] = new Encoding[1];
enc[0] = lexer.getEncoding();

72 changes: 72 additions & 0 deletions core/src/main/java/org/jruby/lexer/LexingCommon.java
@@ -291,6 +291,78 @@ public boolean isASCII(int c) {
return Encoding.isMbcAscii((byte) c);
}

// Return of 0 means failed to find anything. Non-zero means return that from lexer.
public int peekVariableName(int tSTRING_DVAR, int tSTRING_DBEG) throws IOException {
int c = nextc(); // byte right after #
int significant = -1;
switch (c) {
case '$': { // we unread back to before the $ so next lex can read $foo
int c2 = nextc();

if (c2 == '-') {
int c3 = nextc();

if (c3 == EOF) {
pushback(c3); pushback(c2);
return 0;
}

significant = c3; // $-0 potentially
pushback(c3); pushback(c2);
break;
} else if (isGlobalCharPunct(c2)) { // $_ potentially
setValue("#" + (char) c2);

pushback(c2); pushback(c);
return tSTRING_DVAR;
}

significant = c2; // $FOO potentially
pushback(c2);
break;
}
case '@': { // we unread back to before the @ so next lex can read @foo
int c2 = nextc();

if (c2 == '@') {
int c3 = nextc();

if (c3 == EOF) {
pushback(c3); pushback(c2);
return 0;
}

significant = c3; // #@@foo potentially
pushback(c3); pushback(c2);
break;
}

significant = c2; // #@foo potentially
pushback(c2);
break;
}
case '{':
//setBraceNest(getBraceNest() + 1);
setValue("#" + (char) c);
commandStart = true;
return tSTRING_DBEG;
default:
// We did not find significant char after # so push it back to
// be processed as an ordinary string.
pushback(c);
return 0;
}

pushback(c);

if (significant != -1 && Character.isAlphabetic(significant) || significant == '_') {
setValue("#" + significant);
return tSTRING_DVAR;
}

return 0;
}

// FIXME: I added number gvars here and they did not.
public boolean isGlobalCharPunct(int c) {
switch (c) {
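
As I read the shared peekVariableName helper above, sequences like #$foo, #@foo and #@@foo yield tSTRING_DVAR, #{...} yields tSTRING_DBEG, and anything else returns 0 so the '#' stays literal string content. A rough Ripper-based probe (not part of the PR) shows the same split in the lexer events MRI reports:

    require 'ripper'

    Ripper.lex('"#$foo"').map { |t| t[1] }  # includes :on_embvar and :on_gvar
    Ripper.lex('"#@foo"').map { |t| t[1] }  # includes :on_embvar and :on_ivar
    Ripper.lex('"#{1}"').map  { |t| t[1] }  # includes :on_embexpr_beg / :on_embexpr_end
    Ripper.lex('"#${"').map   { |t| t[1] }  # only :on_tstring_content between the quotes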
16 changes: 7 additions & 9 deletions core/src/main/java/org/jruby/lexer/yacc/HeredocTerm.java
@@ -148,16 +148,14 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {
ByteList tok = new ByteList();
tok.setEncoding(lexer.getEncoding());
if (c == '#') {
switch (c = lexer.nextc()) {
case '$':
case '@':
lexer.pushback(c);
return RubyParser.tSTRING_DVAR;
case '{':
lexer.commandStart = true;
return RubyParser.tSTRING_DBEG;
int token = lexer.peekVariableName(RubyParser.tSTRING_DVAR, RubyParser.tSTRING_DBEG);

if (token != 0) {
return token;
} else {
tok.append(c);
c = lexer.nextc();
}
tok.append('#');
}

// MRI has extra pointer which makes our code look a little bit more strange in comparison
82 changes: 8 additions & 74 deletions core/src/main/java/org/jruby/lexer/yacc/StringTerm.java
@@ -91,77 +91,6 @@ private int endFound(RubyLexer lexer) throws IOException {
return RubyParser.tSTRING_END;
}

// Return of 0 means failed to find anything. Non-zero means return that from lexer.
private int parsePeekVariableName(RubyLexer lexer) throws IOException {
int c = lexer.nextc(); // byte right after #
int significant = -1;
switch (c) {
case '$': { // we unread back to before the $ so next lex can read $foo
int c2 = lexer.nextc();

if (c2 == '-') {
int c3 = lexer.nextc();

if (c3 == EOF) {
lexer.pushback(c3); lexer.pushback(c2);
return 0;
}

significant = c3; // $-0 potentially
lexer.pushback(c3); lexer.pushback(c2);
break;
} else if (lexer.isGlobalCharPunct(c2)) { // $_ potentially
lexer.setValue("#" + (char) c2);

lexer.pushback(c2); lexer.pushback(c);
return RubyParser.tSTRING_DVAR;
}

significant = c2; // $FOO potentially
lexer.pushback(c2);
break;
}
case '@': { // we unread back to before the @ so next lex can read @foo
int c2 = lexer.nextc();

if (c2 == '@') {
int c3 = lexer.nextc();

if (c3 == EOF) {
lexer.pushback(c3); lexer.pushback(c2);
return 0;
}

significant = c3; // #@@foo potentially
lexer.pushback(c3); lexer.pushback(c2);
break;
}

significant = c2; // #@foo potentially
lexer.pushback(c2);
break;
}
case '{':
//lexer.setBraceNest(lexer.getBraceNest() + 1);
lexer.setValue("#" + (char) c);
lexer.commandStart = true;
return RubyParser.tSTRING_DBEG;
default:
// We did not find significant char after # so push it back to
// be processed as an ordinary string.
lexer.pushback(c);
return 0;
}

if (significant != -1 && Character.isAlphabetic(significant) || significant == '_') {
lexer.pushback(c);
lexer.setValue("#" + significant);
return RubyParser.tSTRING_DVAR;
}

return 0;
}

public int parseString(RubyLexer lexer) throws IOException {
boolean spaceSeen = false;
int c;
@@ -188,11 +117,16 @@ public int parseString(RubyLexer lexer) throws IOException {
ByteList buffer = createByteList(lexer);
lexer.newtok(true);
if ((flags & STR_FUNC_EXPAND) != 0 && c == '#') {
int token = parsePeekVariableName(lexer);
int token = lexer.peekVariableName(RubyParser.tSTRING_DVAR, RubyParser.tSTRING_DBEG);

if (token != 0) return token;
if (token != 0) {
return token;
} else {
buffer.append(c);
}
} else {
lexer.pushback(c);
}
lexer.pushback(c);

Encoding enc[] = new Encoding[1];
enc[0] = lexer.getEncoding();
11 changes: 11 additions & 0 deletions test/jruby/test_parsing.rb
@@ -35,4 +35,15 @@ def test_parse_of_do_symbol

def foo(*args)
end

def test_parse_invalid_gvar
assert_equal '# comment', eval('"# comment"')
assert_equal '#$', eval('%{#$}')
assert_equal '##$', eval('%{##$}')
assert_equal '#${', eval('"#${"')
assert_equal ' #${', eval('" #${"')
assert_equal ' # ##${', eval('" # ##${"')
assert_equal " \#${\n", eval("<<E\n \#$\{\nE\n")
assert_equal " # #\#${\n", eval("<<E\n \# \#\#$\{\nE\n")
end
end
25 changes: 25 additions & 0 deletions test/jruby/test_ripper.rb
@@ -53,5 +53,30 @@ def test_command_args
def test_heredoc
assert_equal [:on_string_content, [:@tstring_content, "x\n", [2, 0]]], extract("<<EOS\nx\nEOS\n", :on_string_content)
assert_equal [:on_string_content, [:string_embexpr, [[:vcall, [:@ident, "o", [2, 2]]]]], [:@tstring_content, "x\n", [2, 4]]], extract("<<EOS\n\#{o}x\nEOS\n", :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, "A ", [2, 2]], [:string_embexpr, [[:vcall, [:@ident, "a", [2, 6]]]]], [:@tstring_content, "\n", [2, 8]], [:@tstring_content, "", [3, 2]], [:string_embexpr, [[:vcall, [:@ident, "b", [3, 4]]]]], [:@tstring_content, "\n", [3, 6]]], extract("<<~EOS\n A \#{a}\n \#{b}\nEOS\n", :on_string_content)
end

def test_dyn_const_lhs
assert_equal nil, Ripper.sexp("def m;C=s;end")
end

def test_literal_whitespace
assert_equal [:on_string_content, [:@tstring_content, "\n", [1, 2]]], extract("%{\n}", :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, " ", [1, 2]]], extract("%[ ]", :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, "\t", [1, 2]]], extract("%(\t)", :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, "\r", [1, 2]]], extract("%|\r|", :on_string_content)
end

def test_invalid_gvar
assert_equal [:on_string_content, [:@tstring_content, '# comment', [1, 1]]], extract('"# comment"', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, '#', [1, 1]]], extract('"#"', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, '##', [1, 1]]], extract('"##"', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, '#${', [1, 1]]], extract('"#${"', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, '#', [1, 1]], [:@tstring_content, '#${', [1, 2]]], extract('"##${"', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, ' ', [1, 1]], [:@tstring_content, '#${', [1, 2]]], extract('" #${"', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, '#${ ', [1, 1]]], extract('"#${ "', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, '# #', [1, 1]], [:@tstring_content, '#${', [1, 4]]], extract('"# ##${"', :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, "\#$}\n", [2, 0]]], extract("<<E\n\#$}\nE\n", :on_string_content)
assert_equal [:on_string_content, [:@tstring_content, '# #', [2, 0]], [:@tstring_content, "\#${\n", [2, 3]]], extract("<<E\n\# \#\#${\nE\n", :on_string_content)
end
end