Follow up to fix for #5062. Port back common logic for the heredoc bogus interpolation characters fix.
jruby · Feb 24, 2018 · b8e7856 · b8e7856
1 parent 9ab5f65
commit b8e7856
Showing 3 changed files with 76 additions and 79 deletions.
diff --git a/core/src/main/java/org/jruby/lexer/LexingCommon.java b/core/src/main/java/org/jruby/lexer/LexingCommon.java
@@ -232,6 +232,75 @@ public boolean isASCII(int c) {
         return Encoding.isMbcAscii((byte) c);
     }
 
+    /**
+     * Looks at the input right after a '#' in string content to see whether it starts an interpolation.
+     * @return the given tSTRING_DBEG for "#{", the given tSTRING_DVAR for "#$x", "#@x" or "#@@x", otherwise 0
+     */
+    public int peekVariableName(int tSTRING_DVAR, int tSTRING_DBEG) throws IOException {
+        int c = nextc(); // byte right after #
+        int significant = -1; // first char of the would-be variable name; -1 while unknown
+        switch (c) {
+            case '$': {  // we unread back to before the $ so next lex can read $foo
+                int c2 = nextc();
+
+                if (c2 == '-') {
+                    int c3 = nextc();
+                    if (c3 == EOF) {                               // "#$-" at end of input: nothing found
+                        pushback(c3); pushback(c2);
+                        return 0;
+                    }
+
+                    significant = c3;                              // $-0 potentially
+                    pushback(c3); pushback(c2);
+                    break;
+                } else if (isGlobalCharPunct(c2)) {          // $_ potentially
+                    setValue("#" + (char) c2);
+                    pushback(c2); pushback(c);
+                    return tSTRING_DVAR;
+                }
+
+                significant = c2;                                  // $FOO potentially
+                pushback(c2);
+                break;
+            }
+            case '@': {  // we unread back to before the @ so next lex can read @foo
+                int c2 = nextc();
+
+                if (c2 == '@') {
+                    int c3 = nextc();
+                    if (c3 == EOF) {                                 // "#@@" at end of input: nothing found
+                        pushback(c3); pushback(c2);
+                        return 0;
+                    }
+
+                    significant = c3;                                // #@@foo potentially
+                    pushback(c3); pushback(c2);
+                    break;
+                }
+
+                significant = c2;                                    // #@foo potentially
+                pushback(c2);
+                break;
+            }
+            case '{':
+                //setBraceNest(getBraceNest() + 1);
+                setValue("#" + (char) c);
+                commandStart = true;
+                return tSTRING_DBEG;
+            default:
+                return 0;
+        }
+
+        // We found #@, #$, #@@ but we don't know what at this point (check for valid chars).
+        if ((significant != -1 && Character.isAlphabetic(significant)) || significant == '_') {
+            pushback(c);
+            setValue("#" + (char) significant); // cast so the value holds the character, not its numeric code
+            return tSTRING_DVAR;
+        }
+
+        return 0;
+    }
+
     // FIXME: I added number gvars here and they did not.
     public boolean isGlobalCharPunct(int c) {
         switch (c) {

diff --git a/core/src/main/java/org/jruby/lexer/yacc/HeredocTerm.java b/core/src/main/java/org/jruby/lexer/yacc/HeredocTerm.java
@@ -30,6 +30,7 @@
 package org.jruby.lexer.yacc;
 
 import org.jcodings.Encoding;
+import org.jruby.parser.RubyParser;
 import org.jruby.parser.Tokens;
 import org.jruby.util.ByteList;
 
@@ -148,15 +149,10 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {
             ByteList tok = new ByteList();
             tok.setEncoding(lexer.getEncoding());
             if (c == '#') {
-                switch (c = lexer.nextc()) {
-                    case '$':
-                    case '@':
-                        lexer.pushback(c);
-                        return Tokens.tSTRING_DVAR;
-                    case '{':
-                        lexer.commandStart = true;
-                        return Tokens.tSTRING_DBEG;
-                }
+                int token = lexer.peekVariableName(RubyParser.tSTRING_DVAR, RubyParser.tSTRING_DBEG);
+
+                if (token != 0) return token;
+
                 tok.append('#');
             }
 

diff --git a/core/src/main/java/org/jruby/lexer/yacc/StringTerm.java b/core/src/main/java/org/jruby/lexer/yacc/StringTerm.java
@@ -31,6 +31,7 @@
 import org.jcodings.Encoding;
 import org.jruby.ast.RegexpNode;
 import org.jruby.lexer.yacc.SyntaxException.PID;
+import org.jruby.parser.RubyParser;
 import org.jruby.parser.Tokens;
 import org.jruby.util.ByteList;
 import org.jruby.util.KCode;
@@ -91,75 +92,6 @@ private int endFound(RubyLexer lexer) throws IOException {
             return Tokens.tSTRING_END;
     }
 
-    // Return of 0 means failed to find anything.  Non-zero means return that from lexer.
-    private int parsePeekVariableName(RubyLexer lexer) throws IOException {
-        int c = lexer.nextc(); // byte right after #
-        int significant = -1;
-        switch (c) {
-            case '$': {  // we unread back to before the $ so next lex can read $foo
-                int c2 = lexer.nextc();
-
-                if (c2 == '-') {
-                    int c3 = lexer.nextc();
-
-                    if (c3 == EOF) {
-                        lexer.pushback(c3); lexer.pushback(c2);
-                        return 0;
-                    }
-
-                    significant = c3;                              // $-0 potentially
-                    lexer.pushback(c3); lexer.pushback(c2);
-                    break;
-                } else if (lexer.isGlobalCharPunct(c2)) {          // $_ potentially
-                    lexer.setValue("#" + (char) c2);
-
-                    lexer.pushback(c2); lexer.pushback(c);
-                    return Tokens.tSTRING_DVAR;
-                }
-
-                significant = c2;                                  // $FOO potentially
-                lexer.pushback(c2);
-                break;
-            }
-            case '@': {  // we unread back to before the @ so next lex can read @foo
-                int c2 = lexer.nextc();
-
-                if (c2 == '@') {
-                    int c3 = lexer.nextc();
-
-                    if (c3 == EOF) {
-                        lexer.pushback(c3); lexer.pushback(c2);
-                        return 0;
-                    }
-
-                    significant = c3;                                // #@@foo potentially
-                    lexer.pushback(c3); lexer.pushback(c2);
-                    break;
-                }
-
-                significant = c2;                                    // #@foo potentially
-                lexer.pushback(c2);
-                break;
-            }
-            case '{':
-                //lexer.setBraceNest(lexer.getBraceNest() + 1);
-                lexer.setValue("#" + (char) c);
-                lexer.commandStart = true;
-                return Tokens.tSTRING_DBEG;
-            default:
-                return 0;
-        }
-
-        // We found #@, #$, #@@ but we don't know what at this point (check for valid chars).
-        if (significant != -1 && Character.isAlphabetic(significant) || significant == '_') {
-            lexer.pushback(c);
-            lexer.setValue("#" + significant);
-            return Tokens.tSTRING_DVAR;
-        }
-
-        return 0;
-    }
-
     public int parseString(RubyLexer lexer) throws IOException {
         boolean spaceSeen = false;
         int c;
@@ -186,7 +118,7 @@ public int parseString(RubyLexer lexer) throws IOException {
         ByteList buffer = createByteList(lexer);
         lexer.newtok(true);
         if ((flags & STR_FUNC_EXPAND) != 0 && c == '#') {
-            int token = parsePeekVariableName(lexer);
+            int token = lexer.peekVariableName(RubyParser.tSTRING_DVAR, RubyParser.tSTRING_DBEG);
 
             if (token != 0) return token;