Fix issues with string processing of word types not putting exact space

enebo · enebo · commit 1443fd6797a2 · 2018-03-26T15:25:01.000-05:00
characters for on_space.  Surprisingly icky :)
diff --git a/core/src/main/java/org/jruby/ext/ripper/RipperLexer.java b/core/src/main/java/org/jruby/ext/ripper/RipperLexer.java
@@ -96,8 +96,6 @@ public class RipperLexer extends LexingCommon {
         map.put("__ENCODING__", Keyword.__ENCODING__);
     }
 
-    public boolean ignoreNextScanEvent = false;
-
     protected void ambiguousOperator(String op, String syn) {
         parser.dispatch("on_operator_ambiguous", getRuntime().newSymbol(op), getRuntime().newString(syn));
     }
@@ -248,6 +246,7 @@ public RipperLexer(RipperParserBase parser, LexerSource src) {
     protected ByteList delayed = null;
     private int delayed_line = 0;
     private int delayed_col = 0;
+    private boolean cr_seen = false; // set by cr() once the lone-'\r' warning has been issued; cleared when nextLine() reads a fresh line from src
 
     /**
      * Has lexing started yet?
@@ -267,64 +266,76 @@ protected void flush_string_content(Encoding encoding) {
             tokp = lex_p;
         }
     }
-    
-    public int nextc() {
-        if (lex_p == lex_pend) {
-            line_offset += lex_pend;
 
-            ByteList v = lex_nextline;
-            lex_nextline = null;
-            
-            if (v == null) {
-                if (eofp) return EOF;
-                
-                if (src == null || (v = src.gets()) == null) {
-                    eofp = true;
-                    lex_goto_eol();
-                    return EOF;
-                } 
-            }
-        
-            // Left over stuffs...Add to delayed for later processing.
-            if (tokp < lex_pend) {
-                if (delayed == null) {
-                    delayed = new ByteList();
-                    delayed.setEncoding(getEncoding());
-                    delayed.append(lexb, tokp, lex_pend - tokp);
-                    delayed_line = ruby_sourceline;
-                    delayed_col = tokp - lex_pbeg;
-                } else {
-                    delayed.append(lexb, tokp, lex_pend - tokp);
-                }
+    public void addDelayedToken(int tok, int end) { // appends lexb[tok, end) to the 'delayed' buffer; does nothing when tok >= end
+        // Leftover bytes between tok and end: collect them in 'delayed' for later processing.
+        if (tok < end) {
+            if (delayed == null) { // first chunk: create the buffer and record where it started
+                delayed = new ByteList();
+                delayed.setEncoding(getEncoding());
+                delayed_line = ruby_sourceline;
+                delayed_col = tok - lex_pbeg; // offset of tok from lex_pbeg, kept as the starting column
             }
-        
-            if (heredoc_end > 0) {
-                ruby_sourceline = heredoc_end;
-                heredoc_end = 0;
+            delayed.append(lexb, tok, end - tok);
+            tokp = end; // token start now sits just past the bytes that were appended
+        }
+    }
+
+    private boolean nextLine() { // loads the next line into lexb; returns true when no line is available (EOF), false otherwise
+        line_offset += lex_pend;
+
+        ByteList v = lex_nextline; // a previously stashed line, when present, is used instead of reading from src
+        lex_nextline = null;
+
+        if (v == null) {
+            if (eofp) return true; // EOF was already reached on an earlier call
+
+            if (src == null || (v = src.gets()) == null) { // no source, or the source has no more lines
+                eofp = true;
+                lex_goto_eol();
+                return true;
             }
-            ruby_sourceline++;
-            line_count++;
-            lex_pbeg = lex_p = 0;
-            lex_pend = lex_p + v.length();
-            lexb = v;
-            flush();
-            lex_lastline = v;
+            cr_seen = false; // only a line freshly read from src re-arms the lone-'\r' warning in cr()
         }
-        
+
+        addDelayedToken(tokp, lex_pend); // keep any unflushed remainder of the old line before lexb is replaced below
+
+        if (heredoc_end > 0) { // a pending heredoc_end overrides the current line number before it is incremented
+            ruby_sourceline = heredoc_end;
+            heredoc_end = 0;
+        }
+        ruby_sourceline++;
+        line_count++;
+        lex_pbeg = lex_p = 0; // restart all positions at the beginning of the new line buffer
+        lex_pend = lex_p + v.length();
+        lexb = v;
+        flush();
+        lex_lastline = v;
+
+        return false; // a new line is now loaded and ready to be read
+    }
+
+    private int cr(int c) { // called by nextc() after it has read a '\r'; returns the character nextc() should report
+        if (peek('\n')) { // "\r\n" pair: step over the '\n' and report a single newline
+            lex_p++;
+            c = '\n';
+        } else if (!cr_seen) { // lone '\r': warn only once until cr_seen is cleared again
+            cr_seen = true;
+            warn("encountered \\\\r in middle of line, treated as a mere space");
+        }
+        return c; // NOTE(review): a lone '\r' is returned unchanged here; the removed code replaced it with ' ' - confirm this is intended
+    }
+
+    public int nextc() { // returns p(lex_p) and advances lex_p, or EOF when nextLine() cannot supply another line
+        if (lex_p == lex_pend || eofp || lex_nextline != null) { // current line exhausted, EOF already hit, or a stashed line is waiting
+            if (nextLine()) return EOF;
+        }
+
         int c = p(lex_p);
         lex_p++;
-        if (c == '\r') {
-            if (peek('\n')) {
-                lex_p++;
-                c = '\n';
-            } else if (ruby_sourceline > last_cr_line) {
-                last_cr_line = ruby_sourceline;
-                warn("encountered \\\\r in middle of line, treated as a mere space");
-                c = ' ';
-            }
-        }
 
-//        System.out.println("C: " + (char) c + ", LEXP: " + lex_p + ", PEND: "+ lex_pend);
+        if (c == '\r') c = cr(c); // cr() folds "\r\n" into '\n' and warns about a lone '\r'
+
         return c;
     }
     
@@ -714,7 +725,9 @@ private void printToken(int token) {
     }
     
     public boolean hasScanEvent() {
-        if (lex_p < tokp) throw parser.getRuntime().newRuntimeError("lex_p < tokp");
+        if (lex_p < tokp) { // sanity check: lex_p being behind tokp is reported as a runtime error
+            throw parser.getRuntime().newRuntimeError("lex_p < tokp");
+        }
         
         return lex_p > tokp;
     }
diff --git a/core/src/main/java/org/jruby/ext/ripper/StringTerm.java b/core/src/main/java/org/jruby/ext/ripper/StringTerm.java
@@ -76,6 +76,7 @@ private int endFound(RipperLexer lexer) throws IOException {
         if ((flags & STR_FUNC_QWORDS) != 0) {
             flags |= STR_FUNC_TERM;
             lexer.pushback(0);
+            lexer.addDelayedToken(lexer.tokp, lexer.lex_p);
             return ' ';
         }
 
@@ -118,7 +119,7 @@ public int parseString(RipperLexer lexer, LexerSource src) throws IOException {
 
         if ((flags & STR_FUNC_TERM) != 0) {
             if ((flags & STR_FUNC_QWORDS) != 0) lexer.nextc(); // delayed terminator char
-            lexer.ignoreNextScanEvent = true;
+            lexer.setState(EXPR_BEG | EXPR_LABEL);
             lexer.setStrTerm(null);
             return ((flags & STR_FUNC_REGEXP) != 0) ? RipperParser.tREGEXP_END : RipperParser.tSTRING_END;
         }
@@ -128,7 +129,6 @@ public int parseString(RipperLexer lexer, LexerSource src) throws IOException {
         c = lexer.nextc();
         if ((flags & STR_FUNC_QWORDS) != 0 && Character.isWhitespace(c)) {
             do { 
-                buffer.append((char) c);
                 c = lexer.nextc();
             } while (Character.isWhitespace(c));
             spaceSeen = true;
@@ -145,6 +145,7 @@ public int parseString(RipperLexer lexer, LexerSource src) throws IOException {
         
         if (spaceSeen) {
             lexer.pushback(c);
+            lexer.addDelayedToken(lexer.tokp, lexer.lex_p);
             return ' ';
         }