jruby · Sep 22, 2015 · Sep 22, 2015
Showing with 35 additions and 17 deletions.

+20 −5 core/src/main/java/org/jruby/ast/SymbolNode.java

+13 −1 core/src/main/java/org/jruby/lexer/yacc/RubyLexer.java

+1 −10 core/src/main/java/org/jruby/parser/ParserSupport.java

+1 −1 lib/ruby/stdlib/io/console.rb
diff --git a/core/src/main/java/org/jruby/ast/SymbolNode.java b/core/src/main/java/org/jruby/ast/SymbolNode.java
@@ -33,6 +33,7 @@
 package org.jruby.ast;
 
 import java.awt.image.ByteLookupTable;
+import java.nio.charset.Charset;
 import java.util.List;
 
 import org.jcodings.Encoding;
@@ -48,6 +49,7 @@
 import org.jruby.runtime.ThreadContext;
 import org.jruby.runtime.builtin.IRubyObject;
 import org.jruby.util.ByteList;
+import org.jruby.util.StringSupport;
 
 /**
  * Represents a symbol (:symbol_name).
@@ -56,14 +58,27 @@ public class SymbolNode extends Node implements ILiteralNode, INameNode {
     private String name;
     private Encoding encoding;
 
-    public SymbolNode(ISourcePosition position, ByteList value) {
+    // Interned ident path (e.g. [':', ident]).
+    public SymbolNode(ISourcePosition position, String name, Encoding encoding, int cr) {
         super(position, false);
-        this.name = value.toString().intern();
-        // FIXME: A full scan to determine whether we should back off to US-ASCII.  Lexer should just do this properly.
-        if (value.lengthEnc() == value.length()) {
+        this.name = name;  // Assumed all names are already intern'd by lexer.
+
+        if (encoding == USASCIIEncoding.INSTANCE || cr == StringSupport.CR_7BIT) {
             this.encoding = USASCIIEncoding.INSTANCE;
         } else {
-            this.encoding = value.getEncoding();
+            this.encoding = encoding;
+        }
+    }
+
+    // String path (e.g. [':', str_beg, str_content, str_end])
+    public SymbolNode(ISourcePosition position, ByteList value) {
+        super(position, false);
+        this.name = value.toString().intern();
+
+        if (value.getEncoding() != USASCIIEncoding.INSTANCE) {
+            int size = value.realSize();
+            this.encoding = value.getEncoding().strLength(value.unsafeBytes(), value.begin(), size) == size ?
+                    USASCIIEncoding.INSTANCE : value.getEncoding();
         }
     }
 

diff --git a/core/src/main/java/org/jruby/lexer/yacc/RubyLexer.java b/core/src/main/java/org/jruby/lexer/yacc/RubyLexer.java
@@ -286,9 +286,16 @@ public static Keyword getKeyword(String str) {
     private LexState lex_state;
     private LexState last_state;
     public ISourcePosition tokline;
+    private int tokenCR;
+
+    public int getTokenCR() {
+        return tokenCR;
+    }
 
     public void newtok(boolean unreadOnce) {
         tokline = getPosition();
+        // We assume all idents are 7BIT until they aren't.
+        tokenCR = StringSupport.CR_7BIT;
 
         tokp = lex_p - (unreadOnce ? 1 : 0); // We use tokp of ripper to mark beginning of tokens.
     }
@@ -2706,7 +2713,12 @@ public void readUTFEscapeRegexpLiteral(ByteList buffer) throws IOException {
     public boolean tokadd_mbchar(int first_byte) {
         int length = precise_mbclen();
 
-        if (length <= 0) compile_error("invalid multibyte char (" + current_enc + ")");
+
+        if (length <= 0) {
+            compile_error("invalid multibyte char (" + current_enc + ")");
+        } else if (length > 1) {
+            tokenCR = StringSupport.CR_VALID;
+        }
 
         lex_p += length - 1;  // we already read first byte so advance pointer for remainder
 

diff --git a/core/src/main/java/org/jruby/parser/ParserSupport.java b/core/src/main/java/org/jruby/parser/ParserSupport.java
@@ -882,16 +882,7 @@ public DStrNode createDStrNode(ISourcePosition position) {
     }
 
     public Node asSymbol(ISourcePosition position, String value) {
-        // FIXME: tLABEL and identifiers could return ByteList and not String and make String on-demand for method names
-        // or lvars.  This would prevent this re-extraction of bytes from a string with proper charset
-        try {
-            Charset charset = lexer.getEncoding().getCharset();
-            if (charset != null) return new SymbolNode(position, new ByteList(value.getBytes(charset), lexer.getEncoding()));
-        } catch (UnsupportedCharsetException e) {}
-
-        // for non-charsets we are screwed here since bytes will file.encoding and not what we read them as (see above FIXME for
-        // a much more invasive solution.
-        return new SymbolNode(position, new ByteList(value.getBytes(), lexer.getEncoding()));
+        return new SymbolNode(position, value, lexer.getEncoding(), lexer.getTokenCR());
     }
 
     public Node asSymbol(ISourcePosition position, Node value) {

diff --git a/lib/ruby/stdlib/io/console.rb b/lib/ruby/stdlib/io/console.rb
@@ -156,7 +156,7 @@ def self.console(sym = nil)
           con = @console
         end
 
-        if !con.kind_of?(File) || !con.open? || !con.readable? # MRI checks IO internals here
+        if !con.kind_of?(File) || (con.kind_of?(IO) && !con.open? || !con.readable?) # MRI checks IO internals here
           remove_instance_variable :@console if defined?(@console)
           con = nil
         end