Merge remote-tracking branch 'github/master' into prepend

jruby · Feb 26, 2013 · f1ff8dd · f1ff8dd
2 parents d4f1931 + 0001ac3
commit f1ff8dd
Showing 10 changed files with 570 additions and 499 deletions.
diff --git a/spec/regression/JRUBY-5122_nonblocking_io_spec.rb b/spec/regression/JRUBY-5122_nonblocking_io_spec.rb
@@ -214,7 +214,7 @@
   #   Oracle's build block with > 131072 (2**17)
   # On a Windows 7(64) box:
   #   Oracle's build does not block (use memory till OOMException)
-  SOCKET_CHANNEL_MIGHT_BLOCK = "a" * (65536 * 4)
+  SOCKET_CHANNEL_MIGHT_BLOCK = "a" * (219463 * 4)
 
   it "should not block for write" do
   100.times do # for acceleration; it failed w/o wait_for_accepted call

diff --git a/src/org/jruby/ext/ripper/RipperLexer.java b/src/org/jruby/ext/ripper/RipperLexer.java
@@ -433,6 +433,10 @@ private boolean isEND() {
     private boolean isARG() {
         return lex_state == LexState.EXPR_ARG || lex_state == LexState.EXPR_CMDARG;
     }
+
+    /**
+     * Shared test used by the operator lexing methods: true only when the lexer is in an argument
+     * state (see isARG()), the caller reports that a space was seen, and {@code c} is not itself
+     * whitespace.
+     *
+     * @param c character to test for whitespace
+     * @param spaceSeen space-seen flag supplied by the caller
+     */
+    private boolean isSpaceArg(int c, boolean spaceSeen) {
+        if (!isARG() || !spaceSeen) return false;
+
+        return !Character.isWhitespace(c);
+    }
 
     private void determineExpressionState() {
         switch (lex_state) {
@@ -1121,6 +1125,10 @@ private int yylex() throws IOException {
         boolean spaceSeen = false;
         boolean commandState;
 
+        // FIXME: BOM detection only matters at the very beginning of the parse, yet this check runs on every
+        // yylex() call; doing it exactly once requires changing how the parser is set up.
+        if (token == 0 && src.getLine() == 0) detectUTF8BOM();        
+
         if (lex_strterm != null) {
             int tok = lex_strterm.parseString(this, src);
             if (tok == Tokens.tSTRING_END || tok == Tokens.tREGEXP_END) {
@@ -1413,7 +1421,7 @@ private int ampersand(boolean spaceSeen) throws IOException {
         //if the warning is generated, the getPosition() on line 954 (this line + 18) will create
         //a wrong position if the "inclusive" flag is not set.
         ISourcePosition tmpPosition = getPosition();
-        if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
+        if (isSpaceArg(c, spaceSeen)) {
             IRubyWarnings warnings = getRuntime().getWarnings();
             if (warnings.isVerbose()) warnings.warning(IRubyWarnings.ID.ARGUMENT_AS_PREFIX, tmpPosition, "`&' interpreted as argument prefix");
             c = Tokens.tAMPER;
@@ -1953,7 +1961,7 @@ private int minus(boolean spaceSeen) throws IOException {
             setState(LexState.EXPR_ARG);
             return Tokens.tLAMBDA;
         }
-        if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
+        if (isBEG() || isSpaceArg(c, spaceSeen)) {
             if (isARG()) arg_ambiguous();
             setState(LexState.EXPR_BEG);
             src.unread(c);
@@ -1978,7 +1986,7 @@ private int percent(boolean spaceSeen) throws IOException {
             return Tokens.tOP_ASGN;
         }
 
-        if (isARG() && spaceSeen && !Character.isWhitespace(c)) return parseQuote(c);
+        if (isSpaceArg(c, spaceSeen)) return parseQuote(c);
 
         determineExpressionState();
 
@@ -2026,7 +2034,7 @@ private int plus(boolean spaceSeen) throws IOException {
             return Tokens.tOP_ASGN;
         }
 
-        if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
+        if (isBEG() || isSpaceArg(c, spaceSeen)) {
             if (isARG()) arg_ambiguous();
             setState(LexState.EXPR_BEG);
             src.unread(c);
@@ -2161,12 +2169,10 @@ private int slash(boolean spaceSeen) throws IOException {
             return Tokens.tOP_ASGN;
         }
         src.unread(c);
-        if (isARG() && spaceSeen) {
-            if (!Character.isWhitespace(c)) {
-                arg_ambiguous();
-                lex_strterm = new StringTerm(str_regexp, '\0', '/');
-                return Tokens.tREGEXP_BEG;
-            }
+        if (isSpaceArg(c, spaceSeen)) {
+            arg_ambiguous();
+            lex_strterm = new StringTerm(str_regexp, '\0', '/');
+            return Tokens.tREGEXP_BEG;
         }
 
         determineExpressionState();
@@ -2191,7 +2197,7 @@ private int star(boolean spaceSeen) throws IOException {
             return Tokens.tOP_ASGN;
         default:
             src.unread(c);
-            if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
+            if (isSpaceArg(c, spaceSeen)) {
                 IRubyWarnings warnings = getRuntime().getWarnings();
 
                 if (warnings.isVerbose()) warnings.warning(IRubyWarnings.ID.ARGUMENT_AS_PREFIX, getPosition(), "`*' interpreted as argument prefix");
@@ -2535,11 +2541,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
                     buffer.setEncoding(UTF8_ENCODING);
                     if (stringLiteral) tokenAddMBC(codepoint, buffer);
                 } else if (stringLiteral) {
-                    if (codepoint == 0 && symbolLiteral) {
-                        throw new SyntaxException(SyntaxException.PID.INVALID_ESCAPE_SYNTAX, getPosition(),
-                            getCurrentLine(), "symbol cannot contain '\\u0000'");
-                    }
-
                     buffer.append((char) codepoint);
                 }
             } while (src.peek(' ') || src.peek('\t'));
@@ -2555,11 +2556,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
                 buffer.setEncoding(UTF8_ENCODING);
                 if (stringLiteral) tokenAddMBC(codepoint, buffer);
             } else if (stringLiteral) {
-                if (codepoint == 0 && symbolLiteral) {
-                    throw new SyntaxException(SyntaxException.PID.INVALID_ESCAPE_SYNTAX, getPosition(),
-                        getCurrentLine(), "symbol cannot contain '\\u0000'");
-                }
-
                 buffer.append((char) codepoint);
             }
         }
@@ -2711,4 +2707,27 @@ private char scanOct(int count) throws IOException {
 
         return value;
     }
+
+    /**
+     * Reads up to three bytes from {@code src} looking for the UTF-8 byte-order mark
+     * (0xEF 0xBB 0xBF). When all three match, the bytes stay consumed and the lexer encoding is
+     * set to UTF-8. On the first mismatch every byte read so far is pushed back with
+     * {@code src.unread}, most recently read byte first.
+     *
+     * FIXME: matchMarker will strip off valuable bytes and so does not work for this (otherwise
+     * this could be a one-liner).
+     */
+    private void detectUTF8BOM() throws IOException {
+        int first = src.read();
+        if (first != 0xef) {
+            src.unread(first);
+            return;
+        }
+
+        int second = src.read();
+        if (second != 0xbb) {
+            src.unread(second);
+            src.unread(first);
+            return;
+        }
+
+        int third = src.read();
+        if (third != 0xbf) {
+            src.unread(third);
+            src.unread(second);
+            src.unread(first);
+            return;
+        }
+
+        setEncoding(UTF8_ENCODING);
+    }
 }
diff --git a/src/org/jruby/internal/runtime/methods/DefaultMethod.java b/src/org/jruby/internal/runtime/methods/DefaultMethod.java
@@ -142,8 +142,6 @@ private DynamicMethod tryJitReturnMethod(ThreadContext context) {
             // use the class name
             className = implementationClass.getName();
         }
-        // replace double-colons with dots, to match Java
-        className.replaceAll("::", ".");
         context.runtime.getJITCompiler().tryJIT(this, context, className, name);
         return box.actualMethod;
     }

diff --git a/src/org/jruby/lexer/yacc/RubyYaccLexer.java b/src/org/jruby/lexer/yacc/RubyYaccLexer.java
@@ -159,6 +159,29 @@ private void warn_balanced(int c, boolean spaceSeen, String op, String syn) {
             ambiguousOperator(op, syn);
         }
     }
+
+    /**
+     * Reads up to three bytes from {@code src} looking for the UTF-8 byte-order mark
+     * (0xEF 0xBB 0xBF). When all three match, the bytes stay consumed and the lexer encoding is
+     * set to UTF-8. On the first mismatch every byte read so far is pushed back with
+     * {@code src.unread}, most recently read byte first.
+     *
+     * FIXME: matchMarker will strip off valuable bytes and so does not work for this (otherwise
+     * this could be a one-liner).
+     */
+    private void detectUTF8BOM() throws IOException {
+        int first = src.read();
+        if (first != 0xef) {
+            src.unread(first);
+            return;
+        }
+
+        int second = src.read();
+        if (second != 0xbb) {
+            src.unread(second);
+            src.unread(first);
+            return;
+        }
+
+        int third = src.read();
+        if (third != 0xbf) {
+            src.unread(third);
+            src.unread(second);
+            src.unread(first);
+            return;
+        }
+
+        setEncoding(UTF8_ENCODING);
+    }
 
     public enum Keyword {
         END ("end", Tokens.kEND, Tokens.kEND, LexState.EXPR_END),
@@ -275,6 +298,7 @@ public static Keyword getKeyword(String str) {
 
     // Are we lexing Ruby 1.8 or 1.9+ syntax
     private boolean isOneEight;
+    private boolean isTwoZero;
     // Count of nested parentheses (1.9 only)
     private int parenNest = 0;
     // 1.9 only
@@ -311,6 +335,7 @@ public final void reset() {
         resetStacks();
         lex_strterm = null;
         commandStart = true;
+        if (parserSupport != null) isTwoZero = parserSupport.getConfiguration().getVersion().is2_0();
     }
 
     public int nextToken() throws IOException {
@@ -467,6 +492,10 @@ private boolean isEND() {
     private boolean isARG() {
         return lex_state == LexState.EXPR_ARG || lex_state == LexState.EXPR_CMDARG;
     }
+
+    /**
+     * Shared test used by the operator lexing methods: true only when the lexer is in an argument
+     * state (see isARG()), the caller reports that a space was seen, and {@code c} is not itself
+     * whitespace.
+     *
+     * @param c character to test for whitespace
+     * @param spaceSeen space-seen flag supplied by the caller
+     */
+    private boolean isSpaceArg(int c, boolean spaceSeen) {
+        if (!isARG() || !spaceSeen) return false;
+
+        return !Character.isWhitespace(c);
+    }
 
     private void determineExpressionState() {
         switch (lex_state) {
@@ -621,10 +650,26 @@ private int parseQuote(int c) throws IOException {
             setState(LexState.EXPR_FNAME);
             yaccValue = new Token("%"+c+begin, getPosition());
             return Tokens.tSYMBEG;
-
+
+        case 'I':
+            if (isTwoZero) {
+                lex_strterm = new StringTerm(str_dquote | STR_FUNC_QWORDS, begin, end);
+                do {c = src.read();} while (Character.isWhitespace(c));                
+                src.unread(c);
+                yaccValue = new Token("%" + c + begin, getPosition());
+                return Tokens.tSYMBOLS_BEG;
+            }
+        case 'i':
+            if (isTwoZero) {
+                lex_strterm = new StringTerm(/* str_squote | */STR_FUNC_QWORDS, begin, end);
+                do {c = src.read();} while (Character.isWhitespace(c));
+                src.unread(c);
+                yaccValue = new Token("%" + c + begin, getPosition());
+                return Tokens.tQSYMBOLS_BEG;
+            }
         default:
-            throw new SyntaxException(PID.STRING_UNKNOWN_TYPE, getPosition(), getCurrentLine(),
-                    "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'.");
+            throw new SyntaxException(PID.STRING_UNKNOWN_TYPE, 
+                        getPosition(), getCurrentLine(), "unknown type of %string");
         }
     }
 
@@ -966,7 +1011,7 @@ private int yylex2() throws IOException {
 
         return currentToken;
     }
-
+    
     /**
      *  Returns the next token. Also sets yyVal is needed.
      *
@@ -976,6 +1021,10 @@ private int yylex() throws IOException {
         int c;
         boolean spaceSeen = false;
         boolean commandState;
+
+        // FIXME: BOM detection only matters at the very beginning of the parse, yet this check runs on every
+        // yylex() call; doing it exactly once requires changing how the parser is set up.
+        if (token == 0 && src.getLine() == 0) detectUTF8BOM();
 
         if (lex_strterm != null) {
             int tok = lex_strterm.parseString(this, src);
@@ -1255,7 +1304,7 @@ private int ampersand(boolean spaceSeen) throws IOException {
         //if the warning is generated, the getPosition() on line 954 (this line + 18) will create
         //a wrong position if the "inclusive" flag is not set.
         ISourcePosition tmpPosition = getPosition();
-        if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
+        if (isSpaceArg(c, spaceSeen)) {
             if (warnings.isVerbose()) warnings.warning(ID.ARGUMENT_AS_PREFIX, tmpPosition, "`&' interpreted as argument prefix");
             c = Tokens.tAMPER;
         } else if (isBEG()) {
@@ -1854,7 +1903,7 @@ private int minus(boolean spaceSeen) throws IOException {
             yaccValue = new Token("->", getPosition());
             return Tokens.tLAMBDA;
         }
-        if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
+        if (isBEG() || isSpaceArg(c, spaceSeen)) {
             if (isARG()) arg_ambiguous();
             setState(LexState.EXPR_BEG);
             src.unread(c);
@@ -1881,8 +1930,8 @@ private int percent(boolean spaceSeen) throws IOException {
             yaccValue = new Token("%", getPosition());
             return Tokens.tOP_ASGN;
         }
-        
-        if (isARG() && spaceSeen && !Character.isWhitespace(c)) return parseQuote(c);
+
+        if (isSpaceArg(c, spaceSeen)) return parseQuote(c);
 
         determineExpressionState();
 
@@ -1938,7 +1987,7 @@ private int plus(boolean spaceSeen) throws IOException {
             return Tokens.tOP_ASGN;
         }
 
-        if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
+        if (isBEG() || isSpaceArg(c, spaceSeen)) { //FIXME: arg_ambiguous missing
             if (isARG()) arg_ambiguous();
             setState(LexState.EXPR_BEG);
             src.unread(c);
@@ -2084,13 +2133,11 @@ private int slash(boolean spaceSeen) throws IOException {
             return Tokens.tOP_ASGN;
         }
         src.unread(c);
-        if (isARG() && spaceSeen) {
-            if (!Character.isWhitespace(c)) {
-                arg_ambiguous();
-                lex_strterm = new StringTerm(str_regexp, '\0', '/');
-                yaccValue = new Token("/",getPosition());
-                return Tokens.tREGEXP_BEG;
-            }
+        if (isSpaceArg(c, spaceSeen)) {
+            arg_ambiguous();
+            lex_strterm = new StringTerm(str_regexp, '\0', '/');
+            yaccValue = new Token("/",getPosition());
+            return Tokens.tREGEXP_BEG;
         }
 
         determineExpressionState();
@@ -2110,17 +2157,27 @@ private int star(boolean spaceSeen) throws IOException {
                 yaccValue = new Token("**", getPosition());
                 return Tokens.tOP_ASGN;
             }
-            src.unread(c);
+
+            src.unread(c); // not a '=' put it back
             yaccValue = new Token("**", getPosition());
-            c = Tokens.tPOW;
+
+            if (isTwoZero && isSpaceArg(c, spaceSeen)) {
+                if (warnings.isVerbose()) warnings.warning(ID.ARGUMENT_AS_PREFIX, getPosition(), "`**' interpreted as argument prefix");
+                c = Tokens.tDSTAR;
+            } else if (isTwoZero && isBEG()) {
+                c = Tokens.tDSTAR;
+            } else {
+                if (!isOneEight) warn_balanced(c, spaceSeen, "*", "argument prefix");
+                c = Tokens.tPOW;
+            }
             break;
         case '=':
             setState(LexState.EXPR_BEG);
             yaccValue = new Token("*", getPosition());
             return Tokens.tOP_ASGN;
         default:
             src.unread(c);
-            if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
+            if (isSpaceArg(c, spaceSeen)) {
                 if (warnings.isVerbose()) warnings.warning(ID.ARGUMENT_AS_PREFIX, getPosition(), "`*' interpreted as argument prefix");
                 c = Tokens.tSTAR;
             } else if (isBEG()) {
@@ -2463,11 +2520,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
                     buffer.setEncoding(UTF8_ENCODING);
                     if (stringLiteral) tokenAddMBC(codepoint, buffer);
                 } else if (stringLiteral) {
-                    if (codepoint == 0 && symbolLiteral) {
-                        throw new SyntaxException(PID.INVALID_ESCAPE_SYNTAX, getPosition(),
-                            getCurrentLine(), "symbol cannot contain '\\u0000'");
-                    }
-
                     buffer.append((char) codepoint);
                 }
             } while (src.peek(' ') || src.peek('\t'));
@@ -2483,11 +2535,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
                 buffer.setEncoding(UTF8_ENCODING);
                 if (stringLiteral) tokenAddMBC(codepoint, buffer);
             } else if (stringLiteral) {
-                if (codepoint == 0 && symbolLiteral) {
-                    throw new SyntaxException(PID.INVALID_ESCAPE_SYNTAX, getPosition(),
-                        getCurrentLine(), "symbol cannot contain '\\u0000'");
-                }
-
                 buffer.append((char) codepoint);
             }
         }

diff --git a/src/org/jruby/parser/.#Ruby19Parser.y b/src/org/jruby/parser/.#Ruby19Parser.y
diff --git a/src/org/jruby/parser/ParserSupport.java b/src/org/jruby/parser/ParserSupport.java
@@ -1145,6 +1145,13 @@ public DStrNode createDStrNode(ISourcePosition position) {
         return new DStrNode(position);
     }
 
+    /**
+     * Turns a string node into the matching symbol node. A StrNode becomes a SymbolNode named by
+     * the string form of its value; any other node is cast to DStrNode and wrapped in a
+     * DSymbolNode.
+     *
+     * @param position source position given to the node that is created
+     * @param value node to convert; NOTE(review): callers are presumably expected to pass only
+     *              StrNode or DStrNode, since anything else fails the cast — confirm
+     * @return the newly created SymbolNode or DSymbolNode
+     */
+    public Node asSymbol(ISourcePosition position, Node value) {
+        if (!(value instanceof StrNode)) return new DSymbolNode(position, (DStrNode) value);
+
+        // FIXME: This might have an encoding issue since toString generally uses iso-8859-1
+        String name = ((StrNode) value).getValue().toString();
+        return new SymbolNode(position, name);
+    }
+
     public Node literal_concat(ISourcePosition position, Node head, Node tail) { 
         if (head == null) return tail;
         if (tail == null) return head;

diff --git a/src/org/jruby/parser/Ruby20Parser.java b/src/org/jruby/parser/Ruby20Parser.java
diff --git a/src/org/jruby/parser/Ruby20Parser.y b/src/org/jruby/parser/Ruby20Parser.y
@@ -1703,7 +1703,7 @@ symbol_list     : /* none */ {
                     $$ = new ArrayNode(lexer.getPosition());
                 }
                 | symbol_list word ' ' {
-                    $$ = $1.add($2 instanceof EvStrNode ? new DSymbolNode($1.getPosition()).add($2) : $2);
+                    $$ = $1.add($2 instanceof EvStrNode ? new DSymbolNode($1.getPosition()).add($2) : support.asSymbol($1.getPosition(), $2));
                 }
 
 qwords          : tQWORDS_BEG ' ' tSTRING_END {
@@ -1734,7 +1734,7 @@ qsym_list      : /* none */ {
                     $$ = new ArrayNode(lexer.getPosition());
                 }
                 | qsym_list tSTRING_CONTENT ' ' {
-                    $$ = $1.add($2);
+                    $$ = $1.add(support.asSymbol($1.getPosition(), $2));
                 }
 
 string_contents : /* none */ {

diff --git a/src/org/jruby/parser/Tokens.java b/src/org/jruby/parser/Tokens.java
@@ -157,7 +157,10 @@ public interface Tokens {
     int tLAMBDA     = DefaultRubyParser.tLAMBDA;
     int tLAMBEG     = DefaultRubyParser.tLAMBEG;
     int tLABEL      = DefaultRubyParser.tLABEL;
-
+    int tSYMBOLS_BEG = Ruby20Parser.tSYMBOLS_BEG;
+    int tQSYMBOLS_BEG = Ruby20Parser.tQSYMBOLS_BEG;
+    int tDSTAR = Ruby20Parser.tDSTAR;
+
     String[] operators = {"+@", "-@", "**", "<=>", "==", "===", "!=", ">=", "<=", "&&",
                           "||", "=~", "!~", "..", "...", "[]", "[]=", "<<", ">>", "::"};
 }
diff --git a/test/externals/ruby1.9/excludes/TestParse.rb b/test/externals/ruby1.9/excludes/TestParse.rb
@@ -2,7 +2,5 @@
 exclude :test_assign_in_conditional, "needs investigation"
 exclude :test_invalid_char, "needs investigation"
 exclude :test_question, "needs investigation"
-exclude :test_symbol, "needs investigation"
-exclude :test_utf8_bom, "needs investigation"
 exclude :test_void_expr_stmts_value, "needs investigation"
 exclude :test_xstring, "needs investigation"