[Truffle] Tidy up ParserByteList.

jruby · Dec 25, 2016 · 596e63e · 596e63e
1 parent 582ae8c
commit 596e63e
Showing 7 changed files with 105 additions and 56 deletions.
diff --git a/truffle/src/main/java/org/jruby/truffle/parser/ParserByteList.java b/truffle/src/main/java/org/jruby/truffle/parser/ParserByteList.java
@@ -6,55 +6,72 @@
  * Eclipse Public License version 1.0
  * GNU General Public License version 2
  * GNU Lesser General Public License version 2.1
+ *
+ * The contents of this file are subject to the Eclipse Public
+ * License Version 1.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ *
+ * Copyright (C) 2007-2010 JRuby Community
+ * Copyright (C) 2007 Charles O Nutter <headius@headius.com>
+ * Copyright (C) 2007 Nick Sieger <nicksieger@gmail.com>
+ * Copyright (C) 2007 Ola Bini <ola@ologix.com>
+ * Copyright (C) 2007 William N Dortch <bill.dortch@gmail.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the EPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the EPL, the GPL or the LGPL.
  */
 package org.jruby.truffle.parser;
 
 import org.jcodings.Encoding;
+import org.jcodings.ascii.AsciiTables;
+import org.jcodings.specific.USASCIIEncoding;
 import org.jruby.truffle.core.string.ByteList;
 
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
 public class ParserByteList {
 
-    private byte[] bytes;
-    private int start;
-    private int length;
+    private final byte[] bytes;
+    private final int start;
+    private final int length;
     private Encoding encoding;
 
-    public ParserByteList(ByteList byteList) {
-        fromByteList(byteList);
-    }
-
-    public ParserByteList(byte[] bytes, int start, int length, Encoding encoding, boolean copy) {
-        this(new ByteList(bytes, start, length, encoding, copy));
-    }
-
-    public ParserByteList(byte[] bytes, int start, int length, Encoding encoding) {
-        this(new ByteList(bytes, start, length, encoding, false));
-    }
-
     public ParserByteList(byte[] bytes) {
-        this(new ByteList(bytes));
+        this(bytes, 0, bytes.length, USASCIIEncoding.INSTANCE); // whole array, tagged US-ASCII; NOTE(review): confirm the removed ByteList(byte[]) path used the same default encoding
     }
 
-    public static ParserByteList create(String string) {
-        return new ParserByteList(ByteList.create(string));
+    public ParserByteList(byte[] bytes, int start, int length, Encoding encoding) {
+        ByteList byteList = new ByteList(bytes, start, length, encoding, false); // last argument is the copy flag of the ByteList constructor
+        this.bytes = byteList.bytes(); // NOTE(review): start is reset to 0 below, which presumes bytes() returns an array rebased to the requested range — confirm
+        this.start = 0;
+        this.length = byteList.length();
+        this.encoding = byteList.getEncoding();
     }
 
     public int getStart() {
         return start;
     }
 
-    public void setStart(int start) {
-        this.start = start;
-    }
-
     public int getLength() {
         return length;
     }
 
-    public void setLength(int length) {
-        this.length = length;
-    }
-
     public Encoding getEncoding() {
         return encoding;
     }
@@ -68,34 +85,62 @@ public ParserByteList makeShared(int sharedStart, int sharedLength) {
     }
 
     public int caseInsensitiveCmp(ParserByteList other) {
-        return toByteList().caseInsensitiveCmp(other.toByteList());
-    }
+        if (other == this) return 0; // same instance: trivially equal
 
-    public ByteList toByteList() {
-        return new ByteList(bytes, start, length, encoding, true);
-    }
+        final int size = length;
+        final int len =  Math.min(size, other.length); // number of leading bytes both lists have
+        final int other_begin = other.start;
+        final byte[] other_bytes = other.bytes;
 
-    private void fromByteList(ByteList byteList) {
-        bytes = byteList.bytes();
-        start = 0;
-        length = byteList.length();
-        encoding = byteList.getEncoding();
+        for (int offset = -1; ++offset < len;) {
+            int myCharIgnoreCase = AsciiTables.ToLowerCaseTable[bytes[start + offset] & 0xff] & 0xff; // & 0xff keeps the table index and the result unsigned
+            int otherCharIgnoreCase = AsciiTables.ToLowerCaseTable[other_bytes[other_begin + offset] & 0xff] & 0xff;
+            if (myCharIgnoreCase < otherCharIgnoreCase) {
+                return -1;
+            } else if (myCharIgnoreCase > otherCharIgnoreCase) {
+                return 1;
+            }
+        }
+        return size == other.length ? 0 : size == len ? -1 : 1; // common prefix tied: equal lengths give 0, otherwise the shorter list sorts first
     }
 
     public boolean equal(ParserByteList other) {
-        return toByteList().equals(other.toByteList());
+        if (other == this) return true;
+
+        int first, last;
+        if ((last = length) == other.length) {
+            byte buf[] = bytes;
+            byte otherBuf[] = other.bytes;
+            // Scan from the back and the front at the same time, meeting in
+            // the middle, so a mismatch at either end is found quickly.
+            // Each pass checks one byte from the tail, then one from the
+            // head; the loop ends on the first differing byte or once the
+            // two indices meet or cross.
+            // `first >= last` afterwards means the indices met without a
+            // mismatch, i.e. every byte was equal.
+            first = -1;
+            while (--last > first && buf[start + last] == otherBuf[other.start + last] &&
+                    ++first < last && buf[start + first] == otherBuf[other.start + first]) {
+            }
+            return first >= last;
+        }
+        return false;
     }
 
     public int charAt(int index) {
-        return toByteList().charAt(index);
+        return bytes[start + index] & 0xff; // mask so bytes >= 0x80 read as 128..255 instead of sign-extending to a negative int (same masking as caseInsensitiveCmp)
     }
 
     public String toString() {
-        return toByteList().toString();
+        return new String(Arrays.copyOfRange(bytes, start, start + length), StandardCharsets.US_ASCII); // copyOfRange takes an exclusive end index, not a length, so the end is start + length
+    }
+
+    public ByteList toByteList() {
+        return new ByteList(bytes, start, length, encoding, true);
     }
 
     public ParserByteListBuilder toBuilder() {
-        return new ParserByteListBuilder(toByteList());
+        return new ParserByteListBuilder(Arrays.copyOfRange(bytes, start, start + length), encoding); // hand the builder its own copy of exactly [start, start + length); the end index is exclusive
     }
 
 }
diff --git a/truffle/src/main/java/org/jruby/truffle/parser/ParserByteListBuilder.java b/truffle/src/main/java/org/jruby/truffle/parser/ParserByteListBuilder.java
@@ -21,6 +21,13 @@ public class ParserByteListBuilder {
     private int length;
     private Encoding encoding;
 
+    public ParserByteListBuilder(byte[] bytes, Encoding encoding) {
+        this.bytes = bytes; // kept by reference; the array is not copied here
+        start = 0;
+        length = bytes.length; // the builder initially spans the whole array
+        this.encoding = encoding;
+    }
+
     public ParserByteListBuilder(ByteList byteList) {
         fromByteList(byteList);
     }
@@ -113,6 +120,6 @@ public String toString() {
     }
 
     public ParserByteList toParserByteList() {
-        return new ParserByteList(toByteList());
+        return new ParserByteList(Arrays.copyOfRange(bytes, start, start + length), 0, length, encoding); // snapshot exactly [start, start + length); copyOfRange's third argument is an exclusive end index, not a length
     }
 }
diff --git a/truffle/src/main/java/org/jruby/truffle/parser/lexer/HeredocTerm.java b/truffle/src/main/java/org/jruby/truffle/parser/lexer/HeredocTerm.java
@@ -142,7 +142,7 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {
                 lexer.lex_goto_eol();
 
                 if (lexer.getHeredocIndent() > 0) {
-                    lexer.setValue(lexer.createStr(str.toByteList(), 0));
+                    lexer.setValue(lexer.createStr(str, 0));
                     return Tokens.tSTRING_CONTENT;
                 }
                 // MRI null checks str in this case but it is unconditionally non-null?
@@ -176,14 +176,14 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {
                     return restore(lexer);
                 }
                 if (c != '\n') {
-                    lexer.setValue(lexer.createStr(tok.toByteList(), 0));
+                    lexer.setValue(lexer.createStr(tok, 0));
                     return Tokens.tSTRING_CONTENT;
                 }
                 tok.append(lexer.nextc());
 
                 if (lexer.getHeredocIndent() > 0) {
                     lexer.lex_goto_eol();
-                    lexer.setValue(lexer.createStr(tok.toByteList(), 0));
+                    lexer.setValue(lexer.createStr(tok, 0));
                     return Tokens.tSTRING_CONTENT;
                 }
 
@@ -194,7 +194,7 @@ public int parseString(RubyLexer lexer) throws java.io.IOException {
 
         lexer.heredoc_restore(this);
         lexer.setStrTerm(new StringTerm(-1, '\0', '\0'));
-        lexer.setValue(lexer.createStr(str.toByteList(), 0));
+        lexer.setValue(lexer.createStr(str, 0));
         return Tokens.tSTRING_CONTENT;
     }
 }
diff --git a/truffle/src/main/java/org/jruby/truffle/parser/lexer/RubyLexer.java b/truffle/src/main/java/org/jruby/truffle/parser/lexer/RubyLexer.java
@@ -483,8 +483,8 @@ private int getIntegerToken(String value, int radix, int suffix) {
         return considerComplex(Tokens.tINTEGER, suffix);
     }
 
-    public StrParseNode createStr(ByteList buffer, int flags) {
-        return createStr(new ParserByteList(buffer), flags);
+    public StrParseNode createStr(ParserByteListBuilder buffer, int flags) {
+        return createStr(buffer.toParserByteList(), flags); // convert the builder to a ParserByteList, then delegate to the ParserByteList overload
     }
 
     // STR_NEW3/parser_str_new
@@ -2627,7 +2627,7 @@ protected boolean comment_at_top() {
     }
 
     public ParserByteList createTokenByteArrayView() {
-        return new ParserByteList(lexb.toBuilder().getUnsafeBytes(), lexb.getStart() + tokp, lex_p - tokp, getEncoding(), false);
+        return new ParserByteList(lexb.toBuilder().getUnsafeBytes(), lexb.getStart() + tokp, lex_p - tokp, getEncoding()); // NOTE(review): toBuilder() hands the builder a copied range, so adding lexb.getStart() may offset twice if start is ever non-zero — confirm
     }
 
     public String createTokenString(int start) {

diff --git a/truffle/src/main/java/org/jruby/truffle/parser/lexer/StringTerm.java b/truffle/src/main/java/org/jruby/truffle/parser/lexer/StringTerm.java
@@ -86,7 +86,7 @@ private int endFound(RubyLexer lexer) {
 
             if ((flags & STR_FUNC_REGEXP) != 0) {
                 RegexpOptions options = parseRegexpFlags(lexer);
-                ParserByteList regexpBytelist = ParserByteList.create("");
+                ParserByteList regexpBytelist = new ParserByteList(new byte[]{});
 
                 lexer.setValue(new RegexpParseNode(lexer.getPosition(), regexpBytelist.toByteList(), options));
                 return Tokens.tREGEXP_END;
@@ -209,7 +209,7 @@ public int parseString(RubyLexer lexer) throws IOException {
             lexer.compile_error("unterminated string meets end of file");
         }
 
-        lexer.setValue(lexer.createStr(buffer.toByteList(), flags));
+        lexer.setValue(lexer.createStr(buffer, flags));
         return Tokens.tSTRING_CONTENT;
     }
 

diff --git a/truffle/src/main/java/org/jruby/truffle/parser/parser/RubyParser.java b/truffle/src/main/java/org/jruby/truffle/parser/parser/RubyParser.java
@@ -44,6 +44,7 @@
 import org.jruby.truffle.core.string.ByteList;
 import org.jruby.truffle.interop.ForeignCodeNode;
 import org.jruby.truffle.collections.Tuple;
+import org.jruby.truffle.parser.ParserByteList;
 import org.jruby.truffle.parser.RubyWarnings;
 import org.jruby.truffle.parser.TempSourceSection;
 import org.jruby.truffle.parser.ast.ArgsParseNode;
@@ -3777,9 +3778,7 @@ public Object yyparse (RubyLexer yyLex) throws java.io.IOException {
 };
 states[480] = new ParserState() {
   @Override public Object execute(ParserSupport support, RubyLexer lexer, Object yyVal, Object[] yyVals, int yyTop) {
-                    ByteList aChar = ByteList.create("");
-                    aChar.setEncoding(lexer.getEncoding());
-                    yyVal = lexer.createStr(aChar, 0);
+                    yyVal = lexer.createStr(new ParserByteList(new byte[]{}, 0, 0, lexer.getEncoding()), 0);
     return yyVal;
   }
 };

diff --git a/truffle/src/main/java/org/jruby/truffle/parser/parser/RubyParser.y b/truffle/src/main/java/org/jruby/truffle/parser/parser/RubyParser.y
@@ -2001,9 +2001,7 @@ qsym_list      : /* none */ {
                 }
 
 string_contents : /* none */ {
-                    ByteList aChar = ByteList.create("");
-                    aChar.setEncoding(lexer.getEncoding());
-                    $$ = lexer.createStr(aChar, 0);
+                    $$ = lexer.createStr(new ParserByteList(new byte[]{}, 0, 0, lexer.getEncoding()), 0);
                 }
                 | string_contents string_content {
                     $$ = support.literal_concat($1.getPosition(), $1, $<ParseNode>2);