jruby · Dec 23, 2016 · Dec 23, 2016 · Dec 23, 2016
diff --git a/truffle/src/main/java/org/jruby/truffle/core/regexp/ClassicRegexp.java b/truffle/src/main/java/org/jruby/truffle/core/regexp/ClassicRegexp.java
@@ -47,10 +47,12 @@
 import org.joni.WarnCallback;
 import org.joni.exception.JOniException;
 import org.jruby.truffle.RubyContext;
-import org.jruby.truffle.core.rope.CodeRange;
+import org.jruby.truffle.core.rope.Rope;
+import org.jruby.truffle.core.rope.RopeConstants;
+import org.jruby.truffle.core.rope.RopeOperations;
 import org.jruby.truffle.core.string.ByteList;
 import org.jruby.truffle.core.string.ByteListKey;
-import org.jruby.truffle.core.string.EncodingUtils;
+import org.jruby.truffle.core.string.StringOperations;
 import org.jruby.truffle.core.string.StringSupport;
 import org.jruby.truffle.parser.ReOptions;
 import org.jruby.truffle.collections.WeakValuedMap;
@@ -59,14 +61,12 @@
 import java.util.Iterator;
 
 import static org.jruby.truffle.core.rope.CodeRange.CR_7BIT;
-import static org.jruby.truffle.core.rope.CodeRange.CR_BROKEN;
 import static org.jruby.truffle.core.string.StringSupport.EMPTY_STRING_ARRAY;
-import static org.jruby.truffle.core.string.StringSupport.codeRangeScan;
 
 public class ClassicRegexp implements ReOptions {
     private final RubyContext context;
     private Regex pattern;
-    private ByteList str = ByteList.EMPTY_BYTELIST;
+    private Rope str = RopeConstants.EMPTY_UTF8_ROPE;
     private RegexpOptions options;
 
     public void setLiteral() {
@@ -153,20 +153,20 @@ public Integer run(Object context, Matcher matcher) throws InterruptedException
         this.options = new RegexpOptions();
     }
 
-    private ClassicRegexp(RubyContext context, ByteList str, RegexpOptions options) {
+    private ClassicRegexp(RubyContext context, Rope str, RegexpOptions options) {
         this(context);
         str.getClass();
 
         regexpInitialize(str, str.getEncoding(), options);
     }
 
     // used only by the compiler/interpreter (will set the literal flag)
-    public static ClassicRegexp newRegexp(RubyContext runtime, ByteList pattern, int options) {
+    public static ClassicRegexp newRegexp(RubyContext runtime, Rope pattern, int options) {
         return newRegexp(runtime, pattern, RegexpOptions.fromEmbeddedOptions(options));
     }
 
     // used only by the compiler/interpreter (will set the literal flag)
-    public static ClassicRegexp newRegexp(RubyContext runtime, ByteList pattern, RegexpOptions options) {
+    public static ClassicRegexp newRegexp(RubyContext runtime, Rope pattern, RegexpOptions options) {
         //try {
             return new ClassicRegexp(runtime, pattern, (RegexpOptions)options.clone());
         //} catch (RaiseException re) {
@@ -179,23 +179,23 @@ public static ClassicRegexp newRegexp(RubyContext runtime, ByteList pattern, Reg
      * error as opposed to any non-literal regexp creation which may raise a syntax error but will not
      * have this extra source info in the error message
      */
-    public static ClassicRegexp newRegexpParser(RubyContext runtime, ByteList pattern, RegexpOptions options) {
+    public static ClassicRegexp newRegexpParser(RubyContext runtime, Rope pattern, RegexpOptions options) {
         return new ClassicRegexp(runtime, pattern, (RegexpOptions)options.clone());
     }
 
-    private static void preprocessLight(RubyContext context, ByteList str, Encoding enc, Encoding[]fixedEnc, RegexpSupport.ErrorMode mode) {
+    private static void preprocessLight(RubyContext context, Rope str, Encoding enc, Encoding[]fixedEnc, RegexpSupport.ErrorMode mode) {
         if (enc.isAsciiCompatible()) {
             fixedEnc[0] = null;
         } else {
             fixedEnc[0] = enc;
         }
 
-        boolean hasProperty = unescapeNonAscii(context, null, str.getUnsafeBytes(), str.getBegin(), str.getBegin() + str.getRealSize(), enc, fixedEnc, str, mode);
+        boolean hasProperty = unescapeNonAscii(context, null, str.getBytes(), 0, str.byteLength(), enc, fixedEnc, str, mode);
         if (hasProperty && fixedEnc[0] == null) fixedEnc[0] = enc;
     }
 
     @SuppressWarnings("fallthrough")
-    public static boolean unescapeNonAscii(RubyContext context, ByteList to, byte[] bytes, int p, int end, Encoding enc, Encoding[] encp, ByteList str, RegexpSupport.ErrorMode mode) {
+    public static boolean unescapeNonAscii(RubyContext context, ByteList to, byte[] bytes, int p, int end, Encoding enc, Encoding[] encp, Rope str, RegexpSupport.ErrorMode mode) {
         boolean hasProperty = false;
         byte[] buf = null;
 
@@ -274,7 +274,7 @@ public static boolean unescapeNonAscii(RubyContext context, ByteList to, byte[]
         return hasProperty;
     }
 
-    private static int unescapeUnicodeBmp(RubyContext context, ByteList to, byte[] bytes, int p, int end, Encoding[] encp, ByteList str, RegexpSupport.ErrorMode mode) {
+    private static int unescapeUnicodeBmp(RubyContext context, ByteList to, byte[] bytes, int p, int end, Encoding[] encp, Rope str, RegexpSupport.ErrorMode mode) {
         if (p + 4 > end) raisePreprocessError(context, str, "invalid Unicode escape", mode);
         int code = StringSupport.scanHex(bytes, p, 4);
         int len = StringSupport.hexLength(bytes, p, 4);
@@ -283,7 +283,7 @@ private static int unescapeUnicodeBmp(RubyContext context, ByteList to, byte[] b
         return p + 4;
     }
 
-    private static int unescapeUnicodeList(RubyContext context, ByteList to, byte[]bytes, int p, int end, Encoding[]encp, ByteList str, RegexpSupport.ErrorMode mode) {
+    private static int unescapeUnicodeList(RubyContext context, ByteList to, byte[]bytes, int p, int end, Encoding[]encp, Rope str, RegexpSupport.ErrorMode mode) {
         while (p < end && ASCIIEncoding.INSTANCE.isSpace(bytes[p] & 0xff)) p++;
 
         boolean hasUnicode = false;
@@ -302,7 +302,7 @@ private static int unescapeUnicodeList(RubyContext context, ByteList to, byte[]b
         return p;
     }
 
-    private static void appendUtf8(RubyContext context, ByteList to, int code, Encoding[] enc, ByteList str, RegexpSupport.ErrorMode mode) {
+    private static void appendUtf8(RubyContext context, ByteList to, int code, Encoding[] enc, Rope str, RegexpSupport.ErrorMode mode) {
         checkUnicodeRange(context, code, str, mode);
 
         if (code < 0x80) {
@@ -363,14 +363,14 @@ public static int utf8Decode(RubyContext context, byte[]to, int p, int code) {
         throw new org.jruby.truffle.language.control.RaiseException(context.getCoreExceptions().rangeError("pack(U): value out of range", null));
     }
 
-    private static void checkUnicodeRange(RubyContext context, int code, ByteList str, RegexpSupport.ErrorMode mode) {
+    private static void checkUnicodeRange(RubyContext context, int code, Rope str, RegexpSupport.ErrorMode mode) {
         // Unicode is can be only 21 bits long, int is enough
         if ((0xd800 <= code && code <= 0xdfff) /* Surrogates */ || 0x10ffff < code) {
             raisePreprocessError(context, str, "invalid Unicode range", mode);
         }
     }
 
-    private static int unescapeEscapedNonAscii(RubyContext context, ByteList to, byte[]bytes, int p, int end, Encoding enc, Encoding[]encp, ByteList str, RegexpSupport.ErrorMode mode) {
+    private static int unescapeEscapedNonAscii(RubyContext context, ByteList to, byte[]bytes, int p, int end, Encoding enc, Encoding[]encp, Rope str, RegexpSupport.ErrorMode mode) {
         byte[]chBuf = new byte[enc.maxLength()];
         int chLen = 0;
 
@@ -398,7 +398,7 @@ private static int unescapeEscapedNonAscii(RubyContext context, ByteList to, byt
         return p;
     }
 
-    public static int raisePreprocessError(RubyContext context, ByteList str, String err, RegexpSupport.ErrorMode mode) {
+    public static int raisePreprocessError(RubyContext context, Rope str, String err, RegexpSupport.ErrorMode mode) {
         switch (mode) {
             case RAISE:
                 throw new org.jruby.truffle.language.control.RaiseException(context.getCoreExceptions().regexpError(err, null));
@@ -411,7 +411,7 @@ public static int raisePreprocessError(RubyContext context, ByteList str, String
     }
 
     @SuppressWarnings("fallthrough")
-    public static int readEscapedByte(RubyContext context, byte[] to, int toP, byte[] bytes, int p, int end, ByteList str, RegexpSupport.ErrorMode mode) {
+    public static int readEscapedByte(RubyContext context, byte[] to, int toP, byte[] bytes, int p, int end, Rope str, RegexpSupport.ErrorMode mode) {
         if (p == end || bytes[p++] != (byte)'\\') raisePreprocessError(context, str, "too short escaped multibyte character", mode);
 
         boolean metaPrefix = false, ctrlPrefix = false;
@@ -490,12 +490,12 @@ public static int readEscapedByte(RubyContext context, byte[] to, int toP, byte[
         } // while
     }
 
-    public static void preprocessCheck(RubyContext runtime, ByteList bytes) {
+    public static void preprocessCheck(RubyContext runtime, Rope bytes) {
         preprocess(runtime, bytes, bytes.getEncoding(), new Encoding[]{null}, RegexpSupport.ErrorMode.RAISE);
     }
 
-    public static ByteList preprocess(RubyContext runtime, ByteList str, Encoding enc, Encoding[] fixedEnc, RegexpSupport.ErrorMode mode) {
-        ByteList to = new ByteList(str.getRealSize());
+    public static ByteList preprocess(RubyContext runtime, Rope str, Encoding enc, Encoding[] fixedEnc, RegexpSupport.ErrorMode mode) {
+        ByteList to = new ByteList(str.byteLength());
 
         if (enc.isAsciiCompatible()) {
             fixedEnc[0] = null;
@@ -504,24 +504,24 @@ public static ByteList preprocess(RubyContext runtime, ByteList str, Encoding en
             to.setEncoding(enc);
         }
 
-        boolean hasProperty = unescapeNonAscii(runtime, to, str.getUnsafeBytes(), str.getBegin(), str.getBegin() + str.getRealSize(), enc, fixedEnc, str, mode);
+        boolean hasProperty = unescapeNonAscii(runtime, to, str.getBytes(), 0, str.byteLength(), enc, fixedEnc, str, mode);
         if (hasProperty && fixedEnc[0] == null) fixedEnc[0] = enc;
         if (fixedEnc[0] != null) to.setEncoding(fixedEnc[0]);
         return to;
     }
 
-    public static ByteList preprocessDRegexp(RubyContext context, ByteList[] strings, RegexpOptions options) {
+    public static ByteList preprocessDRegexp(RubyContext context, Rope[] strings, RegexpOptions options) {
         ByteList string = null;
         Encoding regexpEnc = null;
 
         for (int i = 0; i < strings.length; i++) {
-            ByteList str = strings[i];
+            Rope str = strings[i];
             final Encoding[] encodingHolder = new Encoding[]{null};
             regexpEnc = processDRegexpElement(context, options, regexpEnc, encodingHolder, str);
             if (string == null) {
-                string = str.dup();
+                string = RopeOperations.getByteListReadOnly(str);
             } else {
-                string.append(str);
+                string.append(str.getBytes());
             }
         }
 
@@ -530,11 +530,11 @@ public static ByteList preprocessDRegexp(RubyContext context, ByteList[] strings
         return string;
     }
 
-    private static Encoding processDRegexpElement(RubyContext context, RegexpOptions options, Encoding regexpEnc, Encoding[] fixedEnc, ByteList str) {
+    private static Encoding processDRegexpElement(RubyContext context, RegexpOptions options, Encoding regexpEnc, Encoding[] fixedEnc, Rope str) {
         Encoding strEnc = str.getEncoding();
 
         if (options.isEncodingNone() && strEnc != ASCIIEncoding.INSTANCE) {
-            if (scanForCodeRange(str) != CR_7BIT) {
+            if (str.getCodeRange() != CR_7BIT) {
                 throw new org.jruby.truffle.language.control.RaiseException(context.getCoreExceptions().regexpError("/.../n has a non escaped non ASCII character in non ASCII-8BIT script", null));
             }
             strEnc = ASCIIEncoding.INSTANCE;
@@ -554,25 +554,14 @@ private static Encoding processDRegexpElement(RubyContext context, RegexpOptions
         return regexpEnc;
     }
 
-    private static CodeRange scanForCodeRange(ByteList str) {
-        CodeRange cr;
-        Encoding enc = str.getEncoding();
-        if (enc.minLength() > 1 && enc.isDummy()) {
-            cr = CR_BROKEN;
-        } else {
-            cr = codeRangeScan(EncodingUtils.getActualEncoding(enc, str), str);
-        }
-        return cr;
-    }
-
     /** rb_reg_quote
      *
      */
     private static final int QUOTED_V = 11;
-    public static ByteList quote19(ByteList bs, boolean asciiOnly) {
-        int p = bs.getBegin();
-        int end = p + bs.getRealSize();
-        byte[] bytes = bs.getUnsafeBytes();
+    public static Rope quote19(Rope bs, boolean asciiOnly) {
+        int p = 0;
+        int end = bs.byteLength();
+        byte[] bytes = bs.getBytes();
         Encoding enc = bs.getEncoding();
 
         metaFound: do {
@@ -604,18 +593,16 @@ public static ByteList quote19(ByteList bs, boolean asciiOnly) {
                 p += cl;
             }
             if (asciiOnly) {
-                ByteList tmp = bs.shallowDup();
-                tmp.setEncoding(USASCIIEncoding.INSTANCE);
-                return tmp;
+                return bs.withEncoding(USASCIIEncoding.INSTANCE, CR_7BIT);
             }
             return bs;
         } while (false);
 
         ByteList result = new ByteList(end * 2);
         result.setEncoding(asciiOnly ? USASCIIEncoding.INSTANCE : bs.getEncoding());
         byte[]obytes = result.getUnsafeBytes();
-        int op = p - bs.getBegin();
-        System.arraycopy(bytes, bs.getBegin(), obytes, 0, op);
+        int op = p;
+        System.arraycopy(bytes, 0, obytes, 0, op);
 
         while (p < end) {
             final int c;
@@ -671,11 +658,11 @@ public static ByteList quote19(ByteList bs, boolean asciiOnly) {
         }
 
         result.setRealSize(op);
-        return result;
+        return StringOperations.ropeFromByteList(result);
     }
 
     // rb_reg_initialize
-    public ClassicRegexp regexpInitialize(ByteList bytes, Encoding enc, RegexpOptions options) {
+    public ClassicRegexp regexpInitialize(Rope bytes, Encoding enc, RegexpOptions options) {
         this.options = options;
 
         //checkFrozen();
@@ -718,9 +705,9 @@ public static void appendOptions(ByteList to, RegexpOptions options) {
     @SuppressWarnings("unused")
     public ByteList toByteList() {
         RegexpOptions newOptions = (RegexpOptions)options.clone();
-        int p = str.getBegin();
-        int len = str.getRealSize();
-        byte[] bytes = str.getUnsafeBytes();
+        int p = 0;
+        int len = str.byteLength();
+        byte[] bytes = str.getBytes();
 
         ByteList result = new ByteList(len);
         result.append((byte)'(').append((byte)'?');
@@ -777,8 +764,8 @@ public ByteList toByteList() {
 
                 if (err) {
                     newOptions = options;
-                    p = str.getBegin();
-                    len = str.getRealSize();
+                    p = 0;
+                    len = str.byteLength();
                 }
             }
 

diff --git a/truffle/src/main/java/org/jruby/truffle/core/regexp/InterpolatedRegexpNode.java b/truffle/src/main/java/org/jruby/truffle/core/regexp/InterpolatedRegexpNode.java
@@ -47,10 +47,10 @@ public Object execute(VirtualFrame frame) {
 
     @TruffleBoundary
     private DynamicObject createRegexp(DynamicObject[] parts) {
-        final ByteList[] strings = new ByteList[children.length];
+        final Rope[] strings = new Rope[children.length];
 
         for (int n = 0; n < children.length; n++) {
-            strings[n] = StringOperations.getByteListReadOnly(parts[n]);
+            strings[n] = StringOperations.rope(parts[n]);
         }
 
         final ByteList preprocessed = ClassicRegexp.preprocessDRegexp(getContext(), strings, options);

diff --git a/truffle/src/main/java/org/jruby/truffle/core/regexp/MatchDataNodes.java b/truffle/src/main/java/org/jruby/truffle/core/regexp/MatchDataNodes.java
@@ -34,7 +34,6 @@
 import org.jruby.truffle.core.cast.TaintResultNode;
 import org.jruby.truffle.core.cast.ToIntNode;
 import org.jruby.truffle.core.rope.Rope;
-import org.jruby.truffle.core.string.ByteList;
 import org.jruby.truffle.core.string.StringGuards;
 import org.jruby.truffle.core.string.StringOperations;
 import org.jruby.truffle.core.string.StringSupport;
@@ -86,13 +85,13 @@ public static Object end(RubyContext context, DynamicObject matchData, int index
         return e;
     }
 
-    private static void updatePairs(ByteList source, Encoding encoding, Pair[] pairs) {
+    private static void updatePairs(Rope source, Encoding encoding, Pair[] pairs) {
         // Taken from org.jruby.RubyMatchData
         Arrays.sort(pairs);
 
         int length = pairs.length;
-        byte[]bytes = source.getUnsafeBytes();
-        int p = source.getBegin();
+        byte[]bytes = source.getBytes();
+        int p = 0;
         int s = p;
         int c = 0;
 
@@ -104,7 +103,7 @@ private static void updatePairs(ByteList source, Encoding encoding, Pair[] pairs
         }
     }
 
-    private static Region getCharOffsetsManyRegs(DynamicObject matchData, ByteList source, Encoding encoding) {
+    private static Region getCharOffsetsManyRegs(DynamicObject matchData, Rope source, Encoding encoding) {
         // Taken from org.jruby.RubyMatchData
         final Region regs = Layouts.MATCH_DATA.getRegion(matchData);
         int numRegs = regs.numRegs;
@@ -162,7 +161,7 @@ public static Region getCharOffsets(DynamicObject matchData) {
 
     @TruffleBoundary
     private static Region createCharOffsets(DynamicObject matchData) {
-        final ByteList source = StringOperations.getByteListReadOnly(Layouts.MATCH_DATA.getSource(matchData));
+        final Rope source = StringOperations.rope(Layouts.MATCH_DATA.getSource(matchData));
         final Encoding enc = source.getEncoding();
         final Region charOffsets = getCharOffsetsManyRegs(matchData, source, enc);
         Layouts.MATCH_DATA.setCharOffsets(matchData, charOffsets);