Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'truffle-ropes-2' into truffle-ropes-on-head
Browse files Browse the repository at this point in the history
nirvdrum committed Jan 27, 2016
2 parents 358331b + 0b72e28 commit 6ad0311
Showing 21 changed files with 471 additions and 216 deletions.
Original file line number Diff line number Diff line change
@@ -19,6 +19,8 @@
import org.jruby.truffle.runtime.RubyContext;
import org.jruby.truffle.runtime.core.StringOperations;
import org.jruby.truffle.runtime.layouts.Layouts;
import org.jruby.truffle.runtime.rope.Rope;
import org.jruby.truffle.runtime.rope.RopeOperations;
import org.jruby.truffle.translator.BodyTranslator;
import org.jruby.util.RegexpOptions;

@@ -48,13 +50,15 @@ public Object execute(VirtualFrame frame) {

final org.jruby.RubyString preprocessed = org.jruby.RubyRegexp.preprocessDRegexp(getContext().getRuntime(), strings, options);

final DynamicObject regexp = RegexpNodes.createRubyRegexp(getContext(), this, getContext().getCoreLibrary().getRegexpClass(), preprocessed.getByteList(), options);
final DynamicObject regexp = RegexpNodes.createRubyRegexp(getContext(), this, getContext().getCoreLibrary().getRegexpClass(), StringOperations.ropeFromByteList(preprocessed.getByteList()), options);

if (options.isEncodingNone()) {
final Rope source = Layouts.REGEXP.getSource(regexp);

if (!BodyTranslator.all7Bit(preprocessed.getByteList().bytes())) {
Layouts.REGEXP.getSource(regexp).setEncoding(getContext().getRuntime().getEncodingService().getAscii8bitEncoding());
Layouts.REGEXP.setSource(regexp, RopeOperations.withEncoding(source, getContext().getRuntime().getEncodingService().getAscii8bitEncoding()));
} else {
Layouts.REGEXP.getSource(regexp).setEncoding(getContext().getRuntime().getEncodingService().getUSAsciiEncoding());
Layouts.REGEXP.setSource(regexp, RopeOperations.withEncoding(source, getContext().getRuntime().getEncodingService().getUSAsciiEncoding()));
}
}

72 changes: 42 additions & 30 deletions truffle/src/main/java/org/jruby/truffle/nodes/core/RegexpNodes.java
Original file line number Diff line number Diff line change
@@ -40,6 +40,7 @@
import org.jruby.truffle.runtime.core.StringOperations;
import org.jruby.truffle.runtime.layouts.Layouts;
import org.jruby.truffle.runtime.rope.Rope;
import org.jruby.truffle.runtime.rope.RopeOperations;
import org.jruby.util.*;

import java.nio.charset.StandardCharsets;
@@ -55,17 +56,17 @@ public static Object matchCommon(RubyContext context, DynamicObject regexp, Dyna
assert RubyGuards.isRubyRegexp(regexp);
assert RubyGuards.isRubyString(source);

final ByteList sourceByteList = StringOperations.getByteListReadOnly(source);
final Rope sourceRope = StringOperations.rope(source);

final ByteList bl = Layouts.REGEXP.getSource(regexp);
final Encoding enc = checkEncoding(regexp, StringOperations.getCodeRangeableReadOnly(source), true);
final ByteList preprocessed = RegexpSupport.preprocess(context.getRuntime(), bl, enc, new Encoding[] { null }, RegexpSupport.ErrorMode.RAISE);
final Rope regexpSourceRope = Layouts.REGEXP.getSource(regexp);
final Encoding enc = checkEncoding(regexp, sourceRope, true);
final ByteList preprocessed = RegexpSupport.preprocess(context.getRuntime(), regexpSourceRope.getUnsafeByteList(), enc, new Encoding[] { null }, RegexpSupport.ErrorMode.RAISE);

final Regex r = new Regex(preprocessed.getUnsafeBytes(), preprocessed.getBegin(), preprocessed.getBegin() + preprocessed.getRealSize(), Layouts.REGEXP.getOptions(regexp).toJoniOptions(), checkEncoding(regexp, StringOperations.getCodeRangeableReadOnly(source), true));
final Matcher matcher = r.matcher(sourceByteList.unsafeBytes(), sourceByteList.begin(), sourceByteList.begin() + sourceByteList.realSize());
int range = sourceByteList.begin() + sourceByteList.realSize();
final Regex r = new Regex(preprocessed.getUnsafeBytes(), preprocessed.getBegin(), preprocessed.getBegin() + preprocessed.getRealSize(), Layouts.REGEXP.getOptions(regexp).toJoniOptions(), checkEncoding(regexp, sourceRope, true));
final Matcher matcher = r.matcher(sourceRope.getBytes(), sourceRope.begin(), sourceRope.begin() + sourceRope.realSize());
int range = sourceRope.begin() + sourceRope.realSize();

return matchCommon(context, regexp, source, operator, setNamedCaptures, matcher, sourceByteList.begin() + startPos, range);
return matchCommon(context, regexp, source, operator, setNamedCaptures, matcher, sourceRope.begin() + startPos, range);
}

@TruffleBoundary
@@ -197,7 +198,7 @@ private static void setLocalVariable(Frame frame, String name, Object value) {
}
}

public static ByteList shimModifiers(ByteList bytes) {
public static Rope shimModifiers(Rope bytes) {
// Joni doesn't support (?u) etc but we can shim some common cases

String bytesString = bytes.toString();
@@ -223,14 +224,15 @@ public static ByteList shimModifiers(ByteList bytes) {
throw new UnsupportedOperationException();
}

bytes = ByteList.create(bytesString);
// TODO (nirvdrum 25-Jan-16): We probably just want a way to create a Rope from a java.lang.String.
bytes = StringOperations.ropeFromByteList(ByteList.create(bytesString));
}

return bytes;
}

@TruffleBoundary
public static Regex compile(Node currentNode, RubyContext context, ByteList bytes, RegexpOptions options) {
public static Regex compile(Node currentNode, RubyContext context, Rope bytes, RegexpOptions options) {
bytes = shimModifiers(bytes);

try {
@@ -249,13 +251,14 @@ public static Regex compile(Node currentNode, RubyContext context, ByteList byte
}
*/

final ByteList byteList = bytes.getUnsafeByteList();
Encoding enc = bytes.getEncoding();
Encoding[] fixedEnc = new Encoding[]{null};
ByteList unescaped = RegexpSupport.preprocess(context.getRuntime(), bytes, enc, fixedEnc, RegexpSupport.ErrorMode.RAISE);
ByteList unescaped = RegexpSupport.preprocess(context.getRuntime(), byteList, enc, fixedEnc, RegexpSupport.ErrorMode.RAISE);
if (fixedEnc[0] != null) {
if ((fixedEnc[0] != enc && options.isFixed()) ||
(fixedEnc[0] != ASCIIEncoding.INSTANCE && options.isEncodingNone())) {
RegexpSupport.raiseRegexpError19(context.getRuntime(), bytes, enc, options, "incompatible character encoding");
RegexpSupport.raiseRegexpError19(context.getRuntime(), byteList, enc, options, "incompatible character encoding");
}
if (fixedEnc[0] != ASCIIEncoding.INSTANCE) {
options.setFixed(true);
@@ -268,10 +271,8 @@ public static Regex compile(Node currentNode, RubyContext context, ByteList byte
if (fixedEnc[0] != null) options.setFixed(true);
//if (regexpOptions.isEncodingNone()) setEncodingNone();

bytes.setEncoding(enc);

Regex ret = new Regex(unescaped.getUnsafeBytes(), unescaped.getBegin(), unescaped.getBegin() + unescaped.getRealSize(), options.toJoniOptions(), enc, Syntax.RUBY);
ret.setUserObject(bytes);
ret.setUserObject(RopeOperations.withEncoding(bytes, enc));

return ret;
} catch (ValueException e) {
@@ -293,7 +294,7 @@ public static void setRegex(DynamicObject regexp, Regex regex) {
Layouts.REGEXP.setRegex(regexp, regex);
}

public static void setSource(DynamicObject regexp, ByteList source) {
public static void setSource(DynamicObject regexp, Rope source) {
Layouts.REGEXP.setSource(regexp, source);
}

@@ -302,7 +303,7 @@ public static void setOptions(DynamicObject regexp, RegexpOptions options) {
}

// TODO (nirvdrum 03-June-15) Unify with JRuby in RegexpSupport.
public static Encoding checkEncoding(DynamicObject regexp, CodeRangeable str, boolean warn) {
public static Encoding checkEncoding(DynamicObject regexp, Rope str, boolean warn) {
assert RubyGuards.isRubyRegexp(regexp);

final Regex pattern = Layouts.REGEXP.getRegex(regexp);
@@ -313,7 +314,7 @@ public static Encoding checkEncoding(DynamicObject regexp, CodeRangeable str, bo
}
*/
//check();
Encoding enc = str.getByteList().getEncoding();
Encoding enc = str.getEncoding();
if (!enc.isAsciiCompatible()) {
if (enc != pattern.getEncoding()) {
//encodingMatchError(getRuntime(), pattern, enc);
@@ -336,31 +337,42 @@ public static Encoding checkEncoding(DynamicObject regexp, CodeRangeable str, bo
return enc;
}

public static void initialize(RubyContext context, DynamicObject regexp, Node currentNode, ByteList setSource, int options) {
public static void initialize(RubyContext context, DynamicObject regexp, Node currentNode, Rope setSource, int options) {
assert RubyGuards.isRubyRegexp(regexp);
setSource(regexp, setSource);
setOptions(regexp, RegexpOptions.fromEmbeddedOptions(options));
setRegex(regexp, compile(currentNode, context, setSource, Layouts.REGEXP.getOptions(regexp)));
final RegexpOptions regexpOptions = RegexpOptions.fromEmbeddedOptions(options);
final Regex regex = compile(currentNode, context, setSource, regexpOptions);

// The RegexpNodes.compile operation may modify the encoding of the source rope. This modified copy is stored
// in the Regex object as the "user object". Since ropes are immutable, we need to take this updated copy when
// constructing the final regexp.
setSource(regexp, (Rope) regex.getUserObject());
setOptions(regexp, regexpOptions);
setRegex(regexp, regex);
}

public static void initialize(DynamicObject regexp, Regex setRegex, ByteList setSource) {
public static void initialize(DynamicObject regexp, Regex setRegex, Rope setSource) {
assert RubyGuards.isRubyRegexp(regexp);
setRegex(regexp, setRegex);
setSource(regexp, setSource);
}

public static DynamicObject createRubyRegexp(RubyContext context, Node currentNode, DynamicObject regexpClass, ByteList regex, RegexpOptions options) {
return Layouts.REGEXP.createRegexp(Layouts.CLASS.getInstanceFactory(regexpClass), RegexpNodes.compile(currentNode, context, regex, options), regex, options, null);
public static DynamicObject createRubyRegexp(RubyContext context, Node currentNode, DynamicObject regexpClass, Rope source, RegexpOptions options) {
final Regex regexp = RegexpNodes.compile(currentNode, context, source, options);

// The RegexpNodes.compile operation may modify the encoding of the source rope. This modified copy is stored
// in the Regex object as the "user object". Since ropes are immutable, we need to take this updated copy when
// constructing the final regexp.
return Layouts.REGEXP.createRegexp(Layouts.CLASS.getInstanceFactory(regexpClass), regexp, (Rope) regexp.getUserObject(), options, null);
}

public static DynamicObject createRubyRegexp(DynamicObject regexpClass, Regex regex, ByteList source, RegexpOptions options) {
public static DynamicObject createRubyRegexp(DynamicObject regexpClass, Regex regex, Rope source, RegexpOptions options) {
final DynamicObject regexp = Layouts.REGEXP.createRegexp(Layouts.CLASS.getInstanceFactory(regexpClass), null, null, RegexpOptions.NULL_OPTIONS, null);
RegexpNodes.setOptions(regexp, options);
RegexpNodes.initialize(regexp, regex, source);
return regexp;
}

public static DynamicObject createRubyRegexp(DynamicObject regexpClass, Regex regex, ByteList source) {
public static DynamicObject createRubyRegexp(DynamicObject regexpClass, Regex regex, Rope source) {
final DynamicObject regexp = Layouts.REGEXP.createRegexp(Layouts.CLASS.getInstanceFactory(regexpClass), null, null, RegexpOptions.NULL_OPTIONS, null);
RegexpNodes.initialize(regexp, regex, source);
return regexp;
@@ -502,7 +514,7 @@ public SourceNode(RubyContext context, SourceSection sourceSection) {

@Specialization
public DynamicObject source(DynamicObject regexp) {
return createString(Layouts.REGEXP.getSource(regexp).dup());
return createString(Layouts.REGEXP.getSource(regexp));
}

}
@@ -517,7 +529,7 @@ public ToSNode(RubyContext context, SourceSection sourceSection) {
@TruffleBoundary
@Specialization
public DynamicObject toS(DynamicObject regexp) {
return createString(((org.jruby.RubyString) org.jruby.RubyRegexp.newRegexp(getContext().getRuntime(), Layouts.REGEXP.getSource(regexp), Layouts.REGEXP.getRegex(regexp).getOptions()).to_s()).getByteList());
return createString(((org.jruby.RubyString) org.jruby.RubyRegexp.newRegexp(getContext().getRuntime(), Layouts.REGEXP.getSource(regexp).getUnsafeByteList(), Layouts.REGEXP.getRegex(regexp).getOptions()).to_s()).getByteList());
}

}
97 changes: 94 additions & 3 deletions truffle/src/main/java/org/jruby/truffle/nodes/core/RopeNodes.java
Original file line number Diff line number Diff line change
@@ -10,13 +10,13 @@

package org.jruby.truffle.nodes.core;


import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.NodeChild;
import com.oracle.truffle.api.dsl.NodeChildren;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.nodes.Node.Child;
import com.oracle.truffle.api.object.DynamicObject;
import com.oracle.truffle.api.source.SourceSection;
import com.oracle.truffle.api.utilities.ConditionProfile;
import org.jcodings.Encoding;
@@ -111,7 +111,7 @@ private Rope makeSubstring(Rope base, int offset, int byteLength, ConditionProfi
return makeSubstringNon7Bit(base, offset, byteLength);
}

@CompilerDirectives.TruffleBoundary
@TruffleBoundary
private Rope makeSubstringNon7Bit(Rope base, int offset, int byteLength) {
final long packedLengthAndCodeRange = RopeOperations.calculateCodeRangeAndLength(base.getEncoding(), base.getBytes(), offset, offset + byteLength);
final int codeRange = StringSupport.unpackArg(packedLengthAndCodeRange);
@@ -344,4 +344,95 @@ protected static boolean isUnknown(int codeRange) {
return codeRange == StringSupport.CR_UNKNOWN;
}
}

@NodeChildren({
        @NodeChild(type = RubyNode.class, value = "rope"),
        @NodeChild(type = RubyNode.class, value = "currentLevel"),
        @NodeChild(type = RubyNode.class, value = "printString")
})
public abstract static class DebugPrintRopeNode extends RubyNode {

    public DebugPrintRopeNode(RubyContext context, SourceSection sourceSection) {
        super(context, sourceSection);
    }

    /**
     * Writes a one-line description of {@code rope} to standard error, prefixed according to
     * {@code currentLevel}. The substring and concat specializations then describe their child
     * rope(s) one level deeper.
     *
     * @param rope         the rope to describe
     * @param currentLevel nesting level of this rope; determines how many "| " markers precede the line
     * @param printString  when true the rope's string form is printed, otherwise the literal "<skipped>"
     * @return the value of {@code nil()}
     */
    public abstract DynamicObject executeDebugPrint(Rope rope, int currentLevel, boolean printString);

    // Summary fields: BN = raw bytes were null, BL = byte length, CL = character length,
    // CR = code range, D = depth.
    @TruffleBoundary
    @Specialization
    public DynamicObject debugPrintLeafRope(LeafRope rope, int currentLevel, boolean printString) {
        printPreamble(currentLevel);

        // Capture the raw byte[] state first: building the java.lang.String below may populate it.
        final boolean rawBytesMissing = rope.getRawBytes() == null;
        final String contents = printString ? rope.toString() : "<skipped>";

        final String summary = String.format("%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; D: %d)",
                contents,
                rope.getClass().getSimpleName(),
                rawBytesMissing,
                rope.byteLength(),
                rope.characterLength(),
                StringSupport.codeRangeAsString(rope.getCodeRange()),
                rope.depth());
        System.err.println(summary);

        return nil();
    }

    // Same summary as the leaf case, plus O = offset; then descends into the child rope.
    @TruffleBoundary
    @Specialization
    public DynamicObject debugPrintSubstringRope(SubstringRope rope, int currentLevel, boolean printString) {
        printPreamble(currentLevel);

        // Capture the raw byte[] state first: building the java.lang.String below may populate it.
        final boolean rawBytesMissing = rope.getRawBytes() == null;
        final String contents = printString ? rope.toString() : "<skipped>";

        final String summary = String.format("%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; O: %d; D: %d)",
                contents,
                rope.getClass().getSimpleName(),
                rawBytesMissing,
                rope.byteLength(),
                rope.characterLength(),
                StringSupport.codeRangeAsString(rope.getCodeRange()),
                rope.getOffset(),
                rope.depth());
        System.err.println(summary);

        final int childLevel = currentLevel + 1;
        executeDebugPrint(rope.getChild(), childLevel, printString);

        return nil();
    }

    // Same summary as the leaf case, plus LD / RD = depth of the left / right child; then descends
    // into the left child followed by the right child.
    @TruffleBoundary
    @Specialization
    public DynamicObject debugPrintConcatRope(ConcatRope rope, int currentLevel, boolean printString) {
        printPreamble(currentLevel);

        // Capture the raw byte[] state first: building the java.lang.String below may populate it.
        final boolean rawBytesMissing = rope.getRawBytes() == null;
        final String contents = printString ? rope.toString() : "<skipped>";

        final String summary = String.format("%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; D: %d; LD: %d; RD: %d)",
                contents,
                rope.getClass().getSimpleName(),
                rawBytesMissing,
                rope.byteLength(),
                rope.characterLength(),
                StringSupport.codeRangeAsString(rope.getCodeRange()),
                rope.depth(),
                rope.getLeft().depth(),
                rope.getRight().depth());
        System.err.println(summary);

        final int childLevel = currentLevel + 1;
        executeDebugPrint(rope.getLeft(), childLevel, printString);
        executeDebugPrint(rope.getRight(), childLevel, printString);

        return nil();
    }

    // Emits one "| " marker per nesting level; prints nothing for level <= 0.
    private void printPreamble(int level) {
        for (int remaining = level; remaining > 0; remaining--) {
            System.err.print("| ");
        }
    }

}
}
Original file line number Diff line number Diff line change
@@ -68,4 +68,9 @@ public static boolean isEmpty(DynamicObject string) {
assert RubyGuards.isRubyString(string);
return Layouts.STRING.getRope(string).isEmpty();
}

/**
 * Tells whether the code range reported for {@code string} by
 * {@code StringOperations.codeRange} equals {@code StringSupport.CR_BROKEN}.
 *
 * @param string the object to inspect; asserted to be a Ruby string
 * @return true when the code range equals {@code CR_BROKEN}
 */
public static boolean isBrokenCodeRange(DynamicObject string) {
    assert RubyGuards.isRubyString(string);

    final int codeRange = StringOperations.codeRange(string);
    return StringSupport.CR_BROKEN == codeRange;
}
}
Loading

0 comments on commit 6ad0311

Please sign in to comment.