Skip to content

Commit

Permalink
Showing 9 changed files with 245 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
NaN = Float::NAN

# Compares the binary (`String#b`) form of +one+ against +two+.
# Raises a RuntimeError naming the sizes when they differ in size, or naming
# the values when the sizes match but the contents differ. Returns nil otherwise.
def assert_equal(one, two)
  expected = one.b

  unless expected.size == two.size
    raise "Expected size: `#{expected.size}` Actual size: `#{two.size}`"
  end

  unless expected == two
    raise "Expected: `#{expected}` Actual: `#{two}`"
  end
end

85 changes: 85 additions & 0 deletions truffle/src/main/java/org/jruby/truffle/core/StringOperations.java
Original file line number Diff line number Diff line change
@@ -266,4 +266,89 @@ public static String decodeUTF8(DynamicObject string) {

return RopeOperations.decodeUTF8(Layouts.STRING.getRope(string));
}

/**
 * Tests whether a single byte is a complete UTF-8 character on its own.
 *
 * @param b the byte to inspect
 * @return {@code true} when the high bit of {@code b} is clear
 */
public static boolean isUTF8ValidOneByte(byte b) {
    // A clear high bit is the same condition as the signed byte being non-negative.
    return (b & 0x80) == 0;
}

/**
 * Tests whether two bytes form a well-formed two-byte UTF-8 sequence.
 *
 * The lead byte must be in 0xc2..0xdf (0xc0 and 0xc1 would be overlong encodings)
 * and the second byte must be a continuation byte in 0x80..0xbf.
 *
 * @param bytes exactly two bytes: lead byte, then continuation byte
 * @return {@code true} if the pair is a well-formed two-byte sequence
 */
public static boolean isUTF8ValidTwoBytes(byte... bytes) {
    assert bytes.length == 2;

    // Java bytes are signed (-128..127); mask to 0..255 before comparing against
    // the unsigned ranges, otherwise every comparison with 0x80 and above fails.
    final int lead = bytes[0] & 0xff;
    final int continuation = bytes[1] & 0xff;

    if (lead >= 0xc2 && lead <= 0xdf) {
        return continuation >= 0x80 && continuation <= 0xbf;
    }

    return false;
}

/**
 * Tests whether three bytes form a well-formed three-byte UTF-8 sequence.
 *
 * The lead byte must be in 0xe0..0xef and both trailing bytes must be continuation
 * bytes in 0x80..0xbf. Two lead bytes narrow the range of the second byte further:
 * 0xe0 requires at least 0xa0 (smaller values are overlong encodings) and 0xed
 * requires at most 0x9f (larger values encode UTF-16 surrogates).
 *
 * @param bytes exactly three bytes: lead byte, then two continuation bytes
 * @return {@code true} if the triple is a well-formed three-byte sequence
 */
public static boolean isUTF8ValidThreeBytes(byte... bytes) {
    assert bytes.length == 3;

    // Java bytes are signed (-128..127); mask to 0..255 before comparing against
    // the unsigned ranges, otherwise every comparison with 0x80 and above fails.
    final int lead = bytes[0] & 0xff;
    final int second = bytes[1] & 0xff;
    final int third = bytes[2] & 0xff;

    if (lead < 0xe0 || lead > 0xef) {
        return false;
    }

    if (third < 0x80 || third > 0xbf) {
        return false;
    }

    // The second byte must itself be a continuation byte; both bounds apply to it.
    if (second < 0x80 || second > 0xbf) {
        return false;
    }

    if (lead == 0xe0) {
        return second >= 0xa0;
    }

    if (lead == 0xed) {
        return second <= 0x9f;
    }

    return true;
}

/**
 * Tests whether four bytes form a well-formed four-byte UTF-8 sequence.
 *
 * The lead byte must be in 0xf0..0xf4 and all three trailing bytes must be
 * continuation bytes in 0x80..0xbf. Two lead bytes narrow the range of the second
 * byte further: 0xf0 requires at least 0x90 (smaller values are overlong encodings)
 * and 0xf4 requires at most 0x8f (larger values exceed U+10FFFF).
 *
 * @param bytes exactly four bytes: lead byte, then three continuation bytes
 * @return {@code true} if the quadruple is a well-formed four-byte sequence
 */
public static boolean isUTF8ValidFourBytes(byte... bytes) {
    assert bytes.length == 4;

    // Java bytes are signed (-128..127); mask to 0..255 before comparing against
    // the unsigned ranges, otherwise every comparison with 0x80 and above fails.
    final int lead = bytes[0] & 0xff;
    final int second = bytes[1] & 0xff;
    final int third = bytes[2] & 0xff;
    final int fourth = bytes[3] & 0xff;

    if (fourth < 0x80 || fourth > 0xbf) {
        return false;
    }

    if (third < 0x80 || third > 0xbf) {
        return false;
    }

    if (lead < 0xf0 || lead > 0xf4) {
        return false;
    }

    // The second byte must itself be a continuation byte; both bounds apply to it.
    if (second < 0x80 || second > 0xbf) {
        return false;
    }

    if (lead == 0xf0) {
        return second >= 0x90;
    }

    if (lead == 0xf4) {
        return second <= 0x8f;
    }

    return true;
}

/**
 * Tests whether five bytes form a well-formed UTF-8 sequence.
 *
 * @param bytes exactly five bytes
 * @return always {@code false}
 */
public static boolean isUTF8ValidFiveBytes(byte... bytes) {
    final int expectedLength = 5;
    assert bytes.length == expectedLength;

    // No five-byte sequence is currently a valid UTF-8 codepoint, so the content is not inspected.
    return false;
}

/**
 * Tests whether six bytes form a well-formed UTF-8 sequence.
 *
 * @param bytes exactly six bytes
 * @return always {@code false}
 */
public static boolean isUTF8ValidSixBytes(byte... bytes) {
    final int expectedLength = 6;
    assert bytes.length == expectedLength;

    // No six-byte sequence is currently a valid UTF-8 codepoint, so the content is not inspected.
    return false;
}

}
Original file line number Diff line number Diff line change
@@ -61,7 +61,10 @@
import org.jruby.truffle.runtime.methods.Arity;
import org.jruby.truffle.runtime.methods.InternalMethod;
import org.jruby.truffle.runtime.methods.SharedMethodInfo;
import org.jruby.truffle.runtime.rope.AsciiOnlyLeafRope;
import org.jruby.truffle.runtime.rope.InvalidLeafRope;
import org.jruby.truffle.runtime.rope.Rope;
import org.jruby.truffle.runtime.rope.ValidLeafRope;
import org.jruby.util.Memo;
import org.jruby.util.StringSupport;

@@ -2487,7 +2490,15 @@ private DynamicObject finishPack(int formatLength, PackResult result) {
}
}

final Rope rope = makeLeafRopeNode.executeMake(bytes, encoding, StringSupport.CR_UNKNOWN);

final Rope rope;
if (result.getStringCodeRange() == StringSupport.CR_VALID) {
// TODO (nirvdrum 01-Feb-16): We probably should have a node for creating ropes with a known character length.
rope = new ValidLeafRope(bytes, encoding, result.getStringLength());
} else {
rope = makeLeafRopeNode.executeMake(bytes, encoding, result.getStringCodeRange());
}

final DynamicObject string = createString(rope);

if (result.isTainted()) {
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@
import org.jruby.truffle.runtime.RubyContext;
import org.jruby.truffle.core.array.ArrayUtils;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;

import java.util.Arrays;

@@ -154,6 +155,34 @@ protected void setOutputPosition(VirtualFrame frame, int position) {
frame.setInt(PackFrameDescriptor.OUTPUT_POSITION_SLOT, position);
}

/**
 * Read the value stored in the frame's string-length slot.
 *
 * @throws IllegalStateException wrapping the FrameSlotTypeException if reading the slot as an int fails
 */
protected int getStringLength(VirtualFrame frame) {
    final int length;

    try {
        length = frame.getInt(PackFrameDescriptor.STRING_LENGTH_SLOT);
    } catch (FrameSlotTypeException e) {
        throw new IllegalStateException(e);
    }

    return length;
}

/**
 * Store {@code length} in the frame's string-length slot, replacing the previous value.
 */
protected void setStringLength(VirtualFrame frame, int length) {
frame.setInt(PackFrameDescriptor.STRING_LENGTH_SLOT, length);
}

/**
 * Add {@code additionalLength} to the value in the frame's string-length slot.
 * The delta may be negative; callers in this commit pass values such as {@code -2 + 1}.
 */
protected void increaseStringLength(VirtualFrame frame, int additionalLength) {
    final int currentLength = getStringLength(frame);
    final int updatedLength = currentLength + additionalLength;

    setStringLength(frame, updatedLength);
}

/**
 * Raise the code range recorded in the frame to {@code codeRange}.
 * The slot is only written when the new value is numerically greater than the
 * stored one, so a smaller or equal value leaves the slot untouched.
 *
 * @throws IllegalStateException wrapping the FrameSlotTypeException if reading the slot as an int fails
 */
protected void setStringCodeRange(VirtualFrame frame, int codeRange) {
    final int recordedCodeRange;

    try {
        recordedCodeRange = frame.getInt(PackFrameDescriptor.STRING_CODE_RANGE_SLOT);
    } catch (FrameSlotTypeException e) {
        throw new IllegalStateException(e);
    }

    if (codeRange <= recordedCodeRange) {
        return;
    }

    frame.setInt(PackFrameDescriptor.STRING_CODE_RANGE_SLOT, codeRange);
}

/**
* Set the output to be tainted.
*/
@@ -169,6 +198,8 @@ protected void writeByte(VirtualFrame frame, byte value) {
final int outputPosition = getOutputPosition(frame);
output[outputPosition] = value;
setOutputPosition(frame, outputPosition + 1);
setStringCodeRange(frame, value >= 0 ? StringSupport.CR_7BIT : StringSupport.CR_VALID);
increaseStringLength(frame, 1);
}

/**
@@ -193,6 +224,7 @@ protected void writeBytes(VirtualFrame frame, byte[] values, int valuesStart, in
final int outputPosition = getOutputPosition(frame);
System.arraycopy(values, valuesStart, output, outputPosition, valuesLength);
setOutputPosition(frame, outputPosition + valuesLength);
increaseStringLength(frame, valuesLength);
}

/**
@@ -203,6 +235,7 @@ protected void writeNullBytes(VirtualFrame frame, int length) {
ensureCapacity(frame, length);
final int outputPosition = getOutputPosition(frame);
setOutputPosition(frame, outputPosition + length);
increaseStringLength(frame, length);
}
}

Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
import org.jruby.truffle.core.format.runtime.PackFrameDescriptor;
import org.jruby.truffle.core.format.runtime.PackResult;
import org.jruby.truffle.runtime.RubyLanguage;
import org.jruby.util.StringSupport;

/**
* The node at the root of a pack expression.
@@ -46,6 +47,8 @@ public Object execute(VirtualFrame frame) {
frame.setInt(PackFrameDescriptor.SOURCE_POSITION_SLOT, 0);
frame.setObject(PackFrameDescriptor.OUTPUT_SLOT, new byte[expectedLength]);
frame.setInt(PackFrameDescriptor.OUTPUT_POSITION_SLOT, 0);
frame.setInt(PackFrameDescriptor.STRING_LENGTH_SLOT, 0);
frame.setInt(PackFrameDescriptor.STRING_CODE_RANGE_SLOT, StringSupport.CR_UNKNOWN);
frame.setBoolean(PackFrameDescriptor.TAINT_SLOT, false);

child.execute(frame);
@@ -79,7 +82,27 @@ public Object execute(VirtualFrame frame) {
throw new IllegalStateException(e);
}

return new PackResult(output, outputLength, taint, encoding);
final int stringLength;

if (encoding == PackEncoding.UTF_8) {
try {
stringLength = frame.getInt(PackFrameDescriptor.STRING_LENGTH_SLOT);
} catch (FrameSlotTypeException e) {
throw new IllegalStateException(e);
}
} else {
stringLength = outputLength;
}

final int stringCodeRange;

try {
stringCodeRange = frame.getInt(PackFrameDescriptor.STRING_CODE_RANGE_SLOT);
} catch (FrameSlotTypeException e) {
throw new IllegalStateException(e);
}

return new PackResult(output, outputLength, stringLength, stringCodeRange, taint, encoding);
}

@Override
Original file line number Diff line number Diff line change
@@ -77,7 +77,7 @@ public Object execute(VirtualFrame frame) {
throw new IllegalStateException(e);
}

return new PackResult(output, outputLength, taint, encoding);
return new PackResult(output, outputLength, -1, -1, taint, encoding);
}

@Override
Original file line number Diff line number Diff line change
@@ -17,6 +17,8 @@
import org.jruby.truffle.core.format.nodes.PackNode;
import org.jruby.truffle.core.format.runtime.exceptions.RangeException;
import org.jruby.truffle.runtime.RubyContext;
import org.jruby.truffle.core.StringOperations;
import org.jruby.util.StringSupport;

/**
* Write a Unicode character out as UTF-8 bytes.
@@ -36,56 +38,118 @@ public WriteUTF8CharacterNode(RubyContext context) {
/**
 * Write {@code value}, narrowed to a byte, to the output and record its code range:
 * CR_7BIT when the byte passes {@link StringOperations#isUTF8ValidOneByte}, CR_BROKEN otherwise.
 *
 * @return always {@code null}
 */
public Object writeSingleByte(VirtualFrame frame, long value) {
    final byte narrowed = (byte) value;

    writeByte(frame, narrowed);

    final int codeRange = StringOperations.isUTF8ValidOneByte(narrowed)
            ? StringSupport.CR_7BIT
            : StringSupport.CR_BROKEN;
    setStringCodeRange(frame, codeRange);

    return null;
}

/**
 * Specialization for values above 0x7f and up to 0x7ff. Builds two bytes from the
 * value (the first OR-ed with 0xc0, the second OR-ed with 0x80), writes them, and
 * records CR_VALID or CR_BROKEN depending on StringOperations.isUTF8ValidTwoBytes.
 *
 * NOTE(review): this listing is a rendered diff, so the bare {@code writeBytes(frame,}
 * opener and the {@code ...));} line appear to be the removed pre-change lines shown
 * next to their replacements.
 */
@Specialization(guards = {"value > 0x7f", "value <= 0x7ff"})
public Object writeTwoBytes(VirtualFrame frame, long value) {
// Appears to be the removed pre-change opener; the array literal below replaces it.
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 6) & 0xff) | 0xc0),
// Appears to be the removed pre-change closer; the next line is its replacement.
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
// Net adjustment of -1: presumably writeBytes already counted two bytes and this
// corrects the total to one character. TODO confirm against the varargs writeBytes.
increaseStringLength(frame, -2 + 1);

if (StringOperations.isUTF8ValidTwoBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

/**
 * Specialization for values above 0x7ff and up to 0xffff. Builds three bytes from the
 * value (the first OR-ed with 0xe0, the rest OR-ed with 0x80), writes them, and
 * records CR_VALID or CR_BROKEN depending on StringOperations.isUTF8ValidThreeBytes.
 *
 * NOTE(review): this listing is a rendered diff, so the bare {@code writeBytes(frame,}
 * opener and the {@code ...));} line appear to be the removed pre-change lines shown
 * next to their replacements.
 */
@Specialization(guards = {"value > 0x7ff", "value <= 0xffff"})
public Object writeThreeBytes(VirtualFrame frame, long value) {
// Appears to be the removed pre-change opener; the array literal below replaces it.
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 12) & 0xff) | 0xe0),
(byte)(((value >>> 6) & 0x3f) | 0x80),
// Appears to be the removed pre-change closer; the next line is its replacement.
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
// Net adjustment of -2: presumably writeBytes already counted three bytes and this
// corrects the total to one character. TODO confirm against the varargs writeBytes.
increaseStringLength(frame, -3 + 1);

if (StringOperations.isUTF8ValidThreeBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

/**
 * Specialization for values above 0xffff and up to 0x1fffff. Builds four bytes from
 * the value (the first OR-ed with 0xf0, the rest OR-ed with 0x80), writes them, and
 * records CR_VALID or CR_BROKEN depending on StringOperations.isUTF8ValidFourBytes.
 *
 * NOTE(review): this listing is a rendered diff, so the bare {@code writeBytes(frame,}
 * opener and the {@code ...));} line appear to be the removed pre-change lines shown
 * next to their replacements.
 */
@Specialization(guards = {"value > 0xffff", "value <= 0x1fffff"})
public Object writeFourBytes(VirtualFrame frame, long value) {
// Appears to be the removed pre-change opener; the array literal below replaces it.
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 18) & 0xff) | 0xf0),
(byte)(((value >>> 12) & 0x3f) | 0x80),
(byte)(((value >>> 6) & 0x3f) | 0x80),
// Appears to be the removed pre-change closer; the next line is its replacement.
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
// Net adjustment of -3: presumably writeBytes already counted four bytes and this
// corrects the total to one character. TODO confirm against the varargs writeBytes.
increaseStringLength(frame, -4 + 1);

if (StringOperations.isUTF8ValidFourBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

/**
 * Specialization for values above 0x1fffff and up to 0x3ffffff. Builds five bytes
 * from the value (the first OR-ed with 0xf8, the rest OR-ed with 0x80), writes them,
 * and records a code range based on StringOperations.isUTF8ValidFiveBytes.
 *
 * NOTE(review): this listing is a rendered diff, so the bare {@code writeBytes(frame,}
 * opener and the {@code ...));} line appear to be the removed pre-change lines shown
 * next to their replacements.
 */
@Specialization(guards = {"value > 0x1fffff", "value <= 0x3ffffff"})
public Object writeFiveBytes(VirtualFrame frame, long value) {
// Appears to be the removed pre-change opener; the array literal below replaces it.
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 24) & 0xff) | 0xf8),
(byte)(((value >>> 18) & 0x3f) | 0x80),
(byte)(((value >>> 12) & 0x3f) | 0x80),
(byte)(((value >>> 6) & 0x3f) | 0x80),
// Appears to be the removed pre-change closer; the next line is its replacement.
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
// Net adjustment of -4: presumably writeBytes already counted five bytes and this
// corrects the total to one character. TODO confirm against the varargs writeBytes.
increaseStringLength(frame, -5 + 1);

// isUTF8ValidFiveBytes, as added in this commit, always returns false, so the
// CR_BROKEN branch is the one taken.
if (StringOperations.isUTF8ValidFiveBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

/**
 * Specialization for values above 0x3ffffff and up to 0x7fffffff. Builds six bytes
 * from the value (the first OR-ed with 0xfc, the rest OR-ed with 0x80), writes them,
 * and records a code range based on StringOperations.isUTF8ValidSixBytes.
 *
 * NOTE(review): this listing is a rendered diff, so the bare {@code writeBytes(frame,}
 * opener and the {@code ...));} line appear to be the removed pre-change lines shown
 * next to their replacements.
 */
@Specialization(guards = {"value > 0x3ffffff", "value <= 0x7fffffff"})
public Object writeSixBytes(VirtualFrame frame, long value) {
// Appears to be the removed pre-change opener; the array literal below replaces it.
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 30) & 0xff) | 0xfc),
(byte)(((value >>> 24) & 0x3f) | 0x80),
(byte)(((value >>> 18) & 0x3f) | 0x80),
(byte)(((value >>> 12) & 0x3f) | 0x80),
(byte)(((value >>> 6) & 0x3f) | 0x80),
// Appears to be the removed pre-change closer; the next line is its replacement.
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
// Net adjustment of -5: presumably writeBytes already counted six bytes and this
// corrects the total to one character. TODO confirm against the varargs writeBytes.
increaseStringLength(frame, -6 + 1);

// isUTF8ValidSixBytes, as added in this commit, always returns false, so the
// CR_BROKEN branch is the one taken.
if (StringOperations.isUTF8ValidSixBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

Original file line number Diff line number Diff line change
@@ -21,6 +21,8 @@ public class PackFrameDescriptor {
public static final FrameSlot SOURCE_POSITION_SLOT = FRAME_DESCRIPTOR.addFrameSlot("source-position", FrameSlotKind.Int);
public static final FrameSlot OUTPUT_SLOT = FRAME_DESCRIPTOR.addFrameSlot("output", FrameSlotKind.Object);
public static final FrameSlot OUTPUT_POSITION_SLOT = FRAME_DESCRIPTOR.addFrameSlot("output-position", FrameSlotKind.Int);
public static final FrameSlot STRING_LENGTH_SLOT = FRAME_DESCRIPTOR.addFrameSlot("string-length", FrameSlotKind.Int);
public static final FrameSlot STRING_CODE_RANGE_SLOT = FRAME_DESCRIPTOR.addFrameSlot("string-code-range", FrameSlotKind.Int);
public static final FrameSlot TAINT_SLOT = FRAME_DESCRIPTOR.addFrameSlot("taint", FrameSlotKind.Boolean);

}
Original file line number Diff line number Diff line change
@@ -13,12 +13,16 @@ public class PackResult {

private final Object output;
private final int outputLength;
private final int stringLength;
private final int stringCodeRange;
private final boolean tainted;
private final PackEncoding encoding;

/**
 * Creates a result holding the pack output, its length in bytes, the string length
 * and code range recorded while packing, the taint flag and the encoding.
 *
 * NOTE(review): this listing is a rendered diff; the four-parameter signature on the
 * first line appears to be the removed pre-change line, superseded by the
 * six-parameter signature that follows it.
 */
public PackResult(Object output, int outputLength, boolean tainted, PackEncoding encoding) {
public PackResult(Object output, int outputLength, int stringLength, int stringCodeRange, boolean tainted, PackEncoding encoding) {
this.output = output;
this.outputLength = outputLength;
// One caller in this commit passes -1 for both of the next two values.
this.stringLength = stringLength;
this.stringCodeRange = stringCodeRange;
this.tainted = tainted;
this.encoding = encoding;
}
@@ -31,6 +35,14 @@ public int getOutputLength() {
return outputLength;
}

/**
 * Returns the string length this result was constructed with.
 */
public int getStringLength() {
    return this.stringLength;
}

/**
 * Returns the string code range this result was constructed with.
 */
public int getStringCodeRange() {
    return this.stringCodeRange;
}

/**
 * Returns the taint flag this result was constructed with.
 */
public boolean isTainted() {
    return this.tainted;
}

0 comments on commit 1d2c94d

Please sign in to comment.