Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Truffle] Calculate code range as it's created in pack.
Browse files Browse the repository at this point in the history
nirvdrum committed Feb 1, 2016
1 parent 1403014 commit e7c8caa
Showing 4 changed files with 172 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -62,6 +62,7 @@
import org.jruby.truffle.runtime.methods.InternalMethod;
import org.jruby.truffle.runtime.methods.SharedMethodInfo;
import org.jruby.truffle.runtime.rope.AsciiOnlyLeafRope;
import org.jruby.truffle.runtime.rope.InvalidLeafRope;
import org.jruby.truffle.runtime.rope.Rope;
import org.jruby.truffle.runtime.rope.ValidLeafRope;
import org.jruby.util.Memo;
@@ -2489,13 +2490,15 @@ private DynamicObject finishPack(int formatLength, PackResult result) {
}
}

/*
* TODO CS 31-Jan-16 what can I usefully do with the code range? Create AsciiOnlyLeafRope? I'm not setting
* it in the pack nodes yet so it's always just VALID. Also can I use an AsciiOnlyLeafRope for a binary
* string that has bytes with the MSB set?
*/

final Rope rope = new ValidLeafRope(bytes, encoding, result.getStringLength());
final Rope rope;
if (result.getStringCodeRange() == StringSupport.CR_VALID) {
// TODO (nirvdrum 01-Feb-16): We probably should have a node for creating ropes with a known character length.
rope = new ValidLeafRope(bytes, encoding, result.getStringLength());
} else {
rope = makeLeafRopeNode.executeMake(bytes, encoding, result.getStringCodeRange());
}

final DynamicObject string = createString(rope);

if (result.isTainted()) {
Original file line number Diff line number Diff line change
@@ -171,7 +171,15 @@ protected void increaseStringLength(VirtualFrame frame, int additionalLength) {
}

/**
 * Records a code range for the string being packed, keeping whichever of the
 * stored and supplied values is numerically larger.
 *
 * The slot is read first and only overwritten when {@code codeRange} is greater,
 * so a later call with a smaller value cannot undo an earlier, larger one.
 * NOTE(review): this relies on the code range constants being ordered so that a
 * "worse" range compares greater (presumably CR_7BIT < CR_VALID < CR_BROKEN) -
 * confirm against StringSupport.
 *
 * @param frame     the frame holding the pack state slots
 * @param codeRange the code range observed for the bytes just written
 * @throws IllegalStateException if the code range slot does not currently hold an int
 */
protected void setStringCodeRange(VirtualFrame frame, int codeRange) {
    try {
        final int existingCodeRange = frame.getInt(PackFrameDescriptor.STRING_CODE_RANGE_SLOT);

        // Do not write unconditionally before this comparison: that would make
        // existingCodeRange equal to codeRange and turn the merge into a plain overwrite.
        if (codeRange > existingCodeRange) {
            frame.setInt(PackFrameDescriptor.STRING_CODE_RANGE_SLOT, codeRange);
        }
    } catch (FrameSlotTypeException e) {
        throw new IllegalStateException(e);
    }
}

/**
Original file line number Diff line number Diff line change
@@ -17,6 +17,8 @@
import org.jruby.truffle.core.format.nodes.PackNode;
import org.jruby.truffle.core.format.runtime.exceptions.RangeException;
import org.jruby.truffle.runtime.RubyContext;
import org.jruby.truffle.runtime.core.StringOperations;
import org.jruby.util.StringSupport;

/**
* Write a Unicode character out as UTF-8 bytes.
@@ -36,61 +38,118 @@ public WriteUTF8CharacterNode(RubyContext context) {
public Object writeSingleByte(VirtualFrame frame, long value) {
    // Narrow once and reuse the same byte for both the write and the validity check.
    final byte octet = (byte) value;

    writeByte(frame, octet);

    // A lone byte is either plain 7-bit ASCII or it cannot stand on its own in UTF-8.
    final int observedCodeRange = StringOperations.isUTF8ValidOneByte(octet)
            ? StringSupport.CR_7BIT
            : StringSupport.CR_BROKEN;
    setStringCodeRange(frame, observedCodeRange);

    return null;
}

@Specialization(guards = {"value > 0x7f", "value <= 0x7ff"})
public Object writeTwoBytes(VirtualFrame frame, long value) {
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 6) & 0xff) | 0xc0),
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
increaseStringLength(frame, -2 + 1);

if (StringOperations.isUTF8ValidTwoBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

@Specialization(guards = {"value > 0x7ff", "value <= 0xffff"})
public Object writeThreeBytes(VirtualFrame frame, long value) {
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 12) & 0xff) | 0xe0),
(byte)(((value >>> 6) & 0x3f) | 0x80),
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
increaseStringLength(frame, -3 + 1);

if (StringOperations.isUTF8ValidThreeBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

@Specialization(guards = {"value > 0xffff", "value <= 0x1fffff"})
public Object writeFourBytes(VirtualFrame frame, long value) {
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 18) & 0xff) | 0xf0),
(byte)(((value >>> 12) & 0x3f) | 0x80),
(byte)(((value >>> 6) & 0x3f) | 0x80),
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
increaseStringLength(frame, -4 + 1);

if (StringOperations.isUTF8ValidFourBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

@Specialization(guards = {"value > 0x1fffff", "value <= 0x3ffffff"})
public Object writeFiveBytes(VirtualFrame frame, long value) {
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 24) & 0xff) | 0xf8),
(byte)(((value >>> 18) & 0x3f) | 0x80),
(byte)(((value >>> 12) & 0x3f) | 0x80),
(byte)(((value >>> 6) & 0x3f) | 0x80),
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
increaseStringLength(frame, -5 + 1);

if (StringOperations.isUTF8ValidFiveBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

@Specialization(guards = {"value > 0x3ffffff", "value <= 0x7fffffff"})
public Object writeSixBytes(VirtualFrame frame, long value) {
writeBytes(frame,
final byte[] bytes = {
(byte)(((value >>> 30) & 0xff) | 0xfc),
(byte)(((value >>> 24) & 0x3f) | 0x80),
(byte)(((value >>> 18) & 0x3f) | 0x80),
(byte)(((value >>> 12) & 0x3f) | 0x80),
(byte)(((value >>> 6) & 0x3f) | 0x80),
(byte)((value & 0x3f) | 0x80));
(byte)((value & 0x3f) | 0x80)
};

writeBytes(frame, bytes);
increaseStringLength(frame, -6 + 1);

if (StringOperations.isUTF8ValidSixBytes(bytes)) {
setStringCodeRange(frame, StringSupport.CR_VALID);
} else {
setStringCodeRange(frame, StringSupport.CR_BROKEN);
}

return null;
}

Original file line number Diff line number Diff line change
@@ -267,4 +267,89 @@ public static String decodeUTF8(DynamicObject string) {

return RopeOperations.decodeUTF8(Layouts.STRING.getRope(string));
}

/**
 * Tells whether a single byte is a complete UTF-8 character on its own.
 *
 * @param b the byte to inspect
 * @return {@code true} when the high bit is clear (0x00..0x7f), {@code false} otherwise
 */
public static boolean isUTF8ValidOneByte(byte b) {
    // High bit clear is the same as the signed byte being non-negative.
    final int highBit = b & 0x80;
    return highBit == 0;
}

/**
 * Tells whether two bytes form a well-formed two-byte UTF-8 sequence.
 *
 * The lead byte must be 0xc2..0xdf (0xc0 and 0xc1 would be overlong encodings)
 * and the second byte must be a continuation byte, 0x80..0xbf.
 *
 * @param bytes exactly two bytes: lead byte followed by continuation byte
 * @return {@code true} if the sequence is well-formed, {@code false} otherwise
 */
public static boolean isUTF8ValidTwoBytes(byte... bytes) {
    assert bytes.length == 2;

    // Java bytes are signed; mask to 0..255 before comparing against unsigned
    // ranges, otherwise every byte >= 0x80 is negative and no range check can match.
    final int lead = bytes[0] & 0xff;
    final int trail = bytes[1] & 0xff;

    if (lead < 0xc2 || lead > 0xdf) {
        return false;
    }

    return trail >= 0x80 && trail <= 0xbf;
}

/**
 * Tells whether three bytes form a well-formed three-byte UTF-8 sequence.
 *
 * The lead byte must be 0xe0..0xef and both following bytes must be continuation
 * bytes (0x80..0xbf). Two lead bytes narrow the second byte further:
 * 0xe0 requires 0xa0..0xbf (lower values are overlong encodings) and
 * 0xed requires 0x80..0x9f (higher values encode UTF-16 surrogates).
 *
 * @param bytes exactly three bytes: lead byte followed by two continuation bytes
 * @return {@code true} if the sequence is well-formed, {@code false} otherwise
 */
public static boolean isUTF8ValidThreeBytes(byte... bytes) {
    assert bytes.length == 3;

    // Java bytes are signed; mask to 0..255 before comparing against unsigned ranges.
    final int b0 = bytes[0] & 0xff;
    final int b1 = bytes[1] & 0xff;
    final int b2 = bytes[2] & 0xff;

    if (b0 < 0xe0 || b0 > 0xef) {
        return false;
    }

    if (b2 < 0x80 || b2 > 0xbf) {
        return false;
    }

    if (b1 < 0x80 || b1 > 0xbf) {
        return false;
    }

    if (b0 == 0xe0) {
        // Reject overlong encodings of code points below U+0800.
        return b1 >= 0xa0;
    }

    if (b0 == 0xed) {
        // Reject the surrogate range U+D800..U+DFFF.
        return b1 <= 0x9f;
    }

    return true;
}

/**
 * Tells whether four bytes form a well-formed four-byte UTF-8 sequence.
 *
 * The lead byte must be 0xf0..0xf4 and the three following bytes must be
 * continuation bytes (0x80..0xbf). Two lead bytes narrow the second byte further:
 * 0xf0 requires 0x90..0xbf (lower values are overlong encodings) and
 * 0xf4 requires 0x80..0x8f (higher values exceed U+10FFFF).
 *
 * @param bytes exactly four bytes: lead byte followed by three continuation bytes
 * @return {@code true} if the sequence is well-formed, {@code false} otherwise
 */
public static boolean isUTF8ValidFourBytes(byte... bytes) {
    assert bytes.length == 4;

    // Java bytes are signed; mask to 0..255 before comparing against unsigned ranges.
    final int b0 = bytes[0] & 0xff;
    final int b1 = bytes[1] & 0xff;
    final int b2 = bytes[2] & 0xff;
    final int b3 = bytes[3] & 0xff;

    if (b3 < 0x80 || b3 > 0xbf) {
        return false;
    }

    if (b2 < 0x80 || b2 > 0xbf) {
        return false;
    }

    if (b0 < 0xf0 || b0 > 0xf4) {
        return false;
    }

    if (b1 < 0x80 || b1 > 0xbf) {
        return false;
    }

    if (b0 == 0xf0) {
        // Reject overlong encodings of code points below U+10000.
        return b1 >= 0x90;
    }

    if (b0 == 0xf4) {
        // Reject code points above U+10FFFF.
        return b1 <= 0x8f;
    }

    return true;
}

/**
 * Tells whether five bytes form a well-formed UTF-8 sequence.
 *
 * @param bytes exactly five bytes
 * @return always {@code false}
 */
public static boolean isUTF8ValidFiveBytes(byte... bytes) {
    assert bytes.length == 5;

    // No five-byte sequence is accepted as valid UTF-8, whatever its contents.
    final boolean wellFormed = false;
    return wellFormed;
}

/**
 * Tells whether six bytes form a well-formed UTF-8 sequence.
 *
 * @param bytes exactly six bytes
 * @return always {@code false}
 */
public static boolean isUTF8ValidSixBytes(byte... bytes) {
    assert bytes.length == 6;

    // No six-byte sequence is accepted as valid UTF-8, whatever its contents.
    final boolean wellFormed = false;
    return wellFormed;
}

}

0 comments on commit e7c8caa

Please sign in to comment.