Skip to content

Commit

Permalink
[Truffle] String#each_char is now multibyte-encoding aware.
Browse files Browse the repository at this point in the history
  • Loading branch information
nirvdrum committed Mar 10, 2015
1 parent dcee5f0 commit 8299588
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 18 deletions.
6 changes: 0 additions & 6 deletions spec/truffle/tags/core/string/chars_tags.txt

This file was deleted.

6 changes: 0 additions & 6 deletions spec/truffle/tags/core/string/each_char_tags.txt

This file was deleted.

Expand Up @@ -44,6 +44,7 @@
import org.jruby.truffle.runtime.rubinius.RubiniusByteArray;
import org.jruby.truffle.runtime.util.ArrayUtils;
import org.jruby.util.ByteList;
import org.jruby.util.CodeRangeSupport;
import org.jruby.util.Pack;
import org.jruby.util.StringSupport;
import org.jruby.util.io.EncodingUtils;
Expand Down Expand Up @@ -847,20 +848,74 @@ public Object eachChar(VirtualFrame frame, RubyString string, @SuppressWarnings(
return toEnumNode.call(frame, string, "to_enum", null, getContext().newSymbol("each_char"));
}

@Specialization
@Specialization(guards = "isValidOr7BitEncoding")
public RubyString eachChar(VirtualFrame frame, RubyString string, RubyProc block) {
notDesignedForCompilation();
ByteList strByteList = string.getByteList();
byte[] ptrBytes = strByteList.unsafeBytes();
int ptr = strByteList.begin();
int len = strByteList.getRealSize();
Encoding enc = string.getBytes().getEncoding();

// TODO (nirvdrum 04-Feb-15): This needs to support Ruby' encoding and code range semantics. For now, this hack will suffice for very simple Strings.
final String javaString = string.toString();
final int stringLength = string.getBytes().length();
int n;

for (int i = 0; i < stringLength; i += n) {
n = StringSupport.encFastMBCLen(ptrBytes, ptr + i, ptr + len, enc);

for (int i = 0; i < javaString.length(); i++) {
yield(frame, block, getContext().makeString(javaString.charAt(i)));
yield(frame, block, substr(string, i, n));
}

return string;
}

@Specialization(guards = "!isValidOr7BitEncoding")
public RubyString eachCharMultiByteEncoding(VirtualFrame frame, RubyString string, RubyProc block) {
ByteList strByteList = string.getByteList();
byte[] ptrBytes = strByteList.unsafeBytes();
int ptr = strByteList.begin();
int len = strByteList.getRealSize();
Encoding enc = string.getBytes().getEncoding();

final int stringLength = string.getBytes().length();
int n;

for (int i = 0; i < stringLength; i += n) {
n = multiByteStringLength(enc, ptrBytes, ptr + i, ptr + len);

yield(frame, block, substr(string, i, n));
}

return string;
}

public static boolean isValidOr7BitEncoding(RubyString string) {
return string.isCodeRangeValid() || CodeRangeSupport.isCodeRangeAsciiOnly(string);
}

@TruffleBoundary
private int multiByteStringLength(Encoding enc, byte[] bytes, int p, int end) {
return StringSupport.length(enc, bytes, p, end);
}

// TODO (nirvdrum 10-Mar-15): This was extracted from JRuby, but likely will need to become a Rubinius primitive.
private Object substr(RubyString string, int beg, int len) {
final ByteList bytes = string.getBytes();

int length = bytes.length();
if (len < 0 || beg > length) return getContext().getCoreLibrary().getNilObject();

if (beg < 0) {
beg += length;
if (beg < 0) getContext().getCoreLibrary().getNilObject();
}

int end = Math.min(length, beg + len);

final ByteList substringBytes = new ByteList(bytes, beg, end - beg);
substringBytes.setEncoding(bytes.getEncoding());

return getContext().makeString(string.getLogicalClass(), substringBytes);
}
}

@CoreMethod(names = "empty?")
Expand Down

0 comments on commit 8299588

Please sign in to comment.