Skip to content

Commit

Permalink
[Truffle] Last of the encoding specs.
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisseaton committed Dec 24, 2014
1 parent 53610d2 commit 7da6fd7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 8 deletions.
Expand Up @@ -14,8 +14,10 @@
import org.jruby.truffle.nodes.RubyNode;
import org.jruby.truffle.nodes.dispatch.DispatchHeadNode;
import org.jruby.truffle.runtime.RubyContext;
import org.jruby.truffle.runtime.core.RubyEncoding;
import org.jruby.truffle.runtime.core.RubyRegexp;
import org.jruby.truffle.runtime.core.RubyString;
import org.jruby.truffle.translator.BodyTranslator;
import org.jruby.util.RegexpOptions;

public class InteroplatedRegexpNode extends RubyNode {
Expand Down Expand Up @@ -43,7 +45,20 @@ public RubyRegexp executeRubyRegexp(VirtualFrame frame) {
}

final org.jruby.RubyString preprocessed = org.jruby.RubyRegexp.preprocessDRegexp(getContext().getRuntime(), strings, options);
return new RubyRegexp(this, getContext().getCoreLibrary().getRegexpClass(), preprocessed.getByteList(), options.toOptions());

final RubyRegexp regexp = new RubyRegexp(this, getContext().getCoreLibrary().getRegexpClass(), preprocessed.getByteList(), options.toOptions());

if (options.isEncodingNone()) {
// This isn't quite right - we shouldn't be looking these up by name; we need real references to these constants

if (!BodyTranslator.all7Bit(preprocessed.getByteList().bytes())) {
regexp.forceEncoding((RubyEncoding) getContext().getCoreLibrary().getEncodingClass().getConstants().get("ASCII_8BIT").getValue());
} else {
regexp.forceEncoding((RubyEncoding) getContext().getCoreLibrary().getEncodingClass().getConstants().get("US_ASCII").getValue());
}
}

return regexp;
}

@Override
Expand Down
Expand Up @@ -2026,10 +2026,10 @@ public RubyNode visitRegexpNode(org.jruby.ast.RegexpNode node) {
if (node.getOptions().isEncodingNone()) {
// This isn't quite right - we shouldn't be looking these up by name; we need real references to these constants

if (all7Bit(node.getValue().bytes())) {
regexp.forceEncoding((RubyEncoding) context.getCoreLibrary().getEncodingClass().getConstants().get("US_ASCII").getValue());
if (!all7Bit(node.getValue().bytes())) {
regexp.forceEncoding((RubyEncoding) context.getCoreLibrary().getEncodingClass().getConstants().get("ASCII_8BIT").getValue());
} else {
regexp.forceEncoding((RubyEncoding) context.getCoreLibrary().getEncodingClass().getConstants().get("ASCII-8BIT").getValue());
regexp.forceEncoding((RubyEncoding) context.getCoreLibrary().getEncodingClass().getConstants().get("US_ASCII").getValue());
}
} else if (node.getOptions().getKCode().getKCode().equals("SJIS")) {
regexp.forceEncoding((RubyEncoding) context.getCoreLibrary().getEncodingClass().getConstants().get("Windows_31J").getValue());
Expand All @@ -2048,11 +2048,21 @@ public RubyNode visitRegexpNode(org.jruby.ast.RegexpNode node) {
return literalNode;
}

private static boolean all7Bit(byte[] bytes) {
public static boolean all7Bit(byte[] bytes) {
for (int n = 0; n < bytes.length; n++) {
if (bytes[n] < 0 || bytes[n] > 0x7F) {
if (bytes[n] < 0 || bytes[n] > Byte.MAX_VALUE) {
return false;
}

if (bytes[n] == '\\' && n + 1 < bytes.length && bytes[n + 1] == 'x') {
int b = Integer.parseInt(new String(Arrays.copyOfRange(bytes, n + 2, n + 4)), 16);

if (b > 0x7F) {
return false;
}

n += 3;
}
}

return true;
Expand Down
2 changes: 0 additions & 2 deletions spec/truffle/tags/language/regexp/encoding_tags.txt

This file was deleted.

0 comments on commit 7da6fd7

Please sign in to comment.