Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit d2d17a0e5c81fe0e11e6551a1ab6450fc5d4ee68
Author: Charles Oliver Nutter <headius@headius.com>
Date:   Tue Aug 1 00:55:34 2017 -0500

    Use jcodings 1.0.24.

commit 9abc2bbb264335c3d08d6c62eea9f1c412fc8e2c
Author: Charles Oliver Nutter <headius@headius.com>
Date:   Tue Aug 1 00:49:08 2017 -0500

    Tweaks for chr fixes.

    * Raise proper error for out of char range in encMbcput (found by
      ruby/spec)
    * Don't use single-arg chr from multi-arg path at all (caused
      infinite recursion; may impact perf by missing single-byte
      ByteLists)

commit e343686
Author: Charles Oliver Nutter <headius@headius.com>
Date:   Mon Jul 31 23:44:04 2017 -0500

    Various fixes for Integer#chr.

    * Fixed missing chr logic from MRI
    * Fixed encoding utility methods that were missing or different
    * Fixed error handling of invalid values
    * Updated jcodings to get sjis, single-byte fixes
headius committed Aug 1, 2017
1 parent ffbad9c commit 0f746f6
Showing 5 changed files with 80 additions and 25 deletions.
2 changes: 1 addition & 1 deletion core/pom.rb
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@

jar 'org.jruby.joni:joni:2.1.12'
jar 'org.jruby.extras:bytelist:1.0.15'
jar 'org.jruby.jcodings:jcodings:1.0.23'
jar 'org.jruby.jcodings:jcodings:1.0.24'
jar 'org.jruby:dirgra:0.3'

jar 'com.headius:invokebinder:1.9'
2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
@@ -183,7 +183,7 @@ DO NOT MODIFIY - GENERATED CODE
<dependency>
<groupId>org.jruby.jcodings</groupId>
<artifactId>jcodings</artifactId>
<version>1.0.23</version>
<version>1.0.24-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.jruby</groupId>
40 changes: 27 additions & 13 deletions core/src/main/java/org/jruby/RubyInteger.java
Original file line number Diff line number Diff line change
@@ -50,6 +50,7 @@
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;
import org.jruby.util.io.EncodingUtils;

import java.math.RoundingMode;

@@ -320,18 +321,22 @@ static ByteList singleCharByteList(final byte index) {
@JRubyMethod(name = "chr")
public RubyString chr(ThreadContext context) {
Ruby runtime = context.runtime;
int value = (int) getLongValue();
if (value >= 0 && value <= 0xFF) {
ByteList bytes = SINGLE_CHAR_BYTELISTS[value];
return RubyString.newStringShared(runtime, bytes, bytes.getEncoding());
} else {
Encoding enc = runtime.getDefaultInternalEncoding();
if (value > 0xFF && (enc == null || enc == ASCIIEncoding.INSTANCE)) {

// rb_num_to_uint
long i = getLongValue() & 0xFFFFFFFFL;
int c = (int) i;

Encoding enc;

if (0xff < i) {
enc = runtime.getDefaultInternalEncoding();
if (enc == null) {
throw runtime.newRangeError(toString() + " out of char range");
}
if (enc == null) enc = USASCIIEncoding.INSTANCE;
return RubyString.newStringNoCopy(runtime, fromEncodedBytes(runtime, enc, value), enc, 0);
return chrCommon(context, c, enc);
}

return RubyString.newStringShared(runtime, SINGLE_CHAR_BYTELISTS[c]);
}

@Deprecated
@@ -342,17 +347,26 @@ public final RubyString chr19(ThreadContext context) {
@JRubyMethod(name = "chr")
public RubyString chr(ThreadContext context, IRubyObject arg) {
Ruby runtime = context.runtime;
long value = getLongValue();

// rb_num_to_uint
long i = getLongValue() & 0xFFFFFFFFL;

Encoding enc;
if (arg instanceof RubyEncoding) {
enc = ((RubyEncoding)arg).getEncoding();
} else {
enc = arg.convertToString().toEncoding(runtime);
}
if (enc == ASCIIEncoding.INSTANCE && value >= 0x80) {
return chr19(context);
return chrCommon(context, i, enc);
}

private RubyString chrCommon(ThreadContext context, long value, Encoding enc) {
if (value > 0xFFFFFFFFL) {
throw context.runtime.newRangeError(this + " out of char range");
}
return RubyString.newStringNoCopy(runtime, fromEncodedBytes(runtime, enc, value), enc, 0);
int c = (int) value;
if (enc == null) enc = ASCIIEncoding.INSTANCE;
return EncodingUtils.encUintChr(context, c, enc);
}

@Deprecated
2 changes: 1 addition & 1 deletion core/src/main/java/org/jruby/util/StringSupport.java
Original file line number Diff line number Diff line change
@@ -153,7 +153,7 @@ public static int length(Encoding enc, byte[]bytes, int p, int end) {

// rb_enc_precise_mbclen
public static int preciseLength(Encoding enc, byte[]bytes, int p, int end) {
if (p >= end) return -1 - (1);
if (p >= end) return MBCLEN_NEEDMORE(1);
int n = enc.length(bytes, p, end);
if (n > end - p) return MBCLEN_NEEDMORE(n - (end - p));
return n;
59 changes: 50 additions & 9 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
@@ -4,6 +4,8 @@
import org.jcodings.EncodingDB;
import org.jcodings.Ptr;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.exception.EncodingError;
import org.jcodings.exception.EncodingException;
import org.jcodings.exception.ErrorCodes;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
@@ -1990,31 +1992,62 @@ public static IRubyObject ioEncStr(Ruby runtime, IRubyObject str, OpenFile fptr)
}

// rb_enc_uint_chr
public static IRubyObject encUintChr(ThreadContext context, int code, Encoding enc) {
public static RubyString encUintChr(ThreadContext context, int code, Encoding enc) {
Ruby runtime = context.runtime;

long i = code & 0xFFFFFFFFL;

int n;
switch (n = enc.codeToMbcLength(code)) {
switch (n = EncodingUtils.encCodelen(context, code, enc)) {
case ErrorCodes.ERR_INVALID_CODE_POINT_VALUE:
throw runtime.newRangeError("invalid codepoint " + Integer.toHexString(code) + " in " + enc);
throw runtime.newRangeError("invalid codepoint " + Long.toHexString(i) + " in " + enc);
case ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE:
case 0:
throw runtime.newRangeError(Integer.toString(code) + " out of char range");
throw runtime.newRangeError(Long.toString(i) + " out of char range");
}

ByteList strBytes = new ByteList(n);
strBytes.setEncoding(enc);
strBytes.length(n);
enc.codeToMbc(code, strBytes.unsafeBytes(), strBytes.begin());
if (enc.length(strBytes.unsafeBytes(), strBytes.begin(), strBytes.realSize()) != n) {
throw runtime.newRangeError("invalid codepoint " + Integer.toHexString(code) + " in " + enc);
byte[] bytes = strBytes.unsafeBytes();
int begin = strBytes.begin();
int end = strBytes.realSize();

encMbcput(context, code, bytes, begin, enc);
if (StringSupport.preciseLength(enc, bytes, begin, end) != n) {
throw runtime.newRangeError("invalid codepoint " + Long.toHexString(i) + " in " + enc);
}

return RubyString.newString(runtime, strBytes);

}

// rb_enc_mbcput
// rb_enc_mbcput with Java exception
public static void encMbcput(int c, byte[] buf, int p, Encoding enc) {
enc.codeToMbc(c, buf, p);
int len = enc.codeToMbc(c, buf, p);
if (len < 0) {
throw new EncodingException(EncodingError.fromCode(len));
}
}

// rb_enc_mbcput with Ruby exception
//
// Encodes code point c into buf starting at offset p using enc.codeToMbc, and
// converts a negative result from that call into a Ruby-level error:
//   ERR_INVALID_CODE_POINT_VALUE -> RangeError "invalid codepoint <hex> in <enc>"
//   ERR_TOO_BIG_WIDE_CHAR_VALUE  -> RangeError "<n> out of char range"
//   any other negative code      -> EncodingError carrying that code's message
// A non-negative result returns normally; nothing is returned to the caller.
public static void encMbcput(ThreadContext context, int c, byte[] buf, int p, Encoding enc) {
    final int result = enc.codeToMbc(c, buf, p);
    if (result >= 0) return;

    // MRI performs this check inside some of the individual encoding functions
    // (for example the US-ASCII check for values >= 0x80). Here the encoding
    // logic cannot raise Ruby errors itself, so the expected results are
    // reproduced from the plain error codes it hands back.
    // See MRI's rb_enc_mbcput and related downstream encoding functions.

    // Messages show the code as an unsigned 32-bit value.
    final long unsignedCode = c & 0xFFFFFFFFL;

    if (result == ErrorCodes.ERR_INVALID_CODE_POINT_VALUE) {
        throw context.runtime.newRangeError("invalid codepoint " + Long.toHexString(unsignedCode) + " in " + enc);
    }
    if (result == ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE) {
        throw context.runtime.newRangeError("" + unsignedCode + " out of char range");
    }
    throw context.runtime.newEncodingError(EncodingError.fromCode(result).getMessage());
}

// rb_enc_codepoint_len
@@ -2210,4 +2243,12 @@ else if (c < 0x10000) {
return buf.length;
}

// Returns the byte length enc.codeToMbcLength reports for code point c.
// A reported length of 0 is raised as a Ruby ArgumentError naming the code
// (as unsigned hex) and the encoding; every other value, including negative
// ones, is handed back to the caller unchanged.
public static int encCodelen(ThreadContext context, int c, Encoding enc) {
    final int length = enc.codeToMbcLength(c);
    if (length != 0) return length;

    // Show the code as an unsigned 32-bit value in the message.
    final String hex = Long.toHexString(c & 0xFFFFFFFFL);
    throw context.runtime.newArgumentError("invalid codepoint " + hex + " in " + enc);
}

}

0 comments on commit 0f746f6

Please sign in to comment.