Skip to content

Commit

Permalink
Fixes related to jcodings UTF-8 length updates.
Browse files Browse the repository at this point in the history
* Update jcodings to pick up a typo fix (it failed to recognize the
  highest upper bound).
* Fix some locations using jcodings without error checking.
* Modify one place (StringSupport.codeLength) to keep raising errors,
  as its consumers expect, until that code can be updated for return
  codes.

This should fix recent regressions, but we may see new errors pop
up in places that are not checking these return codes, if I missed
them in my audit.

Jcodings is moving toward error codes for at least the methods
with MRI equivalents, so EncodingException will soon be defunct
(or replaced by something equivalent in JRuby proper).
headius committed Jul 31, 2017
1 parent d020f6f commit ffbad9c
Showing 5 changed files with 21 additions and 21 deletions.
2 changes: 1 addition & 1 deletion core/pom.rb
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@

jar 'org.jruby.joni:joni:2.1.12'
jar 'org.jruby.extras:bytelist:1.0.15'
jar 'org.jruby.jcodings:jcodings:1.0.22'
jar 'org.jruby.jcodings:jcodings:1.0.23'
jar 'org.jruby:dirgra:0.3'

jar 'com.headius:invokebinder:1.9'
2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
@@ -183,7 +183,7 @@ DO NOT MODIFIY - GENERATED CODE
<dependency>
<groupId>org.jruby.jcodings</groupId>
<artifactId>jcodings</artifactId>
<version>1.0.22</version>
<version>1.0.23</version>
</dependency>
<dependency>
<groupId>org.jruby</groupId>
16 changes: 3 additions & 13 deletions core/src/main/java/org/jruby/RubyInteger.java
Original file line number Diff line number Diff line change
@@ -361,24 +361,14 @@ public final RubyString chr19(ThreadContext context, IRubyObject arg) {
}

private ByteList fromEncodedBytes(Ruby runtime, Encoding enc, long value) {
int n;
try {
n = value < 0 ? 0 : enc.codeToMbcLength((int)value);
} catch (EncodingException ee) {
n = 0;
}
int n = value < 0 ? 0 : enc.codeToMbcLength((int)value);

if (n <= 0) throw runtime.newRangeError(this.toString() + " out of char range");

ByteList bytes = new ByteList(n);

boolean ok = false;
try {
enc.codeToMbc((int)value, bytes.getUnsafeBytes(), 0);
ok = StringSupport.preciseLength(enc, bytes.unsafeBytes(), 0, n) == n;
} catch (EncodingException e) {
// ok = false, fall through
}
enc.codeToMbc((int)value, bytes.getUnsafeBytes(), 0);
boolean ok = StringSupport.preciseLength(enc, bytes.unsafeBytes(), 0, n) == n;

if (!ok) {
throw runtime.newRangeError("invalid codepoint " + String.format("0x%x in ", value) + enc.getCharsetName());
13 changes: 12 additions & 1 deletion core/src/main/java/org/jruby/util/StringSupport.java
Original file line number Diff line number Diff line change
@@ -31,6 +31,8 @@
import org.jcodings.Encoding;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.constants.CharacterType;
import org.jcodings.exception.EncodingError;
import org.jcodings.exception.EncodingException;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
@@ -448,7 +450,16 @@ public static int codePoint(Ruby runtime, Encoding enc, byte[] bytes, int p, int
}

public static int codeLength(Encoding enc, int c) {
return enc.codeToMbcLength(c);
int i = enc.codeToMbcLength(c);
return checkCodepointError(i);
}

/**
 * Checks a result value from a jcodings code-point operation such as
 * {@code Encoding.codeToMbcLength}.
 *
 * Zero and positive values pass through unchanged. A negative value is
 * treated as a jcodings error code and converted into an
 * {@code EncodingException}, so callers written against the
 * exception-throwing behaviour keep working.
 *
 * @param i the value to check
 * @return {@code i}, when it is not negative
 * @throws EncodingException when {@code i} is negative; the exception is
 *         built from {@code EncodingError.fromCode(i)}
 */
public static int checkCodepointError(int i) {
    if (i >= 0) {
        return i;
    }
    // Negative means failure: surface it as an exception for code that still expects one.
    throw new EncodingException(EncodingError.fromCode(i));
}

public static long getAscii(Encoding enc, byte[]bytes, int p, int end) {
9 changes: 4 additions & 5 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
@@ -1994,22 +1994,21 @@ public static IRubyObject encUintChr(ThreadContext context, int code, Encoding e
Ruby runtime = context.runtime;

int n;
RubyString str;
switch (n = enc.codeToMbcLength(code)) {
case ErrorCodes.ERR_INVALID_CODE_POINT_VALUE:
throw runtime.newRangeError("invalid codepoint " + Integer.toHexString(code) + " in " + enc);
case ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE:
case 0:
throw runtime.newRangeError(Integer.toString(code) + " out of char range");
}
str = RubyString.newStringLight(runtime, n);
ByteList strBytes = str.getByteList();
ByteList strBytes = new ByteList(n);
strBytes.setEncoding(enc);
strBytes.length(n);
enc.codeToMbc(code, strBytes.unsafeBytes(), strBytes.begin());
if (enc.length(strBytes.unsafeBytes(), strBytes.begin(), strBytes.realSize()) != n) {
throw runtime.newRangeError("invalid codepoint " + Integer.toHexString(code) + " in " + enc);
}
strBytes.length(n);
return str;
return RubyString.newString(runtime, strBytes);

}

0 comments on commit ffbad9c

Please sign in to comment.