Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 819137547853
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: bfc5ee462405
Choose a head ref
  • 2 commits
  • 9 files changed
  • 1 contributor

Commits on Aug 25, 2017

  1. Revert "Use jcodings 1.0.25 to better handle encoding to charset look…

    …up."
    
    This reverts commit 8191375.
    headius committed Aug 25, 2017
    Copy the full SHA
    f398844 View commit details
  2. Work around jcodings null charsets by doing a second lookup.

    This is equivalent to fixes made for #4716 in jcodings 1.0.25, but
    other changes in that version made it too risky to upgrade in
    JRuby 9.1.x. The code here wraps jcodings with the appropriate
    logic rather than updating.
    headius committed Aug 25, 2017
    Copy the full SHA
    bfc5ee4 View commit details
2 changes: 1 addition & 1 deletion core/pom.rb
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@

jar 'org.jruby.joni:joni:2.1.11'
jar 'org.jruby.extras:bytelist:1.0.15'
jar 'org.jruby.jcodings:jcodings:1.0.25-SNAPSHOT'
jar 'org.jruby.jcodings:jcodings:1.0.18'
jar 'org.jruby:dirgra:0.3'

jar 'com.headius:invokebinder:1.7'
2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
@@ -183,7 +183,7 @@ DO NOT MODIFIY - GENERATED CODE
<dependency>
<groupId>org.jruby.jcodings</groupId>
<artifactId>jcodings</artifactId>
<version>1.0.25-SNAPSHOT</version>
<version>1.0.18</version>
</dependency>
<dependency>
<groupId>org.jruby</groupId>
3 changes: 2 additions & 1 deletion core/src/main/java/org/jruby/Ruby.java
Original file line number Diff line number Diff line change
@@ -63,6 +63,7 @@
import org.jruby.runtime.JavaSites;
import org.jruby.runtime.invokedynamic.InvokeDynamicSupport;
import org.jruby.util.MRIRecursionGuard;
import org.jruby.util.io.EncodingUtils;
import org.objectweb.asm.util.TraceClassVisitor;

import jnr.constants.Constant;
@@ -2778,7 +2779,7 @@ public Charset getDefaultCharset() {
enc = UTF8Encoding.INSTANCE;
}

Charset charset = enc.getCharset();
Charset charset = EncodingUtils.charsetForEncoding(enc);

return charset;
}
4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -533,7 +533,7 @@ public static RubyString newInternalFromJavaExternal(Ruby runtime, String str) {
// Ruby internal
Encoding internal = runtime.getDefaultInternalEncoding();
Charset rubyInt = null;
if ( internal != null ) rubyInt = internal.getCharset();
if ( internal != null ) rubyInt = EncodingUtils.charsetForEncoding(internal);

if ( rubyInt == null ) {
Encoding javaExtEncoding = runtime.getEncodingService().getJavaDefault();
@@ -5511,7 +5511,7 @@ public String getUnicodeValue() {

public static ByteList encodeBytelist(CharSequence value, Encoding encoding) {

Charset charset = encoding.getCharset();
Charset charset = EncodingUtils.charsetForEncoding(encoding);

// if null charset, let our transcoder handle it
if (charset == null) {
5 changes: 3 additions & 2 deletions core/src/main/java/org/jruby/ast/SymbolNode.java
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@
import org.jruby.ast.visitor.NodeVisitor;
import org.jruby.lexer.yacc.ISourcePosition;
import org.jruby.util.ByteList;
import org.jruby.util.io.EncodingUtils;

/**
* Represents a symbol (:symbol_name).
@@ -51,7 +52,7 @@ public class SymbolNode extends Node implements ILiteralNode, INameNode, SideEff
// Interned ident path (e.g. [':', ident]).
public SymbolNode(ISourcePosition position, String name, Encoding encoding, int cr) {
super(position, false);
this.bytes = new ByteList(name.getBytes(encoding.getCharset()), encoding);
this.bytes = new ByteList(name.getBytes(EncodingUtils.charsetForEncoding(encoding)), encoding);
}

// String path (e.g. [':', str_beg, str_content, str_end])
@@ -79,7 +80,7 @@ public <T> T accept(NodeVisitor<T> iVisitor) {
* @return Returns a String
*/
public String getName() {
return new String(bytes.unsafeBytes(), bytes.getEncoding().getCharset());
return new String(bytes.unsafeBytes(), EncodingUtils.charsetForEncoding(bytes.getEncoding()));
}

public Encoding getEncoding() {
3 changes: 2 additions & 1 deletion core/src/main/java/org/jruby/ir/operands/Symbol.java
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
import org.jruby.ir.persistence.IRWriterEncoder;
import org.jruby.runtime.ThreadContext;
import org.jruby.util.ByteList;
import org.jruby.util.io.EncodingUtils;

public class Symbol extends ImmutableLiteral {
public static final Symbol KW_REST_ARG_DUMMY = new Symbol("", ASCIIEncoding.INSTANCE);
@@ -18,7 +19,7 @@ public class Symbol extends ImmutableLiteral {
public Symbol(String name, Encoding encoding) {
super();

this.bytes = new ByteList(name.getBytes(encoding.getCharset()), encoding);
this.bytes = new ByteList(name.getBytes(EncodingUtils.charsetForEncoding(encoding)), encoding);
}

public Symbol(ByteList bytes) {
3 changes: 2 additions & 1 deletion core/src/main/java/org/jruby/lexer/LexingCommon.java
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
import org.jruby.lexer.yacc.StackState;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;
import org.jruby.util.io.EncodingUtils;

/**
* Code and constants common to both ripper and main parser.
@@ -115,7 +116,7 @@ public String createTokenString(int start) {
public String createAsEncodedString(byte[] bytes, int start, int length, Encoding encoding) {
// FIXME: We should be able to move some faster non-exception cache using Encoding.isDefined
try {
Charset charset = getEncoding().getCharset();
Charset charset = EncodingUtils.charsetForEncoding(getEncoding());
if (charset != null) {
if (charset == RubyEncoding.UTF8) {
return RubyEncoding.decodeUTF8(bytes, start, length);
4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/runtime/Helpers.java
Original file line number Diff line number Diff line change
@@ -2635,7 +2635,7 @@ public static String symbolBytesToString(ByteList value) {
if (encoding == USASCIIEncoding.INSTANCE || encoding == ASCIIEncoding.INSTANCE) {
return value.toString(); // raw
} else if (encoding instanceof UnicodeEncoding) {
return new String(value.getUnsafeBytes(), value.getBegin(), value.getRealSize(), value.getEncoding().getCharset());
return new String(value.getUnsafeBytes(), value.getBegin(), value.getRealSize(), EncodingUtils.charsetForEncoding(value.getEncoding()));
} else {
return value.toString(); // raw
}
@@ -2679,7 +2679,7 @@ public static String decodeByteList(Ruby runtime, ByteList value) {
* @return the decoded string
*/
public static String byteListToString(final ByteList bytes) {
final Charset charset = bytes.getEncoding().getCharset();
final Charset charset = EncodingUtils.charsetForEncoding(bytes.getEncoding());
if ( charset != null ) {
return new String(bytes.getUnsafeBytes(), bytes.getBegin(), bytes.getRealSize(), charset);
}
24 changes: 23 additions & 1 deletion core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -1329,7 +1330,7 @@ public static ByteList transcodeString(String string, Encoding toEncoding, int e

EConv ec = TranscoderDB.open(encoding.getName(), toEncoding.getName(), ecflags);

byte[] inBytes = string.getBytes(encoding.getCharset());
byte[] inBytes = string.getBytes(EncodingUtils.charsetForEncoding(encoding));
Ptr inPos = new Ptr(0);

int inStop = inBytes.length;
@@ -2205,4 +2206,25 @@ else if (c < 0x10000) {
return buf.length;
}

/**
 * Get an appropriate Java Charset for the given Encoding.
 *
 * This works around a bug in jcodings where it would return null as the charset for encodings that should have
 * a match, like Windows-1252. When jcodings reports no charset, a second lookup is made against the JVM's
 * charset registry using the encoding's name, mirroring the fix made in jcodings 1.0.25.
 *
 * If the JVM also has no charset for that name (or the name is not a legal charset name), this method returns
 * null rather than throwing, so that callers which test the result for null and fall back to the transcoder
 * keep working.
 *
 * See https://github.com/jruby/jruby/issues/4716 for more information.
 *
 * @param enc the encoding for which to get a matching charset
 * @return the matching charset, or null if neither jcodings nor the JVM provides one
 */
public static Charset charsetForEncoding(Encoding enc) {
    Charset charset = enc.getCharset();

    if (charset == null) {
        try {
            charset = Charset.forName(enc.toString());
        } catch (IllegalArgumentException ignored) {
            // Charset.forName signals "unsupported" and "illegal name" with IllegalArgumentException
            // subclasses; treat both as "no charset available" and leave the result null.
        }
    }

    return charset;
}

}