Skip to content

Commit

Permalink
Showing 7 changed files with 36 additions and 10 deletions.
3 changes: 2 additions & 1 deletion core/src/main/java/org/jruby/Ruby.java
Original file line number Diff line number Diff line change
@@ -63,6 +63,7 @@
import org.jruby.runtime.JavaSites;
import org.jruby.runtime.invokedynamic.InvokeDynamicSupport;
import org.jruby.util.MRIRecursionGuard;
import org.jruby.util.io.EncodingUtils;
import org.objectweb.asm.util.TraceClassVisitor;

import jnr.constants.Constant;
@@ -2778,7 +2779,7 @@ public Charset getDefaultCharset() {
enc = UTF8Encoding.INSTANCE;
}

Charset charset = enc.getCharset();
Charset charset = EncodingUtils.charsetForEncoding(enc);

return charset;
}
4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -533,7 +533,7 @@ public static RubyString newInternalFromJavaExternal(Ruby runtime, String str) {
// Ruby internal
Encoding internal = runtime.getDefaultInternalEncoding();
Charset rubyInt = null;
if ( internal != null ) rubyInt = internal.getCharset();
if ( internal != null ) rubyInt = EncodingUtils.charsetForEncoding(internal);

if ( rubyInt == null ) {
Encoding javaExtEncoding = runtime.getEncodingService().getJavaDefault();
@@ -5511,7 +5511,7 @@ public String getUnicodeValue() {

public static ByteList encodeBytelist(CharSequence value, Encoding encoding) {

Charset charset = encoding.getCharset();
Charset charset = EncodingUtils.charsetForEncoding(encoding);

// if null charset, let our transcoder handle it
if (charset == null) {
5 changes: 3 additions & 2 deletions core/src/main/java/org/jruby/ast/SymbolNode.java
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@
import org.jruby.ast.visitor.NodeVisitor;
import org.jruby.lexer.yacc.ISourcePosition;
import org.jruby.util.ByteList;
import org.jruby.util.io.EncodingUtils;

/**
* Represents a symbol (:symbol_name).
@@ -51,7 +52,7 @@ public class SymbolNode extends Node implements ILiteralNode, INameNode, SideEff
// Interned ident path (e.g. [':', ident]).
// Builds the symbol's ByteList by encoding the Java String name into bytes using the
// Java Charset that corresponds to the given Ruby encoding.
// NOTE: this is a diff rendering; the two assignments below are the old line followed by
// its replacement, not two statements that both exist in the file.
public SymbolNode(ISourcePosition position, String name, Encoding encoding, int cr) {
super(position, false);
// Old (removed): charset taken straight from encoding.getCharset().
this.bytes = new ByteList(name.getBytes(encoding.getCharset()), encoding);
// New (added): charset resolved through EncodingUtils.charsetForEncoding.
this.bytes = new ByteList(name.getBytes(EncodingUtils.charsetForEncoding(encoding)), encoding);
}

// String path (e.g. [':', str_beg, str_content, str_end])
@@ -79,7 +80,7 @@ public <T> T accept(NodeVisitor<T> iVisitor) {
* @return Returns a String
*/
public String getName() {
// NOTE: this is a diff rendering; the two returns below are the old line followed by its
// replacement. Both decode the symbol's backing bytes into a Java String using the Charset
// that corresponds to the ByteList's encoding.
// Old (removed): charset taken straight from getEncoding().getCharset().
return new String(bytes.unsafeBytes(), bytes.getEncoding().getCharset());
// New (added): charset resolved through EncodingUtils.charsetForEncoding.
return new String(bytes.unsafeBytes(), EncodingUtils.charsetForEncoding(bytes.getEncoding()));
}

public Encoding getEncoding() {
3 changes: 2 additions & 1 deletion core/src/main/java/org/jruby/ir/operands/Symbol.java
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
import org.jruby.ir.persistence.IRWriterEncoder;
import org.jruby.runtime.ThreadContext;
import org.jruby.util.ByteList;
import org.jruby.util.io.EncodingUtils;

public class Symbol extends ImmutableLiteral {
public static final Symbol KW_REST_ARG_DUMMY = new Symbol("", ASCIIEncoding.INSTANCE);
@@ -18,7 +19,7 @@ public class Symbol extends ImmutableLiteral {
// Creates a symbol operand from a Java String, encoding the name into bytes with the Java
// Charset that corresponds to the given Ruby encoding and tagging the ByteList with it.
// NOTE: this is a diff rendering; the two assignments below are the old line followed by
// its replacement, not two statements that both exist in the file.
public Symbol(String name, Encoding encoding) {
super();

// Old (removed): charset taken straight from encoding.getCharset().
this.bytes = new ByteList(name.getBytes(encoding.getCharset()), encoding);
// New (added): charset resolved through EncodingUtils.charsetForEncoding.
this.bytes = new ByteList(name.getBytes(EncodingUtils.charsetForEncoding(encoding)), encoding);
}

public Symbol(ByteList bytes) {
3 changes: 2 additions & 1 deletion core/src/main/java/org/jruby/lexer/LexingCommon.java
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
import org.jruby.lexer.yacc.StackState;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;
import org.jruby.util.io.EncodingUtils;

/**
* Code and constants common to both ripper and main parser.
@@ -115,7 +116,7 @@ public String createTokenString(int start) {
public String createAsEncodedString(byte[] bytes, int start, int length, Encoding encoding) {
// FIXME: We should be able to move some faster non-exception cache using Encoding.isDefined
try {
Charset charset = getEncoding().getCharset();
Charset charset = EncodingUtils.charsetForEncoding(getEncoding());
if (charset != null) {
if (charset == RubyEncoding.UTF8) {
return RubyEncoding.decodeUTF8(bytes, start, length);
4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/runtime/Helpers.java
Original file line number Diff line number Diff line change
@@ -2635,7 +2635,7 @@ public static String symbolBytesToString(ByteList value) {
if (encoding == USASCIIEncoding.INSTANCE || encoding == ASCIIEncoding.INSTANCE) {
return value.toString(); // raw
} else if (encoding instanceof UnicodeEncoding) {
return new String(value.getUnsafeBytes(), value.getBegin(), value.getRealSize(), value.getEncoding().getCharset());
return new String(value.getUnsafeBytes(), value.getBegin(), value.getRealSize(), EncodingUtils.charsetForEncoding(value.getEncoding()));
} else {
return value.toString(); // raw
}
@@ -2679,7 +2679,7 @@ public static String decodeByteList(Ruby runtime, ByteList value) {
* @return the decoded string
*/
public static String byteListToString(final ByteList bytes) {
final Charset charset = bytes.getEncoding().getCharset();
final Charset charset = EncodingUtils.charsetForEncoding(bytes.getEncoding());
if ( charset != null ) {
return new String(bytes.getUnsafeBytes(), bytes.getBegin(), bytes.getRealSize(), charset);
}
24 changes: 23 additions & 1 deletion core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -1329,7 +1330,7 @@ public static ByteList transcodeString(String string, Encoding toEncoding, int e

EConv ec = TranscoderDB.open(encoding.getName(), toEncoding.getName(), ecflags);

byte[] inBytes = string.getBytes(encoding.getCharset());
byte[] inBytes = string.getBytes(EncodingUtils.charsetForEncoding(encoding));
Ptr inPos = new Ptr(0);

int inStop = inBytes.length;
@@ -2205,4 +2206,25 @@ else if (c < 0x10000) {
return buf.length;
}

/**
 * Get an appropriate Java Charset for the given Encoding.
 *
 * This works around a bug in jcodings where it would return null as the charset for encodings that should have
 * a match, like Windows-1252. This method is equivalent to enc.getCharset in jcodings 1.0.25 and higher.
 *
 * When enc.getCharset() yields null, the encoding's name (enc.toString()) is looked up through
 * Charset.forName as a fallback. If that lookup fails because the JVM has no charset of that name, or because
 * the name is not a legal charset name, null is returned instead of letting the exception escape, so callers
 * that check for a null charset and fall back to another decoding path keep working.
 *
 * See https://github.com/jruby/jruby/issues/4716 for more information.
 *
 * @param enc the encoding for which to get a matching charset
 * @return the matching charset, or null if neither jcodings nor the JVM can supply one
 */
public static Charset charsetForEncoding(Encoding enc) {
    Charset charset = enc.getCharset();

    if (charset == null) {
        try {
            charset = Charset.forName(enc.toString());
        } catch (IllegalArgumentException noSuchCharset) {
            // IllegalCharsetNameException and UnsupportedCharsetException both extend
            // IllegalArgumentException; treat either as "no Java charset for this encoding".
            charset = null;
        }
    }

    return charset;
}

}

0 comments on commit 74c2fdf

Please sign in to comment.