Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 3b03bdf81f59
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 9cc6f5285c6f
Choose a head ref
  • 3 commits
  • 4 files changed
  • 1 contributor

Commits on May 12, 2016

  1. Improve transcoding of Java String to bytes using joni.

    Previously, if the target encoding was not supported by the JDK,
    we would be unable to encode the string and would just make it
    UTF-8. Now if there's no JDK support we will fall back on joni
    transcoding.
    
    Part of #3877 work.
    headius committed May 12, 2016
    7
    Copy the full SHA
    d025e43 View commit details
  2. Copy the full SHA
    4fde8c6 View commit details
  3. Copy the full SHA
    9cc6f52 View commit details
19 changes: 2 additions & 17 deletions core/src/main/java/org/jruby/RubyFile.java
Original file line number Diff line number Diff line change
@@ -518,15 +518,7 @@ public static IRubyObject basename(ThreadContext context, IRubyObject recv, IRub
case 2:
return RubyString.newEmptyString(runtime, origString.getEncoding()).infectBy(args[0]);
case 3:
if (origEncoding.getCharset() != null) {
try {
return RubyString.newString(runtime, new ByteList(name.substring(2).getBytes(origEncoding.getCharsetName()), origString.getEncoding())).infectBy(args[0]);
} catch (UnsupportedEncodingException uee) {
// fall through to UTF-8 logic
}
}

return RubyString.newString(runtime, name.substring(2)).infectBy(args[0]);
return RubyString.newString(runtime, RubyString.encodeBytelist(name.substring(2), origEncoding));
default:
switch (name.charAt(2)) {
case '/':
@@ -581,15 +573,8 @@ public static IRubyObject basename(ThreadContext context, IRubyObject recv, IRub
name = name.substring(0, name.length() - ext.length());
}
}
if (origEncoding.getCharset() != null) {
try {
return RubyString.newString(runtime, new ByteList(name.getBytes(origEncoding.getCharsetName()), origString.getEncoding())).infectBy(args[0]);
} catch (UnsupportedEncodingException uee) {
// fall through to UTF-8 logic
}
}

return RubyString.newString(runtime, name).infectBy(args[0]);
return RubyString.newString(runtime, RubyString.encodeBytelist(name, origEncoding));
}

@JRubyMethod(required = 2, rest = true, meta = true)
6 changes: 4 additions & 2 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -5463,8 +5463,10 @@ public static ByteList encodeBytelist(CharSequence value, Encoding encoding) {

Charset charset = encoding.getCharset();

// if null charset, fall back on Java default charset
if (charset == null) charset = Charset.defaultCharset();
// if null charset, let our transcoder handle it
if (charset == null) {
return EncodingUtils.transcodeString(value.toString(), encoding, 0);
}

byte[] bytes;
if (charset == RubyEncoding.UTF8) {
38 changes: 38 additions & 0 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
@@ -1259,6 +1259,44 @@ public static void transcodeLoop(ThreadContext context, byte[] inBytes, Ptr inPo
}
}

/**
 * Transcode a {@link java.lang.String} into the given encoding without needing a Ruby runtime.
 *
 * The string is first rendered as UTF-16 bytes (little- or big-endian, selected by comparing
 * {@code Platform.BYTE_ORDER} with {@code Platform.LITTLE_ENDIAN}) and those bytes are then fed
 * through a converter opened from that UTF-16 flavour to {@code toEncoding}.
 *
 * MRI: transcode_loop with no fallback and java.lang.String input
 *
 * @param string     the Java string to transcode
 * @param toEncoding the target encoding; it is also the encoding given to the returned ByteList
 * @param ecflags    flags handed to {@code TranscoderDB.open} when the converter is opened
 * @return the ByteList that was handed to the transcoding loop as its destination
 */
public static ByteList transcodeString(String string, Encoding toEncoding, int ecflags) {
    // This may be inefficient if we aren't matching endianness right
    final Encoding utf16 = Platform.BYTE_ORDER == Platform.LITTLE_ENDIAN
            ? UTF16LEEncoding.INSTANCE
            : UTF16BEEncoding.INSTANCE;

    final EConv converter = TranscoderDB.open(utf16.getName(), toEncoding.getName(), ecflags);

    final byte[] source = string.getBytes(utf16.getCharset());
    final int sourceEnd = source.length;

    // most encodings will be shorter than UTF-16 for typical input
    final int targetEnd = (int) (sourceEnd / 1.5 + 1);
    final byte[] target = new byte[targetEnd];

    // NOTE(review): the trailing `false` presumably means "wrap, do not copy" -- confirm against ByteList
    final ByteList destination = new ByteList(target, toEncoding, false);

    final Ptr sourcePos = new Ptr(0);
    final Ptr targetPos = new Ptr(0);

    // The three nulls occupy the slots after the converter; per the MRI note above this is the
    // "no fallback" form, and no runtime-bound context is supplied.
    boolean success = transcodeLoop(converter, null, null, null, source, sourcePos, target, targetPos, sourceEnd, targetEnd, destination, strTranscodingResize);

    if (!success) {
        // TODO: anything?
    }

    return destination;
}

/**
* Perform the inner transcoding loop.
*
5 changes: 5 additions & 0 deletions spec/ruby/core/file/basename_spec.rb
Original file line number Diff line number Diff line change
@@ -146,6 +146,11 @@
File.basename('/path/Офис.m4a').should == "Офис.m4a"
end

it "returns the basename with the same encoding as the original" do
  # The basename of a Windows-1250 path must itself be tagged Windows-1250.
  expected_encoding = Encoding.find('Windows-1250')
  File.basename('/path/file'.encode('Windows-1250')).encoding.should == expected_encoding
end

end

end