Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 3d1d49fe2711
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 020601013021
Choose a head ref
  • 6 commits
  • 6 files changed
  • 1 contributor

Commits on Feb 22, 2016

  1. Copy the full SHA
    de35d78 View commit details
  2. Fix unescape_ascii as in MRI bug #10670.

    Fixes MRI TestM17N#test_escaped_metachar.
    headius committed Feb 22, 2016
    Copy the full SHA
    60fd112 View commit details
  3. Implement rb_str_escape as in MRI and use it for inspect.

    Fixes MRI tests:
    
    * TestM17N#test_object_inspect_external
    * TestM17N#test_object_utf16_32_inspect
    headius committed Feb 22, 2016
    Copy the full SHA
    207a4b2 View commit details
  4. Copy the full SHA
    c5caf26 View commit details
  5. Copy the full SHA
    2f125f6 View commit details
  6. Fix for code range scanning with a dummy mbc encoding.

    Fixes MRI TestM17N#test_scrub_dummy_encoding.
    headius committed Feb 22, 2016
    Copy the full SHA
    0206010 View commit details
4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/RubyBasicObject.java
Original file line number Diff line number Diff line change
@@ -1100,12 +1100,12 @@ public static IRubyObject rbInspect(ThreadContext context, IRubyObject obj) {
if (enc == null) enc = runtime.getDefaultExternalEncoding();
if (!enc.isAsciiCompatible()) {
if (!str.isAsciiOnly()) {
return RubyString.inspect19(runtime, str.getByteList());
return RubyString.rbStrEscape(context, str);
}
return str;
}
if (str.getEncoding() != enc && !str.isAsciiOnly()) {
return RubyString.inspect19(runtime, str.getByteList());
return RubyString.rbStrEscape(context, str);
}
return str;
}
71 changes: 70 additions & 1 deletion core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -259,7 +259,12 @@ private void copyCodeRangeForSubstr(RubyString from, Encoding enc) {
public final int scanForCodeRange() {
int cr = getCodeRange();
if (cr == CR_UNKNOWN) {
cr = codeRangeScan(EncodingUtils.getActualEncoding(getEncoding(), value), value);
Encoding enc = getEncoding();
if (enc.minLength() > 1 && enc.isDummy()) {
cr = CR_BROKEN;
} else {
cr = codeRangeScan(EncodingUtils.getActualEncoding(getEncoding(), value), value);
}
setCodeRange(cr);
}
return cr;
@@ -1979,6 +1984,70 @@ public IRubyObject inspect19() {
}

/**
 * Builds an escaped copy of {@code str}. MRI: rb_str_escape.
 *
 * <p>The bytes of {@code str} are walked one character at a time using the string's own
 * encoding:
 * <ul>
 *   <li>bytes for which the encoding reports no character are each written as {@code \xNN};</li>
 *   <li>newline, carriage return, tab, form feed, vertical tab, backspace, bell and escape are
 *       written as {@code \n \r \t \f \v \b \a \e};</li>
 *   <li>when the encoding is ASCII-compatible, ASCII characters in the range 32..126 are copied
 *       through unchanged;</li>
 *   <li>every other character is written with the format chosen by
 *       {@code StringSupport.escapedCharFormat}.</li>
 * </ul>
 *
 * @param context the current thread context; only its runtime is used
 * @param str the string to escape; it is read, not modified
 * @return a new string marked as US-ASCII with a 7-bit code range, on which
 *         {@code infectBy(str)} has been called
 */
public static IRubyObject rbStrEscape(ThreadContext context, RubyString str) {
    final Ruby runtime = context.runtime;

    final Encoding encoding = str.getEncoding();
    final ByteList source = str.getByteList();
    final byte[] bytes = source.unsafeBytes();
    int pos = source.begin();
    final int end = pos + source.realSize();
    // Start of the run of bytes that can be copied verbatim but has not been copied yet.
    int pending = pos;
    final RubyString escaped = RubyString.newEmptyString(runtime);
    final boolean unicode = encoding.isUnicode();
    final boolean asciiCompatible = encoding.isAsciiCompatible();

    while (pos < end) {
        int len = encoding.length(bytes, pos, end);

        if (!MBCLEN_CHARFOUND_P(len)) {
            // No character here: flush the verbatim run, then hex-escape up to one
            // minimum-width unit of bytes (fewer if the input ends first).
            if (pos > pending) {
                escaped.cat(bytes, pending, pos - pending);
            }
            int badBytes = encoding.minLength();
            if (end < pos + badBytes) {
                badBytes = end - pos;
            }
            for (int i = 0; i < badBytes; i++) {
                escaped.modify();
                Sprintf.sprintf(runtime, escaped.getByteList(), "\\x%02X", bytes[pos] & 0377);
                pos++;
                pending = pos;
            }
            continue;
        }

        len = MBCLEN_CHARFOUND_LEN(len);
        final int code = encoding.mbcToCode(bytes, pos, end);
        final int charStart = pos;
        pos += len;

        // Letter of the backslash shorthand for this character, or 0 when it has none.
        int shorthand = 0;
        switch (code) {
            case '\n':   shorthand = 'n'; break;
            case '\r':   shorthand = 'r'; break;
            case '\t':   shorthand = 't'; break;
            case '\f':   shorthand = 'f'; break;
            case '\013': shorthand = 'v'; break;
            case '\010': shorthand = 'b'; break;
            case '\007': shorthand = 'a'; break;
            case 033:    shorthand = 'e'; break;
        }

        if (shorthand == 0 && asciiCompatible && Encoding.isAscii(code) && (code > 31 && code < 0x7F /*ISPRINT(code)*/)) {
            // Printable ASCII stays part of the verbatim run.
            continue;
        }

        // This character needs escaping: emit everything before it unchanged first.
        if (charStart > pending) {
            escaped.cat(bytes, pending, charStart - pending);
        }
        if (shorthand != 0) {
            escaped.cat('\\');
            escaped.cat((byte) shorthand);
        } else {
            escaped.modify();
            Sprintf.sprintf(runtime, escaped.getByteList(), StringSupport.escapedCharFormat(code, unicode), (code & 0xFFFFFFFFL));
        }
        pending = pos;
    }

    // Copy whatever verbatim tail is left.
    if (pos > pending) {
        escaped.cat(bytes, pending, pos - pending);
    }
    escaped.setEncodingAndCodeRange(USASCIIEncoding.INSTANCE, CR_7BIT);

    escaped.infectBy(str);
    return escaped;
}

public static IRubyObject inspect19(Ruby runtime, ByteList byteList) {
Encoding enc = byteList.getEncoding();
byte bytes[] = byteList.getUnsafeBytes();
5 changes: 4 additions & 1 deletion core/src/main/java/org/jruby/util/RegexpSupport.java
Original file line number Diff line number Diff line change
@@ -119,10 +119,13 @@ public static boolean unescapeNonAscii(Ruby runtime, ByteList to, byte[] bytes,
p -= 2;
if (enc == USASCIIEncoding.INSTANCE) {
if (buf == null) buf = new byte[1];
int pbeg = p;
p = readEscapedByte(runtime, buf, 0, bytes, p, end, str, mode);
c = buf[0];
if (c == (char)-1) return false;
if (to != null) to.append(c);
if (to != null) {
to.append(bytes, pbeg, p - pbeg);
}
}
else {
p = unescapeEscapedNonAscii(runtime, to, bytes, p, end, enc, encp, str, mode);
10 changes: 2 additions & 8 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
@@ -1832,15 +1832,9 @@ public static IRubyObject strCompatAndValid(ThreadContext context, IRubyObject _
if (cr == StringSupport.CR_BROKEN) {
throw context.runtime.newArgumentError("replacement must be valid byte sequence '" + str + "'");
}
else if (cr == StringSupport.CR_7BIT) {
Encoding e = STR_ENC_GET(str);
if (!enc.isAsciiCompatible()) {
throw context.runtime.newEncodingCompatibilityError("incompatible character encodings: " + enc + " and " + e);
}
}
else { /* ENC_CODERANGE_VALID */
else {
Encoding e = STR_ENC_GET(str);
if (enc != e) {
if (cr == StringSupport.CR_7BIT ? enc.minLength() != 1 : enc != e) {
throw context.runtime.newEncodingCompatibilityError("incompatible character encodings: " + enc + " and " + e);
}
}
2 changes: 2 additions & 0 deletions test/mri/excludes/TestAutoload.rb
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
#exclude :test_autoload_p, "needs investigation"
exclude :test_autoload_while_autoloading, "needs investigation"
exclude :test_require_implemented_in_ruby_is_called, "attempted to dispatch to require but it seems to recursively call itself"
1 change: 1 addition & 0 deletions test/mri/excludes/TestM17N.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
exclude :test_nonascii_method_name, "lexer is not pulling mbc characters off the wire correctly"
exclude :test_split, "our impl has diverged and does not appear to handle encoded null char properly"
exclude :test_symbol, "management of differently-encoded symbols is not right"
exclude :test_symbol_op, "some symbols are created early and do not have UTF-8 encoding; management of differently-encoded symbols is not right"