Skip to content

Commit

Permalink
Set encoding for regexp. Used mostly by Marshal.load. Fixes #4008.
Browse files Browse the repository at this point in the history
The initial attempt was to just implement RubyRegexp.setEncoding, but
that failed because regexps were being pulled off the wire with
no encoding and would not compile with multibyte characters in them.
The logic in this commit mimics MRI and passes appropriate tests
in their suite.
headius committed Jul 11, 2016
1 parent b865a52 commit c8d8226
Showing 3 changed files with 72 additions and 6 deletions.
1 change: 1 addition & 0 deletions core/src/main/java/org/jruby/RubyRegexp.java
Original file line number Diff line number Diff line change
@@ -1725,6 +1725,7 @@ private static IRubyObject regOperand(IRubyObject str, boolean check) {
return check ? str.convertToString() : str.checkStringType();
}

@Deprecated
public static RubyRegexp unmarshalFrom(UnmarshalStream input) throws java.io.IOException {
RubyRegexp result = newRegexp(input.getRuntime(), input.unmarshalString(), RegexpOptions.fromJoniOptions(input.readSignedByte()));
input.registerLinkTarget(result);
73 changes: 71 additions & 2 deletions core/src/main/java/org/jruby/runtime/marshal/UnmarshalStream.java
Original file line number Diff line number Diff line change
@@ -37,6 +37,7 @@
import java.io.InputStream;
import org.jcodings.Encoding;
import org.jcodings.EncodingDB.Entry;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;

@@ -61,6 +62,7 @@
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.encoding.EncodingCapable;
import org.jruby.util.ByteList;
import org.jruby.util.RegexpOptions;

/**
* Unmarshals objects from strings or streams in Ruby's marshal format.
@@ -222,8 +224,8 @@ private IRubyObject unmarshalObjectDirectly(int type, MarshalState state, boolea
case 'f' :
rubyObj = RubyFloat.unmarshalFrom(this);
break;
case '/' :
rubyObj = RubyRegexp.unmarshalFrom(this);
case '/':
rubyObj = unmarshalRegexp(state);
break;
case ':' :
rubyObj = RubySymbol.unmarshalFrom(this);
@@ -282,6 +284,73 @@ private IRubyObject unmarshalObjectDirectly(int type, MarshalState state, boolea
return rubyObj;
}

/**
 * Unmarshals a Regexp record: a byte string holding the pattern source followed by
 * one signed byte of options.
 *
 * <p>When the marshal state reports that instance variables are waiting, the source
 * bytes are wrapped in a temporary {@link RubyString}, that string is registered as a
 * link target, the pending variables are unmarshaled onto it, and its byte list is
 * then used as the pattern source (presumably this is how the encoding reaches the
 * pattern bytes -- confirm against {@code defaultVariablesUnmarshal}). The waiting
 * flag is cleared afterwards.
 *
 * <p>If the resulting byte list is tagged with {@code ASCIIEncoding.INSTANCE}, escapes
 * that were undefined in Ruby 1.8 are stripped from it in place before compiling.
 *
 * <p>NOTE(review): only the temporary string is ever registered as a link target here;
 * the regexp itself is not, whereas the deprecated {@code RubyRegexp.unmarshalFrom}
 * registered its result. Confirm that object links following a regexp still resolve.
 *
 * @param state marshal state consulted (and updated) for pending instance variables
 * @return the newly created regexp
 * @throws IOException if reading from the underlying stream fails
 */
private IRubyObject unmarshalRegexp(MarshalState state) throws IOException {
    ByteList source = unmarshalString();
    final byte joniFlags = readSignedByte();
    final RegexpOptions options = RegexpOptions.fromJoniOptions(joniFlags);

    if (state.isIvarWaiting()) {
        // Let the pending ivars land on a throwaway string, then take its bytes back.
        final RubyString ivarCarrier = RubyString.newString(runtime, source);
        registerLinkTarget(ivarCarrier);
        defaultVariablesUnmarshal(ivarCarrier);
        source = ivarCarrier.getByteList();
        state.setIvarWaiting(false);
    }

    if (source.getEncoding() == ASCIIEncoding.INSTANCE) {
        stripEscapesUndefinedIn18(source);
    }

    return RubyRegexp.newRegexp(runtime, source, options);
}

/**
 * 1.8 compatibility: removes escapes undefined in 1.8 from the pattern, in place.
 *
 * <p>Whenever one of the letters accepted by {@link #isLetterWithoutEscapeIn18(byte)}
 * directly follows an odd-length run of backslashes, the last backslash of that run is
 * dropped and the letter is kept. The surviving bytes are compacted towards the start
 * of the backing array and the byte list's real size is shrunk to match.
 *
 * @param pattern byte list to rewrite; its backing array is modified directly
 */
private static void stripEscapesUndefinedIn18(ByteList pattern) {
    final byte[] buffer = pattern.unsafeBytes();
    final int start = pattern.begin();
    final int end = start + pattern.realSize();

    int write = start;
    // True while the run of backslashes immediately before the current byte has odd length.
    boolean oddBackslashRun = false;

    for (int read = start; read < end; read++) {
        final byte current = buffer[read];
        if (current == '\\') {
            oddBackslashRun = !oddBackslashRun;
        } else {
            if (oddBackslashRun && isLetterWithoutEscapeIn18(current)) {
                // Step back so the letter overwrites the backslash that escaped it.
                write--;
            }
            oddBackslashRun = false;
        }
        buffer[write++] = current;
    }

    pattern.setRealSize(write - start);
}

/**
 * Tells whether {@code b} is one of the letters whose backslash escape is removed by
 * {@link #stripEscapesUndefinedIn18(ByteList)}.
 *
 * @param b byte to classify
 * @return {@code true} for g-m, o, p, q, u, y, E, F, H-L, N-V, X and Y
 */
private static boolean isLetterWithoutEscapeIn18(byte b) {
    switch (b) {
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
        case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
        case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
            return true;
        default:
            return false;
    }
}

public Ruby getRuntime() {
return runtime;
4 changes: 0 additions & 4 deletions test/mri/excludes/TestMarshal.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
exclude :test_change_struct, "needs investigation"
exclude :test_class_ivar, "needs investigation"
exclude :test_class_nonascii, "needs investigation"
exclude :test_continuation, "not supported"
@@ -8,13 +7,10 @@
exclude :test_marshal_load_ivar, "needs investigation"
exclude :test_marshal_load_should_not_taint_classes, "needs investigation"
exclude :test_marshal_rational, "needs investigation"
exclude :test_marshal_symbol_ascii8bit, "needs investigation"
exclude :test_modify_array_during_dump, "needs investigation"
exclude :test_module_ivar, "needs investigation"
exclude :test_no_internal_ids, "debug info for frozen strings is visible in JRuby"
exclude :test_object_prepend, "needs investigation"
exclude :test_regexp, "needs investigation"
exclude :test_regexp2, "needs investigation"
exclude :test_singleton, "needs investigation"
exclude :test_struct_invalid_members, "needs investigation"
exclude :test_symlink_in_ivar, "seems to be trying to deserialize a symbol with ivars, which we don't support"

0 comments on commit c8d8226

Please sign in to comment.