Skip to content

Commit

Permalink
Various improvements to dregexp processing.
Browse files Browse the repository at this point in the history
* Ensure only one regexp is ever cached for //o

This is done using an AtomicReference in the compiled method for
JVM6 and a field + atomic updater in the indy call site. In both
cases, we may end up evaluating the operands twice, and the code
that produced them may still run after caching (a bug,
#2798), but we will at least guarantee to return
exactly one regexp.

* Add non-boxed paths to construct dregexp with up to 5 elements.

* Add a ThreadContext-local Encoding[1] to use for encoding
  negotiation when preprocessing the dregexp elements.

* If, at JIT time, a once-dregexp has already been encountered and
  cached in the instr, just emit that regexp directly into the
  bytecode.

This new logic is faster than what we had before, likely because
the locking I put in place for JVM6 was preventing the JVM from
jitting (punted out with "COMPILE SKIPPED: invalid parsing" due
to a flaw in my code). This new logic is lighter-weight and JITs
fine. Given the benchmark from #3735:

9.0.5:  3.87s
9.1:    0.70s
1.7.24: 0.72s
headius committed Mar 16, 2016
1 parent 9159938 commit 256e753
Showing 8 changed files with 348 additions and 112 deletions.
141 changes: 115 additions & 26 deletions core/src/main/java/org/jruby/RubyRegexp.java
Original file line number Diff line number Diff line change
@@ -486,43 +486,132 @@ public static RubyString preprocessDRegexp(Ruby runtime, RubyString[] strings, i
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject[] strings, RegexpOptions options) {
RubyString string = null;
Encoding regexpEnc = null;
Encoding[] fixedEnc = new Encoding[1];

for (int i = 0; i < strings.length; i++) {
RubyString str = strings[i].convertToString();
Encoding strEnc = str.getEncoding();
regexpEnc = processDRegexpElement(runtime, options, regexpEnc, runtime.getCurrentContext().encodingHolder(), str);
string = string == null ? string = (RubyString)str.dup() : string.append19(str);
}

if (options.isEncodingNone() && strEnc != ASCIIEncoding.INSTANCE) {
if (str.scanForCodeRange() != StringSupport.CR_7BIT) {
throw runtime.newRegexpError("/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
}
strEnc = ASCIIEncoding.INSTANCE;
}
if (regexpEnc != null) string.setEncoding(regexpEnc);

// This used to call preprocess, but the resulting bytelist was not
// used. Since the preprocessing error-checking can be done without
// creating a new bytelist, I added a "light" path.
RubyRegexp.preprocessLight(runtime, str.getByteList(), strEnc, fixedEnc, RegexpSupport.ErrorMode.PREPROCESS);
return string;
}

if (fixedEnc[0] != null) {
if (regexpEnc != null && regexpEnc != fixedEnc[0]) {
throw runtime.newRegexpError("encoding mismatch in dynamic regexp: " + new String(regexpEnc.getName()) + " and " + new String(fixedEnc[0].getName()));
}
regexpEnc = fixedEnc[0];
}
/**
 * Preprocesses a dynamic regexp that consists of a single element.
 * <p>
 * Starts the {@code processElementIntoResult} chain with no accumulated result and no
 * negotiated encoding, using the array returned by the current context's
 * {@code encodingHolder()} as the fixed-encoding holder.
 *
 * @param runtime the runtime whose current context supplies the encoding holder
 * @param arg0    the only element of the dynamic regexp
 * @param options the regexp options to apply while preprocessing
 * @return the string produced by {@code processElementIntoResult}
 */
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, RegexpOptions options) {
    final Encoding[] fixedEncHolder = runtime.getCurrentContext().encodingHolder();
    return processElementIntoResult(runtime, null, arg0, options, null, fixedEncHolder);
}

/**
 * Preprocesses a dynamic regexp that consists of two elements, without boxing them
 * into an array.
 * <p>
 * Starts the {@code processElementIntoResult} chain with no accumulated result and no
 * negotiated encoding, using the array returned by the current context's
 * {@code encodingHolder()} as the fixed-encoding holder.
 *
 * @param runtime the runtime whose current context supplies the encoding holder
 * @param arg0    the first element
 * @param arg1    the second element
 * @param options the regexp options to apply while preprocessing
 * @return the string produced by {@code processElementIntoResult}
 */
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, RegexpOptions options) {
    final Encoding[] fixedEncHolder = runtime.getCurrentContext().encodingHolder();
    return processElementIntoResult(runtime, null, arg0, arg1, options, null, fixedEncHolder);
}

/**
 * Preprocesses a dynamic regexp that consists of three elements, without boxing them
 * into an array.
 * <p>
 * Starts the {@code processElementIntoResult} chain with no accumulated result and no
 * negotiated encoding, using the array returned by the current context's
 * {@code encodingHolder()} as the fixed-encoding holder.
 *
 * @param runtime the runtime whose current context supplies the encoding holder
 * @param arg0    the first element
 * @param arg1    the second element
 * @param arg2    the third element
 * @param options the regexp options to apply while preprocessing
 * @return the string produced by {@code processElementIntoResult}
 */
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, RegexpOptions options) {
    final Encoding[] fixedEncHolder = runtime.getCurrentContext().encodingHolder();
    return processElementIntoResult(runtime, null, arg0, arg1, arg2, options, null, fixedEncHolder);
}

/**
 * Preprocesses a dynamic regexp that consists of four elements, without boxing them
 * into an array.
 * <p>
 * Starts the {@code processElementIntoResult} chain with no accumulated result and no
 * negotiated encoding, using the array returned by the current context's
 * {@code encodingHolder()} as the fixed-encoding holder.
 *
 * @param runtime the runtime whose current context supplies the encoding holder
 * @param arg0    the first element
 * @param arg1    the second element
 * @param arg2    the third element
 * @param arg3    the fourth element
 * @param options the regexp options to apply while preprocessing
 * @return the string produced by {@code processElementIntoResult}
 */
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, RegexpOptions options) {
    final Encoding[] fixedEncHolder = runtime.getCurrentContext().encodingHolder();
    return processElementIntoResult(runtime, null, arg0, arg1, arg2, arg3, options, null, fixedEncHolder);
}

/**
 * Preprocesses a dynamic regexp that consists of five elements, without boxing them
 * into an array.
 * <p>
 * Starts the {@code processElementIntoResult} chain with no accumulated result and no
 * negotiated encoding, using the array returned by the current context's
 * {@code encodingHolder()} as the fixed-encoding holder.
 *
 * @param runtime the runtime whose current context supplies the encoding holder
 * @param arg0    the first element
 * @param arg1    the second element
 * @param arg2    the third element
 * @param arg3    the fourth element
 * @param arg4    the fifth element
 * @param options the regexp options to apply while preprocessing
 * @return the string produced by {@code processElementIntoResult}
 */
public static RubyString preprocessDRegexp(Ruby runtime, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, IRubyObject arg4, RegexpOptions options) {
    final Encoding[] fixedEncHolder = runtime.getCurrentContext().encodingHolder();
    return processElementIntoResult(runtime, null, arg0, arg1, arg2, arg3, arg4, options, null, fixedEncHolder);
}

/**
 * Folds the first of five remaining elements into the accumulated pattern string and
 * hands the other four to the next overload.
 * <p>
 * {@code arg0} is converted to a string and run through {@code processDRegexpElement}
 * before it is appended, so a failure there happens before {@code result} is touched.
 *
 * @param runtime   the current runtime
 * @param result    the pattern accumulated so far, or {@code null} if this is the first element
 * @param arg0      the element to process now
 * @param arg1      the next element
 * @param arg2      the element after {@code arg1}
 * @param arg3      the element after {@code arg2}
 * @param arg4      the last element
 * @param options   the regexp options
 * @param regexpEnc the encoding negotiated so far, or {@code null}
 * @param fixedEnc  single-slot holder passed through to {@code processDRegexpElement}
 * @return the result of processing the remaining elements
 */
private static RubyString processElementIntoResult(
        Ruby runtime,
        RubyString result,
        IRubyObject arg0,
        IRubyObject arg1,
        IRubyObject arg2,
        IRubyObject arg3,
        IRubyObject arg4,
        RegexpOptions options,
        Encoding regexpEnc,
        Encoding[] fixedEnc) {
    final RubyString piece = arg0.convertToString();
    final Encoding negotiated = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, piece);

    // The first element is duplicated so the caller's string is not the one appended to.
    final RubyString accumulated;
    if (result == null) {
        accumulated = piece.strDup(runtime);
    } else {
        accumulated = result.append19(piece);
    }

    return processElementIntoResult(runtime, accumulated, arg1, arg2, arg3, arg4, options, negotiated, fixedEnc);
}

/**
 * Folds the first of four remaining elements into the accumulated pattern string and
 * hands the other three to the next overload.
 * <p>
 * {@code arg0} is converted to a string and run through {@code processDRegexpElement}
 * before it is appended, so a failure there happens before {@code result} is touched.
 *
 * @param runtime   the current runtime
 * @param result    the pattern accumulated so far, or {@code null} if this is the first element
 * @param arg0      the element to process now
 * @param arg1      the next element
 * @param arg2      the element after {@code arg1}
 * @param arg3      the last element
 * @param options   the regexp options
 * @param regexpEnc the encoding negotiated so far, or {@code null}
 * @param fixedEnc  single-slot holder passed through to {@code processDRegexpElement}
 * @return the result of processing the remaining elements
 */
private static RubyString processElementIntoResult(
        Ruby runtime,
        RubyString result,
        IRubyObject arg0,
        IRubyObject arg1,
        IRubyObject arg2,
        IRubyObject arg3,
        RegexpOptions options,
        Encoding regexpEnc,
        Encoding[] fixedEnc) {
    final RubyString piece = arg0.convertToString();
    final Encoding negotiated = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, piece);

    // The first element is duplicated so the caller's string is not the one appended to.
    final RubyString accumulated;
    if (result == null) {
        accumulated = piece.strDup(runtime);
    } else {
        accumulated = result.append19(piece);
    }

    return processElementIntoResult(runtime, accumulated, arg1, arg2, arg3, options, negotiated, fixedEnc);
}

/**
 * Folds the first of three remaining elements into the accumulated pattern string and
 * hands the other two to the next overload.
 * <p>
 * {@code arg0} is converted to a string and run through {@code processDRegexpElement}
 * before it is appended, so a failure there happens before {@code result} is touched.
 *
 * @param runtime   the current runtime
 * @param result    the pattern accumulated so far, or {@code null} if this is the first element
 * @param arg0      the element to process now
 * @param arg1      the next element
 * @param arg2      the last element
 * @param options   the regexp options
 * @param regexpEnc the encoding negotiated so far, or {@code null}
 * @param fixedEnc  single-slot holder passed through to {@code processDRegexpElement}
 * @return the result of processing the remaining elements
 */
private static RubyString processElementIntoResult(
        Ruby runtime,
        RubyString result,
        IRubyObject arg0,
        IRubyObject arg1,
        IRubyObject arg2,
        RegexpOptions options,
        Encoding regexpEnc,
        Encoding[] fixedEnc) {
    final RubyString piece = arg0.convertToString();
    final Encoding negotiated = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, piece);

    // The first element is duplicated so the caller's string is not the one appended to.
    final RubyString accumulated;
    if (result == null) {
        accumulated = piece.strDup(runtime);
    } else {
        accumulated = result.append19(piece);
    }

    return processElementIntoResult(runtime, accumulated, arg1, arg2, options, negotiated, fixedEnc);
}

/**
 * Folds the first of two remaining elements into the accumulated pattern string and
 * hands the last one to the terminal overload.
 * <p>
 * {@code arg0} is converted to a string and run through {@code processDRegexpElement}
 * before it is appended, so a failure there happens before {@code result} is touched.
 *
 * @param runtime   the current runtime
 * @param result    the pattern accumulated so far, or {@code null} if this is the first element
 * @param arg0      the element to process now
 * @param arg1      the last element
 * @param options   the regexp options
 * @param regexpEnc the encoding negotiated so far, or {@code null}
 * @param fixedEnc  single-slot holder passed through to {@code processDRegexpElement}
 * @return the result of processing the remaining element
 */
private static RubyString processElementIntoResult(
        Ruby runtime,
        RubyString result,
        IRubyObject arg0,
        IRubyObject arg1,
        RegexpOptions options,
        Encoding regexpEnc,
        Encoding[] fixedEnc) {
    final RubyString piece = arg0.convertToString();
    final Encoding negotiated = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, piece);

    // The first element is duplicated so the caller's string is not the one appended to.
    final RubyString accumulated;
    if (result == null) {
        accumulated = piece.strDup(runtime);
    } else {
        accumulated = result.append19(piece);
    }

    return processElementIntoResult(runtime, accumulated, arg1, options, negotiated, fixedEnc);
}

/**
 * Terminal step of the element chain: folds the last element into the accumulated
 * pattern string and stamps the negotiated encoding on it.
 * <p>
 * {@code arg0} is converted to a string and run through {@code processDRegexpElement}
 * before it is appended. If an encoding came out of the negotiation, it is set on the
 * final string; otherwise the string's encoding is left as is.
 *
 * @param runtime   the current runtime
 * @param result    the pattern accumulated so far, or {@code null} if this is the only element
 * @param arg0      the last element
 * @param options   the regexp options
 * @param regexpEnc the encoding negotiated so far, or {@code null}
 * @param fixedEnc  single-slot holder passed through to {@code processDRegexpElement}
 * @return the completed pattern string
 */
private static RubyString processElementIntoResult(
        Ruby runtime,
        RubyString result,
        IRubyObject arg0,
        RegexpOptions options,
        Encoding regexpEnc,
        Encoding[] fixedEnc) {
    final RubyString piece = arg0.convertToString();
    final Encoding negotiated = processDRegexpElement(runtime, options, regexpEnc, fixedEnc, piece);

    // The first element is duplicated so the caller's string is not the one appended to.
    final RubyString pattern;
    if (result == null) {
        pattern = piece.strDup(runtime);
    } else {
        pattern = result.append19(piece);
    }

    if (negotiated != null) {
        pattern.setEncoding(negotiated);
    }
    return pattern;
}

if (string == null) {
string = (RubyString)str.dup();
} else {
string.append19(str);
private static Encoding processDRegexpElement(Ruby runtime, RegexpOptions options, Encoding regexpEnc, Encoding[] fixedEnc, RubyString str) {
Encoding strEnc = str.getEncoding();

if (options.isEncodingNone() && strEnc != ASCIIEncoding.INSTANCE) {
if (str.scanForCodeRange() != StringSupport.CR_7BIT) {
throw runtime.newRegexpError("/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
}
strEnc = ASCIIEncoding.INSTANCE;
}

if (regexpEnc != null) {
string.setEncoding(regexpEnc);
}
// This used to call preprocess, but the resulting bytelist was not
// used. Since the preprocessing error-checking can be done without
// creating a new bytelist, I added a "light" path.
RubyRegexp.preprocessLight(runtime, str.getByteList(), strEnc, fixedEnc, RegexpSupport.ErrorMode.PREPROCESS);

return string;
if (fixedEnc[0] != null) {
if (regexpEnc != null && regexpEnc != fixedEnc[0]) {
throw runtime.newRegexpError("encoding mismatch in dynamic regexp: " + new String(regexpEnc.getName()) + " and " + new String(fixedEnc[0].getName()));
}
regexpEnc = fixedEnc[0];
}
return regexpEnc;
}

private void check() {
50 changes: 50 additions & 0 deletions core/src/main/java/org/jruby/ir/runtime/IRRuntimeHelpers.java
Original file line number Diff line number Diff line change
@@ -1388,6 +1388,56 @@ public static RubyRegexp newDynamicRegexp(ThreadContext context, IRubyObject[] p
return re;
}

/**
 * Builds a dynamic regexp from a single element.
 * <p>
 * The embedded options are decoded, the element is preprocessed into a pattern string,
 * a regexp is created from that pattern, and the regexp is flagged via {@code setLiteral()}.
 *
 * @param context         the current thread context
 * @param arg0            the only element of the dynamic regexp
 * @param embeddedOptions the regexp options in their embedded integer form
 * @return the newly created regexp
 */
@JIT
public static RubyRegexp newDynamicRegexp(ThreadContext context, IRubyObject arg0, int embeddedOptions) {
    final RegexpOptions regexpOptions = RegexpOptions.fromEmbeddedOptions(embeddedOptions);
    final RubyString source = RubyRegexp.preprocessDRegexp(context.runtime, arg0, regexpOptions);

    final RubyRegexp regexp = RubyRegexp.newDRegexp(context.runtime, source, regexpOptions);
    regexp.setLiteral();
    return regexp;
}

/**
 * Builds a dynamic regexp from two elements passed individually rather than as an array.
 * <p>
 * The embedded options are decoded, the elements are preprocessed into a pattern string,
 * a regexp is created from that pattern, and the regexp is flagged via {@code setLiteral()}.
 *
 * @param context         the current thread context
 * @param arg0            the first element
 * @param arg1            the second element
 * @param embeddedOptions the regexp options in their embedded integer form
 * @return the newly created regexp
 */
@JIT
public static RubyRegexp newDynamicRegexp(ThreadContext context, IRubyObject arg0, IRubyObject arg1, int embeddedOptions) {
    final RegexpOptions regexpOptions = RegexpOptions.fromEmbeddedOptions(embeddedOptions);
    final RubyString source = RubyRegexp.preprocessDRegexp(context.runtime, arg0, arg1, regexpOptions);

    final RubyRegexp regexp = RubyRegexp.newDRegexp(context.runtime, source, regexpOptions);
    regexp.setLiteral();
    return regexp;
}

/**
 * Builds a dynamic regexp from three elements passed individually rather than as an array.
 * <p>
 * The embedded options are decoded, the elements are preprocessed into a pattern string,
 * a regexp is created from that pattern, and the regexp is flagged via {@code setLiteral()}.
 *
 * @param context         the current thread context
 * @param arg0            the first element
 * @param arg1            the second element
 * @param arg2            the third element
 * @param embeddedOptions the regexp options in their embedded integer form
 * @return the newly created regexp
 */
@JIT
public static RubyRegexp newDynamicRegexp(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, int embeddedOptions) {
    final RegexpOptions regexpOptions = RegexpOptions.fromEmbeddedOptions(embeddedOptions);
    final RubyString source = RubyRegexp.preprocessDRegexp(context.runtime, arg0, arg1, arg2, regexpOptions);

    final RubyRegexp regexp = RubyRegexp.newDRegexp(context.runtime, source, regexpOptions);
    regexp.setLiteral();
    return regexp;
}

/**
 * Builds a dynamic regexp from four elements passed individually rather than as an array.
 * <p>
 * The embedded options are decoded, the elements are preprocessed into a pattern string,
 * a regexp is created from that pattern, and the regexp is flagged via {@code setLiteral()}.
 *
 * @param context         the current thread context
 * @param arg0            the first element
 * @param arg1            the second element
 * @param arg2            the third element
 * @param arg3            the fourth element
 * @param embeddedOptions the regexp options in their embedded integer form
 * @return the newly created regexp
 */
@JIT
public static RubyRegexp newDynamicRegexp(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, int embeddedOptions) {
    final RegexpOptions regexpOptions = RegexpOptions.fromEmbeddedOptions(embeddedOptions);
    final RubyString source = RubyRegexp.preprocessDRegexp(context.runtime, arg0, arg1, arg2, arg3, regexpOptions);

    final RubyRegexp regexp = RubyRegexp.newDRegexp(context.runtime, source, regexpOptions);
    regexp.setLiteral();
    return regexp;
}

/**
 * Builds a dynamic regexp from five elements passed individually rather than as an array.
 * <p>
 * The embedded options are decoded, the elements are preprocessed into a pattern string,
 * a regexp is created from that pattern, and the regexp is flagged via {@code setLiteral()}.
 *
 * @param context         the current thread context
 * @param arg0            the first element
 * @param arg1            the second element
 * @param arg2            the third element
 * @param arg3            the fourth element
 * @param arg4            the fifth element
 * @param embeddedOptions the regexp options in their embedded integer form
 * @return the newly created regexp
 */
@JIT
public static RubyRegexp newDynamicRegexp(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, IRubyObject arg4, int embeddedOptions) {
    final RegexpOptions regexpOptions = RegexpOptions.fromEmbeddedOptions(embeddedOptions);
    final RubyString source = RubyRegexp.preprocessDRegexp(context.runtime, arg0, arg1, arg2, arg3, arg4, regexpOptions);

    final RubyRegexp regexp = RubyRegexp.newDRegexp(context.runtime, source, regexpOptions);
    regexp.setLiteral();
    return regexp;
}

public static RubyRegexp newLiteralRegexp(ThreadContext context, ByteList source, RegexpOptions options) {
RubyRegexp re = RubyRegexp.newRegexp(context.runtime, source, options);
re.setLiteral();
19 changes: 16 additions & 3 deletions core/src/main/java/org/jruby/ir/targets/DRegexpObjectSite.java
Original file line number Diff line number Diff line change
@@ -4,16 +4,15 @@
import com.headius.invokebinder.SmartBinder;
import org.jruby.RubyRegexp;
import org.jruby.RubyString;
import org.jruby.ir.runtime.IRRuntimeHelpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.RegexpOptions;
import org.objectweb.asm.Handle;
import org.objectweb.asm.Opcodes;

import java.lang.invoke.CallSite;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

import static org.jruby.util.CodegenUtils.p;
import static org.jruby.util.CodegenUtils.sig;
@@ -23,6 +22,8 @@
*/
public class DRegexpObjectSite extends ConstructObjectSite {
protected final RegexpOptions options;
private volatile RubyRegexp cache;
private static final AtomicReferenceFieldUpdater UPDATER = AtomicReferenceFieldUpdater.newUpdater(DRegexpObjectSite.class, RubyRegexp.class, "cache");

public DRegexpObjectSite(MethodType type, int embeddedOptions) {
super(type);
@@ -59,11 +60,23 @@ public Binder prepareBinder() {
}

// dynamic regexp
public RubyRegexp construct(ThreadContext context, RubyString[] pieces) {
/**
 * Builds the regexp for this dynamic-regexp call site from its already-evaluated pieces.
 * <p>
 * For a once-only ({@code //o}) regexp, the first regexp successfully published into
 * {@code cache} becomes the single value this site ever returns: every racing caller
 * returns that same instance, and the site's target is rebound to a constant handle
 * yielding it. A regexp built by a caller that loses the race is discarded.
 *
 * @param context the current thread context
 * @param pieces  the evaluated string pieces of the dynamic regexp
 * @return the constructed regexp, or the cached one when the once option is set
 * @throws Throwable declared for the call-site handle signature
 */
public RubyRegexp construct(ThreadContext context, RubyString[] pieces) throws Throwable {
    RubyString pattern = RubyRegexp.preprocessDRegexp(context.runtime, pieces, options);
    RubyRegexp re = RubyRegexp.newDRegexp(context.runtime, pattern, options);
    re.setLiteral();

    if (options.isOnce()) {
        RubyRegexp cached = cache;

        if (cached == null) {
            // Publish our regexp unless another thread beat us to it. We don't care whether
            // this CAS succeeds, only that the field is set exactly once.
            UPDATER.compareAndSet(this, null, re);

            // Re-read to pick up whichever regexp won the race. It is non-null from here on
            // because the field only ever transitions from null to a value.
            cached = cache;

            // Rebind the site so later invocations get the cached regexp directly.
            setTarget(Binder.from(type()).dropAll().constant(cached));
        }

        return cached;
    }

    return re;
}
}
Loading

0 comments on commit 256e753

Please sign in to comment.