Skip to content

Commit

Permalink
Showing 1 changed file with 17 additions and 12 deletions.
29 changes: 17 additions & 12 deletions core/src/main/java/org/jruby/RubyRegexp.java
Original file line number Diff line number Diff line change
@@ -427,29 +427,34 @@ private static void encodingMatchError(Ruby runtime, Regex pattern, Encoding str
  pattern.getEncoding() + " regexp with " + strEnc + " string)");
  }

- private Encoding checkEncoding(RubyString str, boolean warn) {
- if (str.scanForCodeRange() == StringSupport.CR_BROKEN) {
- throw getRuntime().newArgumentError("invalid byte sequence in " + str.getEncoding());
+ private Encoding prepareEncoding(RubyString str, boolean warn) {
+ Encoding enc = str.getEncoding();
+ int cr = str.scanForCodeRange();
+ if (cr == StringSupport.CR_BROKEN) {
+ throw getRuntime().newArgumentError("invalid byte sequence in " + enc);
  }
  check();
- Encoding enc = str.getEncoding();
- if (!enc.isAsciiCompatible()) {
- if (enc != pattern.getEncoding()) encodingMatchError(getRuntime(), pattern, enc);
+ Encoding patternEnc = pattern.getEncoding();
+ if (patternEnc == enc) {
+ } else if (cr == StringSupport.CR_7BIT && patternEnc == USASCIIEncoding.INSTANCE) {
+ enc = patternEnc;
+ } else if (!enc.isAsciiCompatible()) {
+ encodingMatchError(getRuntime(), pattern, enc);
  } else if (options.isFixed()) {
- if (enc != pattern.getEncoding() &&
- (!pattern.getEncoding().isAsciiCompatible() ||
- str.scanForCodeRange() != StringSupport.CR_7BIT)) encodingMatchError(getRuntime(), pattern, enc);
- enc = pattern.getEncoding();
+ if (enc != patternEnc &&
+ (!patternEnc.isAsciiCompatible() ||
+ cr != StringSupport.CR_7BIT)) encodingMatchError(getRuntime(), pattern, enc);
+ enc = patternEnc;
  }
- if (warn && isEncodingNone() && enc != ASCIIEncoding.INSTANCE && str.scanForCodeRange() != StringSupport.CR_7BIT) {
+ if (warn && isEncodingNone() && enc != ASCIIEncoding.INSTANCE && cr != StringSupport.CR_7BIT) {
  getRuntime().getWarnings().warn(ID.REGEXP_MATCH_AGAINST_STRING, "regexp match /.../n against to " + enc + " string");
  }
  return enc;
  }

  public final Regex preparePattern(RubyString str) {
  // checkEncoding does `check();` no need to here
- Encoding enc = checkEncoding(str, true);
+ Encoding enc = prepareEncoding(str, true);
  if (enc == pattern.getEncoding()) return pattern;
  return getPreprocessedRegexpFromCache(getRuntime(), this.str, enc, options, RegexpSupport.ErrorMode.PREPROCESS);
  }

1 comment on commit a72224d

@duerst
Copy link

@duerst duerst commented on a72224d Jun 11, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this solution work e.g. for

"ƒïłê.bss" =~ %r{(?<!bss>)}i

My guess would be no, but I can't test easily.

This is related to https://bugs.ruby-lang.org/issues/14838.

Please sign in to comment.