Skip to content

Commit a72224d

Browse files
lopex authored and headius committed on Mar 21, 2018
fix for #5086, RegexpError invalid pattern in look-behind for certain Regexps since 9.1.16.0
1 parent 64a2452 commit a72224d

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed
 

Diff for: ‎core/src/main/java/org/jruby/RubyRegexp.java

+17-12
Original file line number | Diff line number | Diff line change
@@ -427,29 +427,34 @@ private static void encodingMatchError(Ruby runtime, Regex pattern, Encoding str
427427
pattern.getEncoding() + " regexp with " + strEnc + " string)");
428428
}
429429

430-
private Encoding checkEncoding(RubyString str, boolean warn) {
431-
if (str.scanForCodeRange() == StringSupport.CR_BROKEN) {
432-
throw getRuntime().newArgumentError("invalid byte sequence in " + str.getEncoding());
430+
private Encoding prepareEncoding(RubyString str, boolean warn) {
431+
Encoding enc = str.getEncoding();
432+
int cr = str.scanForCodeRange();
433+
if (cr == StringSupport.CR_BROKEN) {
434+
throw getRuntime().newArgumentError("invalid byte sequence in " + enc);
433435
}
434436
check();
435-
Encoding enc = str.getEncoding();
436-
if (!enc.isAsciiCompatible()) {
437-
if (enc != pattern.getEncoding()) encodingMatchError(getRuntime(), pattern, enc);
437+
Encoding patternEnc = pattern.getEncoding();
438+
if (patternEnc == enc) {
439+
} else if (cr == StringSupport.CR_7BIT && patternEnc == USASCIIEncoding.INSTANCE) {
440+
enc = patternEnc;
441+
} else if (!enc.isAsciiCompatible()) {
442+
encodingMatchError(getRuntime(), pattern, enc);
438443
} else if (options.isFixed()) {
439-
if (enc != pattern.getEncoding() &&
440-
(!pattern.getEncoding().isAsciiCompatible() ||
441-
str.scanForCodeRange() != StringSupport.CR_7BIT)) encodingMatchError(getRuntime(), pattern, enc);
442-
enc = pattern.getEncoding();
444+
if (enc != patternEnc &&
445+
(!patternEnc.isAsciiCompatible() ||
446+
cr != StringSupport.CR_7BIT)) encodingMatchError(getRuntime(), pattern, enc);
447+
enc = patternEnc;
443448
}
444-
if (warn && isEncodingNone() && enc != ASCIIEncoding.INSTANCE && str.scanForCodeRange() != StringSupport.CR_7BIT) {
449+
if (warn && isEncodingNone() && enc != ASCIIEncoding.INSTANCE && cr != StringSupport.CR_7BIT) {
445450
getRuntime().getWarnings().warn(ID.REGEXP_MATCH_AGAINST_STRING, "regexp match /.../n against to " + enc + " string");
446451
}
447452
return enc;
448453
}
449454

450455
public final Regex preparePattern(RubyString str) {
451456
// prepareEncoding already calls `check()`, so there is no need to call it here
452-
Encoding enc = checkEncoding(str, true);
457+
Encoding enc = prepareEncoding(str, true);
453458
if (enc == pattern.getEncoding()) return pattern;
454459
return getPreprocessedRegexpFromCache(getRuntime(), this.str, enc, options, RegexpSupport.ErrorMode.PREPROCESS);
455460
}

1 commit comment

Comments
 (1)

duerst commented on Jun 11, 2018

@duerst

Does this solution work e.g. for

"ƒïłê.bss" =~ %r{(?<!bss>)}i

My guess would be no, but I can't test easily.

This is related to https://bugs.ruby-lang.org/issues/14838.

Please sign in to comment.