@@ -427,29 +427,34 @@ private static void encodingMatchError(Ruby runtime, Regex pattern, Encoding str
427
427
pattern .getEncoding () + " regexp with " + strEnc + " string)" );
428
428
}
429
429
430
- private Encoding checkEncoding (RubyString str , boolean warn ) {
431
- if (str .scanForCodeRange () == StringSupport .CR_BROKEN ) {
432
- throw getRuntime ().newArgumentError ("invalid byte sequence in " + str .getEncoding ());
430
+ private Encoding prepareEncoding (RubyString str , boolean warn ) {
431
+ Encoding enc = str .getEncoding ();
432
+ int cr = str .scanForCodeRange ();
433
+ if (cr == StringSupport .CR_BROKEN ) {
434
+ throw getRuntime ().newArgumentError ("invalid byte sequence in " + enc );
433
435
}
434
436
check ();
435
- Encoding enc = str .getEncoding ();
436
- if (!enc .isAsciiCompatible ()) {
437
- if (enc != pattern .getEncoding ()) encodingMatchError (getRuntime (), pattern , enc );
437
+ Encoding patternEnc = pattern .getEncoding ();
438
+ if (patternEnc == enc ) {
439
+ } else if (cr == StringSupport .CR_7BIT && patternEnc == USASCIIEncoding .INSTANCE ) {
440
+ enc = patternEnc ;
441
+ } else if (!enc .isAsciiCompatible ()) {
442
+ encodingMatchError (getRuntime (), pattern , enc );
438
443
} else if (options .isFixed ()) {
439
- if (enc != pattern . getEncoding () &&
440
- (!pattern . getEncoding () .isAsciiCompatible () ||
441
- str . scanForCodeRange () != StringSupport .CR_7BIT )) encodingMatchError (getRuntime (), pattern , enc );
442
- enc = pattern . getEncoding () ;
444
+ if (enc != patternEnc &&
445
+ (!patternEnc .isAsciiCompatible () ||
446
+ cr != StringSupport .CR_7BIT )) encodingMatchError (getRuntime (), pattern , enc );
447
+ enc = patternEnc ;
443
448
}
444
- if (warn && isEncodingNone () && enc != ASCIIEncoding .INSTANCE && str . scanForCodeRange () != StringSupport .CR_7BIT ) {
449
+ if (warn && isEncodingNone () && enc != ASCIIEncoding .INSTANCE && cr != StringSupport .CR_7BIT ) {
445
450
getRuntime ().getWarnings ().warn (ID .REGEXP_MATCH_AGAINST_STRING , "regexp match /.../n against to " + enc + " string" );
446
451
}
447
452
return enc ;
448
453
}
449
454
450
455
public final Regex preparePattern (RubyString str ) {
451
456
// prepareEncoding already calls `check()`, so there is no need to call it here
452
- Encoding enc = checkEncoding (str , true );
457
+ Encoding enc = prepareEncoding (str , true );
453
458
if (enc == pattern .getEncoding ()) return pattern ;
454
459
return getPreprocessedRegexpFromCache (getRuntime (), this .str , enc , options , RegexpSupport .ErrorMode .PREPROCESS );
455
460
}
1 commit comment
duerst commented on Jun 11, 2018
Does this solution work e.g. for
My guess would be no, but I can't test easily.
This is related to https://bugs.ruby-lang.org/issues/14838.